diff --git a/core/cfg/option.cpp b/core/cfg/option.cpp index cf751af5b..139a5fc72 100644 --- a/core/cfg/option.cpp +++ b/core/cfg/option.cpp @@ -101,6 +101,7 @@ Option TextureFiltering("rend.TextureFiltering", 0); // Default Option ThreadedRendering("rend.ThreadedRendering", true); Option DupeFrames("rend.DupeFrames", false); Option PerPixelLayers("rend.PerPixelLayers", 32); +Option NativeDepthInterpolation("rend.NativeDepthInterpolation", false); // Misc diff --git a/core/cfg/option.h b/core/cfg/option.h index b79c8c619..9e06b167b 100644 --- a/core/cfg/option.h +++ b/core/cfg/option.h @@ -459,6 +459,7 @@ extern Option AnisotropicFiltering; extern Option TextureFiltering; // 0: default, 1: force nearest, 2: force linear extern Option ThreadedRendering; extern Option DupeFrames; +extern Option NativeDepthInterpolation; // Misc diff --git a/core/rend/dx11/dx11_renderer.cpp b/core/rend/dx11/dx11_renderer.cpp index 28978b44e..7b08cba84 100644 --- a/core/rend/dx11/dx11_renderer.cpp +++ b/core/rend/dx11/dx11_renderer.cpp @@ -318,154 +318,6 @@ bool DX11Renderer::Process(TA_context* ctx) } } -// -// Efficient Triangle and Quadrilateral Clipping within Shaders. M. McGuire -// Journal of Graphics GPU and Game Tools - November 2011 -// -static glm::vec3 intersect(const glm::vec3& A, float Adist , const glm::vec3& B, float Bdist) -{ - return (A * std::abs(Bdist) + B * std::abs(Adist)) / (std::abs(Adist) + std::abs(Bdist)); -} - -// Clip the triangle 'trig' with respect to the plane defined by the given point and normal vector. -static int sutherlandHodgmanClip(const glm::vec2& point, const glm::vec2& normal, ModTriangle& trig, ModTriangle& newTrig) -{ - constexpr float clipEpsilon = 0.f; //0.00001; - constexpr float clipEpsilon2 = 0.f; //0.01; - - glm::vec3 v0(trig.x0, trig.y0, trig.z0); - glm::vec3 v1(trig.x1, trig.y1, trig.z1); - glm::vec3 v2(trig.x2, trig.y2, trig.z2); - - glm::vec3 dist = glm::vec3( - glm::dot(glm::vec2(v0) - point, normal), - glm::dot(glm::vec2(v1) - point, normal), - glm::dot(glm::vec2(v2) - point, normal)); - if (!glm::any(glm::greaterThanEqual(dist , glm::vec3(clipEpsilon2)))) - // all clipped - return 0; - if (glm::all(glm::greaterThanEqual(dist , glm::vec3(-clipEpsilon)))) - // none clipped - return 3; - - // There are either 1 or 2 vertices above the clipping plane. - glm::bvec3 above = glm::greaterThanEqual(dist, glm::vec3(0.f)); - bool nextIsAbove; - glm::vec3 v3; - // Find the CCW-most vertex above the plane. - if (above[1] && !above[0]) - { - // Cycle once CCW. Use v3 as a temp - nextIsAbove = above[2]; - v3 = v0; - v0 = v1; - v1 = v2; - v2 = v3; - dist = glm::vec3(dist.y, dist.z, dist.x); - } - else if (above[2] && !above[1]) - { - // Cycle once CW. Use v3 as a temp. - nextIsAbove = above[0]; - v3 = v2; - v2 = v1; - v1 = v0; - v0 = v3; - dist = glm::vec3(dist.z, dist.x, dist.y); - } - else - nextIsAbove = above[1]; - trig.x0 = v0.x; - trig.y0 = v0.y; - trig.z0 = v0.z; - // We always need to clip v2-v0. - v3 = intersect(v0, dist[0], v2, dist[2]); - if (nextIsAbove) - { - v2 = intersect(v1, dist[1], v2, dist[2]); - trig.x1 = v1.x; - trig.y1 = v1.y; - trig.z1 = v1.z; - trig.x2 = v2.x; - trig.y2 = v2.y; - trig.z2 = v2.z; - newTrig.x0 = v0.x; - newTrig.y0 = v0.y; - newTrig.z0 = v0.z; - newTrig.x1 = v2.x; - newTrig.y1 = v2.y; - newTrig.z1 = v2.z; - newTrig.x2 = v3.x; - newTrig.y2 = v3.y; - newTrig.z2 = v3.z; - - return 4; - } - else - { - v1 = intersect(v0, dist[0], v1, dist[1]); - trig.x1 = v1.x; - trig.y1 = v1.y; - trig.z1 = v1.z; - trig.x2 = v3.x; - trig.y2 = v3.y; - trig.z2 = v3.z; - - return 3; - } -} - -static void clipModVols(List& params, std::vector& triangles) -{ - for (ModifierVolumeParam& param : params) - { - std::vector trigs(&pvrrc.modtrig.head()[param.first], &pvrrc.modtrig.head()[param.first + param.count]); - std::vector nextTrigs; - nextTrigs.reserve(trigs.size()); - for (int axis = 0; axis < 4; axis++) - { - glm::vec2 point; - glm::vec2 normal; - switch (axis) - { - case 0: // left - point = glm::vec2(-6400.f, 0.f); - normal = glm::vec2(1.f, 0.f); - break; - case 1: // top - point = glm::vec2(0.f, -4800.f); - normal = glm::vec2(0.f, 1.f); - break; - case 2: // right - point = glm::vec2(7040.f, 0.f); - normal = glm::vec2(-1.f, 0.f); - break; - case 3: // bottom - point = glm::vec2(-0.f, 5280.f); - normal = glm::vec2(0.f, -1.f); - break; - } - - for (ModTriangle& trig : trigs) - { - ModTriangle newTrig; - int size = sutherlandHodgmanClip(point, normal, trig, newTrig); - if (size > 0) - { - nextTrigs.push_back(trig); - if (size == 4) - nextTrigs.push_back(newTrig); - } - } - std::swap(trigs, nextTrigs); - nextTrigs.clear(); - } - param.first = (u32)triangles.size(); - param.count = (u32)trigs.size(); - triangles.insert(triangles.end(), trigs.begin(), trigs.end()); - } -} - void DX11Renderer::configVertexShader() { matrices.CalcMatrices(&pvrrc, width, height); @@ -522,33 +374,12 @@ void DX11Renderer::uploadGeometryBuffers() if (config::ModifierVolumes && pvrrc.modtrig.used()) { - const ModTriangle *data = nullptr; - u32 size = 0; - std::vector modVolTriangles; - if (!settings.platform.isNaomi2()) // TODO for naomi2 as well? - { - // clip triangles - modVolTriangles.reserve(pvrrc.modtrig.used()); - clipModVols(pvrrc.global_param_mvo, modVolTriangles); - clipModVols(pvrrc.global_param_mvo_tr, modVolTriangles); - if (!modVolTriangles.empty()) - { - size = (u32)(modVolTriangles.size() * sizeof(ModTriangle)); - data = modVolTriangles.data(); - } - } - else - { - size = pvrrc.modtrig.bytes(); - data = pvrrc.modtrig.head(); - } - if (size > 0) - { - verify(ensureBufferSize(modvolBuffer, D3D11_BIND_VERTEX_BUFFER, modvolBufferSize, size)); - deviceContext->Map(modvolBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedSubres); - memcpy(mappedSubres.pData, data, size); - deviceContext->Unmap(modvolBuffer, 0); - } + const ModTriangle *data = pvrrc.modtrig.head(); + u32 size = pvrrc.modtrig.bytes(); + verify(ensureBufferSize(modvolBuffer, D3D11_BIND_VERTEX_BUFFER, modvolBufferSize, size)); + deviceContext->Map(modvolBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedSubres); + memcpy(mappedSubres.pData, data, size); + deviceContext->Unmap(modvolBuffer, 0); } unsigned int stride = sizeof(Vertex); unsigned int offset = 0; @@ -812,7 +643,6 @@ void DX11Renderer::setRenderState(const PolyParam *gp) linearFiltering = false; else linearFiltering = true; - auto sampler = samplers->getSampler(linearFiltering, gp->tsp.ClampU, gp->tsp.ClampV, gp->tsp.FlipU, gp->tsp.FlipV); deviceContext->PSSetSamplers(0, 1, &sampler.get()); } diff --git a/core/rend/dx11/dx11_shaders.cpp b/core/rend/dx11/dx11_shaders.cpp index fe7037a28..830edd8ab 100644 --- a/core/rend/dx11/dx11_shaders.cpp +++ b/core/rend/dx11/dx11_shaders.cpp @@ -59,18 +59,26 @@ VertexOut main(in VertexIn vin) { VertexOut vo; vo.pos = mul(transMatrix, float4(vin.pos.xyz, 1.f)); -#if pp_Gouraud == 1 - vo.col = vin.col * vo.pos.z; - vo.spec = vin.spec * vo.pos.z; -#else - // flat shading: no interpolation +#if DIV_POS_Z == 1 + vo.pos /= vo.pos.z; + vo.pos.z = vo.pos.w; +#endif vo.col = vin.col; vo.spec = vin.spec; +#if pp_Gouraud == 1 && DIV_POS_Z != 1 + vo.col *= vo.pos.z; + vo.spec *= vo.pos.z; #endif - vo.uv = float4(vin.uv * vo.pos.z, 0.f, vo.pos.z); + vo.uv.xyz = float3(vin.uv, 0.f); +#if DIV_POS_Z == 1 + vo.uv.w = vo.pos.w; +#else + vo.uv.xy *= vo.pos.z; + vo.uv.w = vo.pos.z; vo.pos.w = 1.f; vo.pos.z = 0.f; +#endif return vo; } @@ -102,11 +110,15 @@ VertexOut main(in VertexIn vin) { VertexOut vo; vo.pos = mul(transMatrix, float4(vin.pos.xyz, 1.f)); +#if DIV_POS_Z == 1 + vo.pos /= vo.pos.z; + vo.pos.z = vo.pos.w; + vo.uv = float4(0.f, 0.f, 0.f, vo.pos.w); +#else vo.uv = float4(0.f, 0.f, 0.f, vo.pos.z); - vo.pos.w = 1.f; vo.pos.z = 0.f; - +#endif return vo; } @@ -159,7 +171,13 @@ cbuffer polyConstantBuffer : register(b1) float fog_mode2(float w) { - float z = clamp(w * fogDensity, 1.0f, 255.9999f); + float z = clamp( +#if DIV_POS_Z == 1 + fogDensity / w +#else + fogDensity * w +#endif + , 1.0f, 255.9999f); float exp = floor(log2(z)); float m = z * 16.0f / pow(2.0, exp) - 16.0f; float idx = floor(m) + exp * 16.0f + 0.5f; @@ -201,17 +219,16 @@ PSO main(in Pixel inpix) && inpix.pos.y >= clipTest.y && inpix.pos.y <= clipTest.w) discard; #endif -#if pp_Gouraud == 1 - float4 color = inpix.col / inpix.uv.w; - #if pp_BumpMap == 1 || pp_Offset == 1 - float4 specular = inpix.spec / inpix.uv.w; - #endif -#else float4 color = inpix.col; #if pp_BumpMap == 1 || pp_Offset == 1 float4 specular = inpix.spec; #endif -#endif + #if pp_Gouraud == 1 && DIV_POS_Z != 1 + color /= inpix.uv.w; + #if pp_BumpMap == 1 || pp_Offset == 1 + specular /= inpix.uv.w; + #endif + #endif #if pp_UseAlpha == 0 color.a = 1.0f; #endif @@ -220,7 +237,10 @@ PSO main(in Pixel inpix) #endif #if pp_Texture == 1 { - float2 uv = inpix.uv.xy / inpix.uv.w; + float2 uv = inpix.uv.xy; + #if DIV_POS_Z != 1 + uv /= inpix.uv.w; + #endif #if NearestWrapFix == 1 uv = min(fmod(uv, 1.f), 0.9997f); #endif @@ -279,7 +299,11 @@ PSO main(in Pixel inpix) #endif PSO pso; - float w = inpix.uv.w * 100000.0f; +#if DIV_POS_Z == 1 + float w = 100000.0f / inpix.uv.w; +#else + float w = 100000.0f * inpix.uv.w; +#endif pso.z = log2(1.0f + w) / 34.0f; pso.col = color; @@ -295,7 +319,11 @@ struct MVPixel PSO modifierVolume(in MVPixel inpix) { PSO pso; - float w = inpix.uv.w * 100000.0f; +#if DIV_POS_Z == 1 + float w = 100000.0f / inpix.uv.w; +#else + float w = 100000.0f * inpix.uv.w; +#endif pso.z = log2(1.0f + w) / 34.0f; pso.col = float4(0, 0, 0, 1.f - shadowScale); @@ -354,9 +382,18 @@ float4 main(in VertexIn vin) : SV_Target const char * const MacroValues[] { "0", "1", "2", "3" }; +enum VertexMacroEnum { + MacroGouraud, + MacroDivPosZ, + MacroPositionOnly, + MacroTwoVolumes, + MacroLightOn, +}; + static D3D_SHADER_MACRO VertexMacros[] { { "pp_Gouraud", "1" }, + { "DIV_POS_Z", "0" }, { "POSITION_ONLY", "0" }, { "pp_TwoVolumes", "0" }, { "LIGHT_ON", "1" }, @@ -364,8 +401,7 @@ static D3D_SHADER_MACRO VertexMacros[] }; enum PixelMacroEnum { - MacroGouraud, - MacroTexture, + MacroTexture = 2, MacroUseAlpha, MacroIgnoreTexA, MacroShadInstr, @@ -383,6 +419,7 @@ enum PixelMacroEnum { static D3D_SHADER_MACRO PixelMacros[] { { "pp_Gouraud", "1" }, + { "DIV_POS_Z", "0" }, { "pp_Texture", "0" }, { "pp_UseAlpha", "0" }, { "pp_IgnoreTexA", "0" }, @@ -403,6 +440,7 @@ const ComPtr& DX11Shaders::getShader(bool pp_Texture, bool pp bool pp_Offset, u32 pp_FogCtrl, bool pp_BumpMap, bool fog_clamping, bool trilinear, bool palette, bool gouraud, bool alphaTest, bool clipInside, bool nearestWrapFix) { + bool divPosZ = !settings.platform.isNaomi2() && config::NativeDepthInterpolation; const u32 hash = (int)pp_Texture | (pp_UseAlpha << 1) | (pp_IgnoreTexA << 2) @@ -416,7 +454,8 @@ const ComPtr& DX11Shaders::getShader(bool pp_Texture, bool pp | (gouraud << 12) | (alphaTest << 13) | (clipInside << 14) - | (nearestWrapFix << 15); + | (nearestWrapFix << 15) + | (divPosZ << 16); auto& shader = shaders[hash]; if (shader == nullptr) { @@ -436,6 +475,7 @@ const ComPtr& DX11Shaders::getShader(bool pp_Texture, bool pp PixelMacros[MacroAlphaTest].Definition = MacroValues[alphaTest]; PixelMacros[MacroClipInside].Definition = MacroValues[clipInside]; PixelMacros[MacroNearestWrapFix].Definition = MacroValues[nearestWrapFix]; + PixelMacros[MacroDivPosZ].Definition = MacroValues[divPosZ]; shader = compilePS(PixelShader, "main", PixelMacros); verify(shader != nullptr); @@ -445,20 +485,22 @@ const ComPtr& DX11Shaders::getShader(bool pp_Texture, bool pp const ComPtr& DX11Shaders::getVertexShader(bool gouraud, bool naomi2) { - int index = (int)gouraud | ((int)naomi2 << 1); + bool divPosZ = !settings.platform.isNaomi2() && config::NativeDepthInterpolation; + int index = (int)gouraud | ((int)naomi2 << 1) | ((int)divPosZ << 2); ComPtr& vertexShader = vertexShaders[index]; if (!vertexShader) { - VertexMacros[0].Definition = MacroValues[gouraud]; + VertexMacros[MacroGouraud].Definition = MacroValues[gouraud]; if (!naomi2) { + VertexMacros[MacroDivPosZ].Definition = MacroValues[divPosZ]; vertexShader = compileVS(VertexShader, "main", VertexMacros); } else { - VertexMacros[1].Definition = MacroValues[false]; - VertexMacros[2].Definition = MacroValues[false]; - VertexMacros[3].Definition = MacroValues[true]; + VertexMacros[MacroPositionOnly].Definition = MacroValues[false]; + VertexMacros[MacroTwoVolumes].Definition = MacroValues[false]; + VertexMacros[MacroLightOn].Definition = MacroValues[true]; std::string source(DX11N2VertexShader); source += std::string("\n") + DX11N2ColorShader; vertexShader = compileVS(source.c_str(), "main", VertexMacros); @@ -470,21 +512,26 @@ const ComPtr& DX11Shaders::getVertexShader(bool gouraud, boo const ComPtr& DX11Shaders::getMVVertexShader(bool naomi2) { - if (!modVolVertexShaders[naomi2]) + bool divPosZ = !settings.platform.isNaomi2() && config::NativeDepthInterpolation; + int index = (int)naomi2 | ((int)divPosZ << 1); + if (!modVolVertexShaders[index]) { if (!naomi2) - modVolVertexShaders[0] = compileVS(ModVolVertexShader, "main", nullptr); + { + VertexMacros[MacroDivPosZ].Definition = MacroValues[divPosZ]; + modVolVertexShaders[index] = compileVS(ModVolVertexShader, "main", VertexMacros); + } else { - VertexMacros[0].Definition = MacroValues[false]; - VertexMacros[1].Definition = MacroValues[true]; - VertexMacros[2].Definition = MacroValues[false]; - VertexMacros[3].Definition = MacroValues[false]; - modVolVertexShaders[1] = compileVS(DX11N2VertexShader, "main", VertexMacros); + VertexMacros[MacroGouraud].Definition = MacroValues[false]; + VertexMacros[MacroPositionOnly].Definition = MacroValues[true]; + VertexMacros[MacroTwoVolumes].Definition = MacroValues[false]; + VertexMacros[MacroLightOn].Definition = MacroValues[false]; + modVolVertexShaders[index] = compileVS(DX11N2VertexShader, "main", VertexMacros); } } - return modVolVertexShaders[naomi2]; + return modVolVertexShaders[index]; } const ComPtr& DX11Shaders::getModVolShader() @@ -564,10 +611,10 @@ ComPtr DX11Shaders::compilePS(const char* source, const char* ComPtr DX11Shaders::getVertexShaderBlob() { - VertexMacros[0].Definition = MacroValues[true]; + VertexMacros[MacroGouraud].Definition = MacroValues[true]; // FIXME code dup - VertexMacros[1].Definition = MacroValues[false]; - VertexMacros[2].Definition = MacroValues[false]; + VertexMacros[MacroPositionOnly].Definition = MacroValues[false]; + VertexMacros[MacroTwoVolumes].Definition = MacroValues[false]; std::string source(DX11N2VertexShader); source += std::string("\n") + DX11N2ColorShader; return compileShader(source.c_str(), "main", "vs_4_0", VertexMacros); @@ -576,9 +623,9 @@ ComPtr DX11Shaders::getVertexShaderBlob() ComPtr DX11Shaders::getMVVertexShaderBlob() { // FIXME code dup - VertexMacros[0].Definition = MacroValues[false]; - VertexMacros[1].Definition = MacroValues[true]; - VertexMacros[2].Definition = MacroValues[false]; + VertexMacros[MacroGouraud].Definition = MacroValues[false]; + VertexMacros[MacroPositionOnly].Definition = MacroValues[true]; + VertexMacros[MacroTwoVolumes].Definition = MacroValues[false]; return compileShader(DX11N2VertexShader, "main", "vs_4_0", VertexMacros); } diff --git a/core/rend/dx11/dx11_shaders.h b/core/rend/dx11/dx11_shaders.h index 3a91ef2ad..69245aa68 100644 --- a/core/rend/dx11/dx11_shaders.h +++ b/core/rend/dx11/dx11_shaders.h @@ -70,9 +70,9 @@ private: ComPtr device; std::unordered_map> shaders; - ComPtr vertexShaders[4]; + ComPtr vertexShaders[8]; ComPtr modVolShader; - ComPtr modVolVertexShaders[2]; + ComPtr modVolVertexShaders[4]; ComPtr quadPixelShader; ComPtr quadVertexShader; ComPtr quadRotateVertexShader; diff --git a/core/rend/dx11/oit/dx11_oitshaders.cpp b/core/rend/dx11/oit/dx11_oitshaders.cpp index 14e7202bc..252f6bc5e 100644 --- a/core/rend/dx11/oit/dx11_oitshaders.cpp +++ b/core/rend/dx11/oit/dx11_oitshaders.cpp @@ -71,24 +71,33 @@ VertexOut main(in VertexIn vin) { VertexOut vo; vo.pos = mul(transMatrix, float4(vin.pos.xyz, 1.f)); -#if pp_Gouraud == 1 - vo.col = vin.col * vo.pos.z; - vo.spec = vin.spec * vo.pos.z; - vo.col1 = vin.col1 * vo.pos.z; - vo.spec1 = vin.spec1 * vo.pos.z; -#else - // flat shading: no interpolation +#if DIV_POS_Z == 1 + vo.pos /= vo.pos.z; + vo.pos.z = vo.pos.w; +#endif vo.col = vin.col; vo.spec = vin.spec; vo.col1 = vin.col1; vo.spec1 = vin.spec1; +#if pp_Gouraud == 1 && DIV_POS_Z != 1 + vo.col *= vo.pos.z; + vo.spec *= vo.pos.z; + vo.col1 *= vo.pos.z; + vo.spec1 *= vo.pos.z; #endif - vo.uv = float4(vin.uv * vo.pos.z, 0.f, vo.pos.z); - vo.uv1 = vin.uv1 * vo.pos.z; + vo.uv.xyz = float3(vin.uv, 0.f); + vo.uv1 = vin.uv1; vo.index = uint(polyNumber) + vin.vertexId; +#if DIV_POS_Z == 1 + vo.uv.w = vo.pos.w; +#else + vo.uv.xy *= vo.pos.z; + vo.uv.w = vo.pos.z; + vo.uv1 *= vo.pos.z; vo.pos.w = 1.f; vo.pos.z = 0.f; +#endif return vo; } @@ -131,7 +140,11 @@ struct Pixel { float getFragDepth(float z) { +#if DIV_POS_Z == 1 + float w = 100000.0 / z; +#else float w = 100000.0 * z; +#endif return log2(1.0 + w) / 34.0; } @@ -329,7 +342,13 @@ cbuffer polyConstantBuffer : register(b1) float fog_mode2(float w) { - float z = clamp(w * fogDensity, 1.0f, 255.9999f); + float z = clamp( +#if DIV_POS_Z == 1 + fogDensity / w +#else + fogDensity * w +#endif + , 1.0f, 255.9999f); float exp = floor(log2(z)); float m = z * 16.0f / pow(2.0, exp) - 16.0f; float idx = floor(m) + exp * 16.0f + 0.5f; @@ -404,7 +423,7 @@ PSO main(in VertexIn inpix) } #endif #endif - #if pp_Gouraud == 1 + #if pp_Gouraud == 1 && DIV_POS_Z != 1 color /= inpix.uv.w; specular /= inpix.uv.w; #endif @@ -421,10 +440,14 @@ PSO main(in VertexIn inpix) float2 uv; #if pp_TwoVolumes == 1 if (area1) - uv = inpix.uv1 / inpix.uv.w; + uv = inpix.uv1; else #endif - uv = inpix.uv.xy / inpix.uv.w; + uv = inpix.uv.xy; + #if DIV_POS_Z != 1 + uv /= inpix.uv.w; + #endif + #if NearestWrapFix == 1 uv = min(fmod(uv, 1.f), 0.9997f); #endif @@ -564,7 +587,7 @@ PSO main(in VertexIn inpix) Pixel pixel; pixel.color = packColors(clamp(color, 0.f, 1.f)); - pixel.depth = inpix.uv.w; + pixel.depth = pso.z; pixel.seq_num = inpix.index; InterlockedExchange(abufferPointers[coords], idx, pixel.next); Pixels[idx] = pixel; @@ -829,18 +852,26 @@ struct IncludeManager : public ID3DInclude const char * const MacroValues[] { "0", "1", "2", "3" }; +enum VertexMacroEnum { + MacroGouraud, + MacroTwoVolumes, + MacroDivPosZ, + MacroPositionOnly, + MacroLightOn, +}; + static D3D_SHADER_MACRO VertexMacros[] { { "pp_Gouraud", "1" }, - { "POSITION_ONLY", "0" }, { "pp_TwoVolumes", "0" }, + { "DIV_POS_Z", "0" }, + { "POSITION_ONLY", "0" }, { "LIGHT_ON", "1" }, { nullptr, nullptr } }; enum PixelMacroEnum { - MacroGouraud, - MacroTexture, + MacroTexture = 3, MacroUseAlpha, MacroIgnoreTexA, MacroShadInstr, @@ -852,13 +883,14 @@ enum PixelMacroEnum { MacroAlphaTest, MacroClipInside, MacroNearestWrapFix, - MacroTwoVolumes, MacroPass }; static D3D_SHADER_MACRO PixelMacros[] { { "pp_Gouraud", "1" }, + { "pp_TwoVolumes", "0" }, + { "DIV_POS_Z", "0" }, { "pp_Texture", "0" }, { "pp_UseAlpha", "0" }, { "pp_IgnoreTexA", "0" }, @@ -871,7 +903,6 @@ static D3D_SHADER_MACRO PixelMacros[] { "cp_AlphaTest", "0" }, { "pp_ClipInside", "0" }, { "NearestWrapFix", "0" }, - { "pp_TwoVolumes", "0" }, { "PASS", "0" }, { nullptr, nullptr } }; @@ -880,6 +911,7 @@ const ComPtr& DX11OITShaders::getShader(bool pp_Texture, bool bool pp_Offset, u32 pp_FogCtrl, bool pp_BumpMap, bool fog_clamping, bool palette, bool gouraud, bool alphaTest, bool clipInside, bool nearestWrapFix, bool twoVolumes, Pass pass) { + bool divPosZ = !settings.platform.isNaomi2() && config::NativeDepthInterpolation; const u32 hash = (int)pp_Texture | (pp_UseAlpha << 1) | (pp_IgnoreTexA << 2) @@ -894,7 +926,8 @@ const ComPtr& DX11OITShaders::getShader(bool pp_Texture, bool | (clipInside << 13) | (nearestWrapFix << 14) | (twoVolumes << 15) - | (pass << 16); + | (pass << 16) + | (divPosZ << 18); auto& shader = shaders[hash]; if (shader == nullptr) { @@ -915,6 +948,7 @@ const ComPtr& DX11OITShaders::getShader(bool pp_Texture, bool PixelMacros[MacroClipInside].Definition = MacroValues[clipInside]; PixelMacros[MacroNearestWrapFix].Definition = MacroValues[nearestWrapFix]; PixelMacros[MacroTwoVolumes].Definition = MacroValues[twoVolumes]; + PixelMacros[MacroDivPosZ].Definition = MacroValues[divPosZ]; PixelMacros[MacroPass].Definition = MacroValues[pass]; shader = compilePS(PixelShader, "main", PixelMacros); @@ -925,24 +959,28 @@ const ComPtr& DX11OITShaders::getShader(bool pp_Texture, bool const ComPtr& DX11OITShaders::getVertexShader(bool gouraud, bool naomi2, bool positionOnly, bool lightOn, bool twoVolumes) { + bool divPosZ = !settings.platform.isNaomi2() && config::NativeDepthInterpolation; const u32 hash = (int)gouraud | ((int)naomi2 << 1) | ((int)positionOnly << 2) | ((int)lightOn << 3) - | ((int)twoVolumes << 4); + | ((int)twoVolumes << 4) + | ((int)divPosZ << 5); auto& shader = vertexShaders[hash]; if (shader == nullptr) { - VertexMacros[0].Definition = MacroValues[gouraud]; + VertexMacros[MacroGouraud].Definition = MacroValues[gouraud]; if (!naomi2) { + VertexMacros[MacroDivPosZ].Definition = MacroValues[divPosZ]; shader = compileVS(VertexShader, "main", VertexMacros); } else { - VertexMacros[1].Definition = MacroValues[positionOnly]; - VertexMacros[2].Definition = MacroValues[twoVolumes]; - VertexMacros[3].Definition = MacroValues[lightOn]; + VertexMacros[MacroDivPosZ].Definition = MacroValues[false]; + VertexMacros[MacroPositionOnly].Definition = MacroValues[positionOnly]; + VertexMacros[MacroTwoVolumes].Definition = MacroValues[twoVolumes]; + VertexMacros[MacroLightOn].Definition = MacroValues[lightOn]; std::string source(DX11N2VertexShader); if (!positionOnly && lightOn) source += std::string("\n") + DX11N2ColorShader; @@ -955,27 +993,37 @@ const ComPtr& DX11OITShaders::getVertexShader(bool gouraud, const ComPtr& DX11OITShaders::getMVVertexShader(bool naomi2) { - if (!modVolVertexShaders[naomi2]) + bool divPosZ = !settings.platform.isNaomi2() && config::NativeDepthInterpolation; + auto& mvVertexShader = modVolVertexShaders[(int)naomi2 | ((int)divPosZ << 1)]; + if (!mvVertexShader) { if (!naomi2) - modVolVertexShaders[0] = compileVS(ModVolVertexShader, "main", nullptr); + { + VertexMacros[MacroDivPosZ].Definition = MacroValues[divPosZ]; + mvVertexShader = compileVS(ModVolVertexShader, "main", VertexMacros); + } else { - VertexMacros[0].Definition = MacroValues[false]; - VertexMacros[1].Definition = MacroValues[true]; - VertexMacros[2].Definition = MacroValues[false]; - VertexMacros[3].Definition = MacroValues[false]; - modVolVertexShaders[1] = compileVS(DX11N2VertexShader, "main", VertexMacros); + VertexMacros[MacroGouraud].Definition = MacroValues[false]; + VertexMacros[MacroPositionOnly].Definition = MacroValues[true]; + VertexMacros[MacroTwoVolumes].Definition = MacroValues[false]; + VertexMacros[MacroLightOn].Definition = MacroValues[false]; + mvVertexShader = compileVS(DX11N2VertexShader, "main", VertexMacros); } } - return modVolVertexShaders[naomi2]; + return mvVertexShader; } const ComPtr& DX11OITShaders::getModVolShader() { + bool divPosZ = !settings.platform.isNaomi2() && config::NativeDepthInterpolation; + auto& modVolShader = modVolShaders[divPosZ]; if (!modVolShader) + { + PixelMacros[MacroDivPosZ].Definition = MacroValues[divPosZ]; modVolShader = compilePS(PixelShader, "modifierVolume", PixelMacros); + } return modVolShader; } @@ -1082,10 +1130,10 @@ ComPtr DX11OITShaders::compilePS(const char* source, const ch ComPtr DX11OITShaders::getVertexShaderBlob() { - VertexMacros[0].Definition = MacroValues[true]; + VertexMacros[MacroGouraud].Definition = MacroValues[true]; // FIXME code dup - VertexMacros[1].Definition = MacroValues[false]; - VertexMacros[2].Definition = MacroValues[true]; + VertexMacros[MacroPositionOnly].Definition = MacroValues[false]; + VertexMacros[MacroTwoVolumes].Definition = MacroValues[true]; std::string source(DX11N2VertexShader); source += std::string("\n") + DX11N2ColorShader; return compileShader(source.c_str(), "main", "vs_5_0", VertexMacros); @@ -1094,9 +1142,9 @@ ComPtr DX11OITShaders::getVertexShaderBlob() ComPtr DX11OITShaders::getMVVertexShaderBlob() { // FIXME code dup - VertexMacros[0].Definition = MacroValues[false]; - VertexMacros[1].Definition = MacroValues[true]; - VertexMacros[2].Definition = MacroValues[false]; + VertexMacros[MacroGouraud].Definition = MacroValues[false]; + VertexMacros[MacroPositionOnly].Definition = MacroValues[true]; + VertexMacros[MacroTwoVolumes].Definition = MacroValues[false]; return compileShader(DX11N2VertexShader, "main", "vs_5_0", VertexMacros); } diff --git a/core/rend/dx11/oit/dx11_oitshaders.h b/core/rend/dx11/oit/dx11_oitshaders.h index 1dbf436e5..eb3d4bb8c 100644 --- a/core/rend/dx11/oit/dx11_oitshaders.h +++ b/core/rend/dx11/oit/dx11_oitshaders.h @@ -35,7 +35,7 @@ public: const ComPtr& getShader(bool pp_Texture, bool pp_UseAlpha, bool pp_IgnoreTexA, u32 pp_ShadInstr, bool pp_Offset, u32 pp_FogCtrl, bool pp_BumpMap, bool fog_clamping, bool palette, bool gouraud, bool alphaTest, bool clipInside, bool nearestWrapFix, bool twoVolumes, Pass pass); - const ComPtr& getVertexShader(bool gouraud, bool naomi2, bool positionOnly, bool lightOn, bool twoVolumes = true); + const ComPtr& getVertexShader(bool gouraud, bool naomi2, bool positionOnly, bool lightOn, bool twoVolumes = false); const ComPtr& getModVolShader(); const ComPtr& getMVVertexShader(bool naomi2); const ComPtr& getFinalShader(); @@ -47,9 +47,10 @@ public: saveCache(CacheFile); shaders.clear(); vertexShaders.clear(); - modVolShader.reset(); for (auto& shader : modVolVertexShaders) shader.reset(); + for (auto& shader : modVolShaders) + shader.reset(); for (auto& shader : trModVolShaders) shader.reset(); finalShader.reset(); @@ -69,8 +70,8 @@ private: ComPtr device; std::unordered_map> shaders; std::unordered_map> vertexShaders; - ComPtr modVolShader; - ComPtr modVolVertexShaders[2]; + ComPtr modVolShaders[2]; + ComPtr modVolVertexShaders[4]; ComPtr trModVolShaders[4]; ComPtr finalShader; diff --git a/core/rend/dx9/d3d_shaders.cpp b/core/rend/dx9/d3d_shaders.cpp index e94e2589c..12e7690e2 100644 --- a/core/rend/dx9/d3d_shaders.cpp +++ b/core/rend/dx9/d3d_shaders.cpp @@ -17,6 +17,7 @@ along with Flycast. If not, see . */ #include "d3d_shaders.h" +#include "cfg/option.h" #define SHADER_DEBUG 0 // D3DXSHADER_DEBUG|D3DXSHADER_SKIPOPTIMIZATION @@ -43,18 +44,23 @@ VertexOut main(in VertexIn vin) { VertexOut vo; vo.pos = mul(transMatrix, float4(vin.pos.xyz, 1.f)); -#if pp_Gouraud == 1 - vo.col = vin.col * vo.pos.z; - vo.spec = vin.spec * vo.pos.z; -#else - // flat shading: no interpolation +#if DIV_POS_Z == 1 + vo.pos /= vo.pos.z; + vo.pos.z = vo.pos.w; +#endif vo.col = vin.col; vo.spec = vin.spec; +#if pp_Gouraud == 1 && DIV_POS_Z != 1 + vo.col *= vo.pos.z; + vo.spec *= vo.pos.z; #endif - vo.uv = float4(vin.uv * vo.pos.z, 0.f, vo.pos.z); + vo.uv = float4(vin.uv, 0.f, vo.pos.z); +#if DIV_POS_Z != 1 + vo.uv.xy *= vo.pos.z; vo.pos.w = 1.f; vo.pos.z = 0.f; +#endif return vo; } @@ -75,7 +81,7 @@ struct pixel #endif }; - + sampler2D samplr : register(s0); sampler2D tex_pal : register(s1); sampler2D fog_table : register(s2); @@ -91,7 +97,13 @@ float4 colorClampMax : register(c7); float fog_mode2(float w) { - float z = clamp(w * FOG_DENSITY_SCALE.x, 1.0f, 255.9999f); + float z = clamp( +#if DIV_POS_Z == 1 + FOG_DENSITY_SCALE.x / w +#else + FOG_DENSITY_SCALE.x * w +#endif + , 1.0f, 255.9999f); float exp = floor(log2(z)); float m = z * 16.0f / pow(2.0, exp) - 16.0f; float idx = floor(m) + exp * 16.0f + 0.5f; @@ -112,7 +124,12 @@ float4 clampColor(float4 color) float4 palettePixel(float4 coords) { - int colorIdx = int(floor(tex2Dproj(samplr, coords).a * 255.0f + 0.5f) + paletteIndex.x); +#if DIV_POS_Z == 1 + float texColIdx = tex2D(samplr, coords.xy).a; +#else + float texColIdx = tex2Dproj(samplr, coords).a; +#endif + int colorIdx = int(floor(texColIdx * 255.0f + 0.5f) + paletteIndex.x); float2 c = float2((fmod(float(colorIdx), 32.0f) * 2.0f + 1.0f) / 64.0f, (float(colorIdx / 32) * 2.0f + 1.0f) / 64.0f); return tex2D(tex_pal, c); } @@ -134,17 +151,16 @@ PSO main(in pixel inpix) discard; #endif -#if pp_Gouraud == 1 - float4 color = inpix.col / inpix.uv.w; - #if pp_BumpMap == 1 || pp_Offset == 1 - float4 specular = inpix.spec / inpix.uv.w; - #endif -#else float4 color = inpix.col; #if pp_BumpMap == 1 || pp_Offset == 1 float4 specular = inpix.spec; #endif -#endif + #if pp_Gouraud == 1 && DIV_POS_Z != 1 + color /= inpix.uv.w; + #if pp_BumpMap == 1 || pp_Offset == 1 + specular /= inpix.uv.w; + #endif + #endif #if pp_UseAlpha == 0 color.a = 1.0f; #endif @@ -154,7 +170,11 @@ PSO main(in pixel inpix) #if pp_Texture == 1 { #if pp_Palette == 0 - float4 texcol = tex2Dproj(samplr, inpix.uv); + #if DIV_POS_Z == 1 + float4 texcol = tex2D(samplr, inpix.uv.xy); + #else + float4 texcol = tex2Dproj(samplr, inpix.uv); + #endif #else float4 texcol = palettePixel(inpix.uv); #endif @@ -204,7 +224,11 @@ PSO main(in pixel inpix) //color.rgb = float3(inpix.uv.w * FOG_DENSITY_SCALE.x / 128.0f); PSO pso; - float w = inpix.uv.w * 100000.0f; +#if DIV_POS_Z == 1 + float w = 100000.0f / inpix.uv.w; +#else + float w = 100000.0f * inpix.uv.w; +#endif pso.z = log2(1.0f + w) / 34.0f; pso.col = color; @@ -214,7 +238,11 @@ PSO main(in pixel inpix) PSO modifierVolume(float4 uv : TEXCOORD0) { PSO pso; - float w = uv.w * 100000.0f; +#if DIV_POS_Z == 1 + float w = 100000.0f / uv.w; +#else + float w = 100000.0f * uv.w; +#endif pso.z = log2(1.0f + w) / 34.0f; pso.col = float4(0, 0, 0, FOG_DENSITY_SCALE.y); @@ -227,24 +255,30 @@ const char * const MacroValues[] { "0", "1", "2", "3" }; static D3DXMACRO VertexMacros[] { { "pp_Gouraud", "1" }, + { "DIV_POS_Z", "0" }, { 0, 0 } }; -constexpr u32 MacroTexture = 0; -constexpr u32 MacroOffset = 1; -constexpr u32 MacroShadInstr = 2; -constexpr u32 MacroIgnoreTexA = 3; -constexpr u32 MacroUseAlpha = 4; -constexpr u32 MacroFogCtrl = 5; -constexpr u32 MacroFogClamping = 6; -constexpr u32 MacroPalette = 7; -constexpr u32 MacroBumpMap = 8; -constexpr u32 MacroTriLinear = 9; -constexpr u32 MacroGouraud = 10; -constexpr u32 MacroClipInside = 11; +enum ShaderMacros { + MacroGouraud, + MacroDivPosZ, + MacroTexture, + MacroOffset, + MacroShadInstr, + MacroIgnoreTexA, + MacroUseAlpha, + MacroFogCtrl, + MacroFogClamping, + MacroPalette, + MacroBumpMap, + MacroTriLinear, + MacroClipInside, +}; static D3DXMACRO PixelMacros[] { + { "pp_Gouraud", "1" }, + { "DIV_POS_Z", "0" }, { "pp_Texture", "0" }, { "pp_Offset", "0" }, { "pp_ShadInstr", "0" }, @@ -255,7 +289,6 @@ static D3DXMACRO PixelMacros[] { "pp_Palette", "0" }, { "pp_BumpMap", "0" }, { "pp_TriLinear", "0" }, - { "pp_Gouraud", "1" }, { "pp_ClipInside", "0" }, {0, 0} }; @@ -275,7 +308,8 @@ const ComPtr& D3DShaders::getShader(bool pp_Texture, bool | (trilinear << 10) | (palette << 11) | (gouraud << 12) - | (clipInside << 13); + | (clipInside << 13) + | ((int)config::NativeDepthInterpolation << 14); auto it = shaders.find(hash); if (it == shaders.end()) { @@ -293,6 +327,7 @@ const ComPtr& D3DShaders::getShader(bool pp_Texture, bool PixelMacros[MacroPalette].Definition = MacroValues[palette]; PixelMacros[MacroGouraud].Definition = MacroValues[gouraud]; PixelMacros[MacroClipInside].Definition = MacroValues[clipInside]; + PixelMacros[MacroDivPosZ].Definition = MacroValues[config::NativeDepthInterpolation]; ComPtr shader = compilePS(PixelShader, "main", PixelMacros); verify((bool )shader); it = shaders.insert(std::make_pair(hash, shader)).first; @@ -302,10 +337,11 @@ const ComPtr& D3DShaders::getShader(bool pp_Texture, bool const ComPtr& D3DShaders::getVertexShader(bool gouraud) { - ComPtr& vertexShader = gouraud ? gouraudVertexShader : flatVertexShader; + ComPtr& vertexShader = vertexShaders[(int)gouraud | ((int)config::NativeDepthInterpolation << 1)]; if (!vertexShader) { - VertexMacros[0].Definition = MacroValues[gouraud]; + VertexMacros[MacroGouraud].Definition = MacroValues[gouraud]; + VertexMacros[MacroDivPosZ].Definition = MacroValues[config::NativeDepthInterpolation]; vertexShader = compileVS(VertexShader, "main", VertexMacros); } @@ -314,8 +350,12 @@ const ComPtr& D3DShaders::getVertexShader(bool gouraud) const ComPtr& D3DShaders::getModVolShader() { + ComPtr& modVolShader = modVolShaders[config::NativeDepthInterpolation]; if (!modVolShader) + { + PixelMacros[MacroDivPosZ].Definition = MacroValues[config::NativeDepthInterpolation]; modVolShader = compilePS(PixelShader, "modifierVolume", PixelMacros); + } return modVolShader; } diff --git a/core/rend/dx9/d3d_shaders.h b/core/rend/dx9/d3d_shaders.h index 1741ad77a..3ea5eccb7 100644 --- a/core/rend/dx9/d3d_shaders.h +++ b/core/rend/dx9/d3d_shaders.h @@ -36,9 +36,10 @@ public: const ComPtr& getModVolShader(); void term() { shaders.clear(); - gouraudVertexShader.reset(); - flatVertexShader.reset(); - modVolShader.reset(); + for (auto& shader : vertexShaders) + shader.reset(); + for (auto& shader : modVolShaders) + shader.reset(); device.reset(); } @@ -49,7 +50,6 @@ private: ComPtr device; std::unordered_map> shaders; - ComPtr gouraudVertexShader; - ComPtr flatVertexShader; - ComPtr modVolShader; + ComPtr vertexShaders[4]; + ComPtr modVolShaders[2]; }; diff --git a/core/rend/gl4/abuffer.cpp b/core/rend/gl4/abuffer.cpp index 054f4b3f5..02100784d 100644 --- a/core/rend/gl4/abuffer.cpp +++ b/core/rend/gl4/abuffer.cpp @@ -198,7 +198,7 @@ void main(void) )"; static const char *tr_modvol_shader_source = R"( -noperspective in vec3 vtx_uv; +in vec3 vtx_uv; // Must match ModifierVolumeMode enum values #define MV_XOR 0 diff --git a/core/rend/gl4/gl4.h b/core/rend/gl4/gl4.h index 0fe895e4f..80df0d109 100755 --- a/core/rend/gl4/gl4.h +++ b/core/rend/gl4/gl4.h @@ -100,6 +100,7 @@ struct gl4PipelineShader bool fog_clamping; bool palette; bool naomi2; + bool divPosZ; }; diff --git a/core/rend/gl4/gl4naomi2.cpp b/core/rend/gl4/gl4naomi2.cpp index cc1f5c4c3..c40a29f2d 100644 --- a/core/rend/gl4/gl4naomi2.cpp +++ b/core/rend/gl4/gl4naomi2.cpp @@ -25,14 +25,14 @@ static const char *gouraudSource = R"( #if pp_Gouraud == 0 #define INTERPOLATION flat #else -#define INTERPOLATION noperspective +#define INTERPOLATION #endif -#define NOPERSPECTIVE noperspective )"; N2Vertex4Source::N2Vertex4Source(const gl4PipelineShader* shader) : OpenGl4Source() { addConstant("OIT_RENDER"); + addConstant("DIV_POS_Z", false); if (shader == nullptr) { addConstant("POSITION_ONLY", 1); diff --git a/core/rend/gl4/gldraw.cpp b/core/rend/gl4/gldraw.cpp index 3f2e94c14..ebd7fcb03 100644 --- a/core/rend/gl4/gldraw.cpp +++ b/core/rend/gl4/gldraw.cpp @@ -54,6 +54,7 @@ static gl4PipelineShader *gl4GetProgram(bool cp_AlphaTest, bool pp_InsideClippin rv <<= 1; rv |= (int)palette; rv <<= 1; rv |= (int)naomi2; rv <<= 2; rv |= (int)pass; + rv <<= 1; rv |= (int)(!settings.platform.isNaomi2() && config::NativeDepthInterpolation); gl4PipelineShader *shader = &gl4.shaders[rv]; if (shader->program == 0) @@ -73,6 +74,7 @@ static gl4PipelineShader *gl4GetProgram(bool cp_AlphaTest, bool pp_InsideClippin shader->palette = palette; shader->naomi2 = naomi2; shader->pass = pass; + shader->divPosZ = !settings.platform.isNaomi2() && config::NativeDepthInterpolation; gl4CompilePipelineShader(shader); } diff --git a/core/rend/gl4/gles.cpp b/core/rend/gl4/gles.cpp index 37ac0f8b5..990a534fd 100644 --- a/core/rend/gl4/gles.cpp +++ b/core/rend/gl4/gles.cpp @@ -55,7 +55,7 @@ static const char* VertexShaderSource = R"( #if pp_Gouraud == 0 #define INTERPOLATION flat #else -#define INTERPOLATION noperspective +#define INTERPOLATION #endif // Uniforms @@ -74,31 +74,39 @@ in vec2 in_uv1; // Output INTERPOLATION out vec4 vtx_base; INTERPOLATION out vec4 vtx_offs; -noperspective out vec3 vtx_uv; +out vec3 vtx_uv; INTERPOLATION out vec4 vtx_base1; INTERPOLATION out vec4 vtx_offs1; -noperspective out vec2 vtx_uv1; +out vec2 vtx_uv1; flat out uint vtx_index; void main() { vec4 vpos = ndcMat * in_pos; + #if DIV_POS_Z == 1 + vpos /= vpos.z; + vpos.z = vpos.w; + #endif vtx_base = in_base; vtx_offs = in_offs; - vtx_uv = vec3(in_uv * vpos.z, vpos.z); + vtx_uv = vec3(in_uv, vpos.z); vtx_base1 = in_base1; vtx_offs1 = in_offs1; - vtx_uv1 = in_uv1 * vpos.z; + vtx_uv1 = in_uv1; vtx_index = uint(pp_Number) + uint(gl_VertexID); -#if pp_Gouraud == 1 - vtx_base *= vpos.z; - vtx_offs *= vpos.z; - vtx_base1 *= vpos.z; - vtx_offs1 *= vpos.z; -#endif + #if pp_Gouraud == 1 && DIV_POS_Z != 1 + vtx_base *= vpos.z; + vtx_offs *= vpos.z; + vtx_base1 *= vpos.z; + vtx_offs1 *= vpos.z; + #endif - vpos.w = 1.0; - vpos.z = 0.0; + #if DIV_POS_Z != 1 + vtx_uv.xy *= vpos.z; + vtx_uv1 *= vpos.z; + vpos.w = 1.0; + vpos.z = 0.0; + #endif gl_Position = vpos; } )"; @@ -123,7 +131,7 @@ out vec4 FragColor; #if pp_Gouraud == 0 #define INTERPOLATION flat #else -#define INTERPOLATION noperspective +#define INTERPOLATION #endif // Uniforms @@ -155,15 +163,21 @@ uniform int fog_control[2]; // Input INTERPOLATION in vec4 vtx_base; INTERPOLATION in vec4 vtx_offs; -noperspective in vec3 vtx_uv; +in vec3 vtx_uv; INTERPOLATION in vec4 vtx_base1; INTERPOLATION in vec4 vtx_offs1; -noperspective in vec2 vtx_uv1; +in vec2 vtx_uv1; flat in uint vtx_index; float fog_mode2(float w) { - float z = clamp(w * sp_FOG_DENSITY, 1.0, 255.9999); + float z = clamp( +#if DIV_POS_Z == 1 + sp_FOG_DENSITY / w +#else + sp_FOG_DENSITY * w +#endif + , 1.0, 255.9999); float exp = floor(log2(z)); float m = z * 16.0 / pow(2.0, exp) - 16.0; float idx = floor(m) + exp * 16.0 + 0.5; @@ -184,7 +198,12 @@ vec4 fog_clamp(vec4 col) vec4 palettePixel(sampler2D tex, vec3 coords) { - int color_idx = int(floor(textureProj(tex, coords).r * 255.0 + 0.5)) + palette_index; +#if DIV_POS_Z == 1 + float colIdx = texture(tex, coords.xy).r; +#else + float colIdx = textureProj(tex, coords).r; +#endif + int color_idx = int(floor(colIdx * 255.0 + 0.5)) + palette_index; ivec2 c = ivec2(color_idx % 32, color_idx / 32); return texelFetch(palette, c, 0); } @@ -233,7 +252,7 @@ void main() } #endif #endif - #if pp_Gouraud == 1 + #if pp_Gouraud == 1 && DIV_POS_Z != 1 color /= vtx_uv.z; offset /= vtx_uv.z; #endif @@ -250,10 +269,17 @@ void main() { vec4 texcol; #if pp_Palette == 0 - if (area1) - texcol = textureProj(tex1, vec3(vtx_uv1.xy, vtx_uv.z)); - else - texcol = textureProj(tex0, vtx_uv); + #if DIV_POS_Z == 1 + if (area1) + texcol = texture(tex1, vtx_uv1); + else + texcol = texture(tex0, vtx_uv.xy); + #else + if (area1) + texcol = textureProj(tex1, vec3(vtx_uv1.xy, vtx_uv.z)); + else + texcol = textureProj(tex0, vtx_uv); + #endif #else if (area1) texcol = palettePixel(tex1, vec3(vtx_uv1.xy, vtx_uv.z)); @@ -415,7 +441,7 @@ void main() )"; static const char* ModifierVolumeShader = R"( -noperspective in vec3 vtx_uv; +in vec3 vtx_uv; void main() { @@ -426,8 +452,9 @@ void main() class Vertex4Source : public OpenGl4Source { public: - Vertex4Source(bool gouraud) : OpenGl4Source() { + Vertex4Source(bool gouraud, bool divPosZ) : OpenGl4Source() { addConstant("pp_Gouraud", gouraud); + addConstant("DIV_POS_Z", divPosZ); addSource(VertexShaderSource); } @@ -453,6 +480,7 @@ public: addConstant("pp_Palette", s->palette); addConstant("NOUVEAU", gl.mesa_nouveau); addConstant("PASS", (int)s->pass); + addConstant("DIV_POS_Z", s->divPosZ); addSource(ShaderHeader); addSource(gl4PixelPipelineShader); @@ -472,7 +500,7 @@ bool gl4CompilePipelineShader(gl4PipelineShader* s, const char *fragment_source if (s->naomi2) vertexSource = N2Vertex4Source(s).generate(); else - vertexSource = Vertex4Source(s->pp_Gouraud).generate(); + vertexSource = Vertex4Source(s->pp_Gouraud, s->divPosZ).generate(); Fragment4ShaderSource fragmentSource(s); s->program = gl_CompileAndLink(vertex_source != nullptr ? vertex_source : vertexSource.c_str(), @@ -588,15 +616,17 @@ static void create_modvol_shader() { if (gl4.modvol_shader.program != 0) return; - Vertex4Source vertexShader(false); + Vertex4Source vertexShader(false, config::NativeDepthInterpolation); OpenGl4Source fragmentShader; - fragmentShader.addSource(ShaderHeader) + fragmentShader.addConstant("DIV_POS_Z", config::NativeDepthInterpolation) + .addSource(ShaderHeader) .addSource(ModifierVolumeShader); gl4.modvol_shader.program = gl_CompileAndLink(vertexShader.generate().c_str(), fragmentShader.generate().c_str()); gl4.modvol_shader.ndcMat = glGetUniformLocation(gl4.modvol_shader.program, "ndcMat"); N2Vertex4Source n2VertexShader; + fragmentShader.setConstant("DIV_POS_Z", false); gl4.n2ModVolShader.program = gl_CompileAndLink(n2VertexShader.generate().c_str(), fragmentShader.generate().c_str()); gl4.n2ModVolShader.ndcMat = glGetUniformLocation(gl4.n2ModVolShader.program, "ndcMat"); gl4.n2ModVolShader.mvMat = glGetUniformLocation(gl4.n2ModVolShader.program, "mvMat"); @@ -626,8 +656,6 @@ static bool gl_create_resources() gl4SetupModvolVBO(); } - create_modvol_shader(); - initQuad(); glCheck(); @@ -757,8 +785,9 @@ static bool RenderFrame(int width, int height) pvrrc.fog_clamp_min.getRGBAColor(gl4ShaderUniforms.fog_clamp_min); pvrrc.fog_clamp_max.getRGBAColor(gl4ShaderUniforms.fog_clamp_max); - if (config::Fog) + if (config::ModifierVolumes) { + create_modvol_shader(); glcache.UseProgram(gl4.modvol_shader.program); glUniformMatrix4fv(gl4.modvol_shader.ndcMat, 1, GL_FALSE, &gl4ShaderUniforms.ndcMat[0][0]); diff --git a/core/rend/gl4/glsl.h b/core/rend/gl4/glsl.h index 9c83a38e5..de8bcc70f 100644 --- a/core/rend/gl4/glsl.h +++ b/core/rend/gl4/glsl.h @@ -40,7 +40,11 @@ struct Pixel { \n\ \n\ void setFragDepth(float z) \n\ { \n\ +#if DIV_POS_Z == 1 \n\ + float w = 100000.0 / z; \n\ +#else \n\ float w = 100000.0 * z; \n\ +#endif \n\ gl_FragDepth = log2(1.0 + w) / 34.0; \n\ } \n\ \n\ diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 1c6bc14dc..85469bdc2 100644 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -69,22 +69,9 @@ const char *PixelCompatShader = R"( )"; const char* GouraudSource = R"( -#if TARGET_GL == GL3 - #define NOPERSPECTIVE noperspective - #if pp_Gouraud == 0 - #define INTERPOLATION flat - #else - #define INTERPOLATION noperspective - #endif -#elif TARGET_GL == GLES3 - #define NOPERSPECTIVE - #if pp_Gouraud == 0 - #define INTERPOLATION flat - #else - #define INTERPOLATION - #endif +#if (TARGET_GL == GL3 || TARGET_GL == GLES3) && pp_Gouraud == 0 + #define INTERPOLATION flat #else - #define NOPERSPECTIVE #define INTERPOLATION #endif )"; @@ -103,7 +90,7 @@ in highp vec2 in_uv; /* output */ INTERPOLATION out highp vec4 vtx_base; INTERPOLATION out highp vec4 vtx_offs; -NOPERSPECTIVE out highp vec3 vtx_uv; +out highp vec3 vtx_uv; void main() { @@ -116,13 +103,20 @@ void main() vpos.z = depth_scale.x + depth_scale.y * vpos.w; vpos.xy *= vpos.w; #else -#if pp_Gouraud == 1 - vtx_base *= vpos.z; - vtx_offs *= vpos.z; -#endif - vtx_uv = vec3(in_uv * vpos.z, vpos.z); - vpos.w = 1.0; - vpos.z = 0.0; + #if DIV_POS_Z == 1 + vpos /= vpos.z; + vpos.z = vpos.w; + #endif + #if pp_Gouraud == 1 && DIV_POS_Z != 1 + vtx_base *= vpos.z; + vtx_offs *= vpos.z; + #endif + vtx_uv = vec3(in_uv, vpos.z); + #if DIV_POS_Z != 1 + vtx_uv.xy *= vpos.z; + vpos.w = 1.0; + vpos.z = 0.0; + #endif #endif gl_Position = vpos; } @@ -149,15 +143,19 @@ uniform mediump int palette_index; /* Vertex input*/ INTERPOLATION in highp vec4 vtx_base; INTERPOLATION in highp vec4 vtx_offs; -NOPERSPECTIVE in highp vec3 vtx_uv; +in highp vec3 vtx_uv; lowp float fog_mode2(highp float w) { + highp float z = clamp( #if TARGET_GL == GLES2 - highp float z = clamp(vtx_uv.z, 1.0, 255.9999); + vtx_uv.z +#elif DIV_POS_Z == 1 + sp_FOG_DENSITY / w #else - highp float z = clamp(w * sp_FOG_DENSITY, 1.0, 255.9999); + sp_FOG_DENSITY * w #endif + , 1.0, 255.9999); mediump float exp = floor(log2(z)); highp float m = z * 16.0 / pow(2.0, exp) - 16.0; mediump float idx = floor(m) + exp * 16.0 + 0.5; @@ -178,7 +176,7 @@ highp vec4 fog_clamp(lowp vec4 col) lowp vec4 palettePixel(highp vec3 coords) { -#if TARGET_GL == GLES2 || TARGET_GL == GL2 +#if TARGET_GL == GLES2 || TARGET_GL == GL2 || DIV_POS_Z == 1 highp int color_idx = int(floor(texture(tex, coords.xy).FOG_CHANNEL * 255.0 + 0.5)) + palette_index; highp vec2 c = vec2((mod(float(color_idx), 32.0) * 2.0 + 1.0) / 64.0, (float(color_idx / 32) * 2.0 + 1.0) / 64.0); return texture(palette, c); @@ -208,7 +206,7 @@ void main() highp vec4 color = vtx_base; highp vec4 offset = vtx_offs; - #if pp_Gouraud == 1 && TARGET_GL != GLES2 + #if pp_Gouraud == 1 && TARGET_GL != GLES2 && DIV_POS_Z != 1 color /= vtx_uv.z; offset /= vtx_uv.z; #endif @@ -221,7 +219,7 @@ void main() #if pp_Texture==1 { #if pp_Palette == 0 - #if TARGET_GL == GLES2 || TARGET_GL == GL2 + #if TARGET_GL == GLES2 || TARGET_GL == GL2 || DIV_POS_Z == 1 lowp vec4 texcol = texture(tex, vtx_uv.xy); #else lowp vec4 texcol = textureProj(tex, vtx_uv); @@ -289,7 +287,11 @@ void main() //color.rgb = vec3(vtx_uv.z * sp_FOG_DENSITY / 128.0); #if TARGET_GL != GLES2 - highp float w = vtx_uv.z * 100000.0; +#if DIV_POS_Z == 1 + highp float w = 100000.0 / vtx_uv.z; +#else + highp float w = 100000.0 * vtx_uv.z; +#endif gl_FragDepth = log2(1.0 + w) / 34.0; #endif gl_FragColor = color; @@ -300,12 +302,16 @@ static const char* ModifierVolumeShader = R"( uniform lowp float sp_ShaderColor; /* Vertex input*/ -NOPERSPECTIVE in highp vec3 vtx_uv; +in highp vec3 vtx_uv; void main() { #if TARGET_GL != GLES2 - highp float w = vtx_uv.z * 100000.0; +#if DIV_POS_Z == 1 + highp float w = 100000.0 / vtx_uv.z; +#else + highp float w = 100000.0 * vtx_uv.z; +#endif gl_FragDepth = log2(1.0 + w) / 34.0; #endif gl_FragColor=vec4(0.0, 0.0, 0.0, sp_ShaderColor); @@ -635,19 +641,20 @@ PipelineShader *GetProgram(bool cp_AlphaTest, bool pp_InsideClipping, u32 rv=0; rv |= pp_InsideClipping; - rv<<=1; rv|=cp_AlphaTest; - rv<<=1; rv|=pp_Texture; - rv<<=1; rv|=pp_UseAlpha; - rv<<=1; rv|=pp_IgnoreTexA; - rv<<=2; rv|=pp_ShadInstr; - rv<<=1; rv|=pp_Offset; - rv<<=2; rv|=pp_FogCtrl; - rv<<=1; rv|=pp_Gouraud; - rv<<=1; rv|=pp_BumpMap; - rv<<=1; rv|=fog_clamping; - rv<<=1; rv|=trilinear; - rv<<=1; rv|=palette; - rv<<=1; rv|=naomi2; + rv <<= 1; rv |= cp_AlphaTest; + rv <<= 1; rv |= pp_Texture; + rv <<= 1; rv |= pp_UseAlpha; + rv <<= 1; rv |= pp_IgnoreTexA; + rv <<= 2; rv |= pp_ShadInstr; + rv <<= 1; rv |= pp_Offset; + rv <<= 2; rv |= pp_FogCtrl; + rv <<= 1; rv |= pp_Gouraud; + rv <<= 1; rv |= pp_BumpMap; + rv <<= 1; rv |= fog_clamping; + rv <<= 1; rv |= trilinear; + rv <<= 1; rv |= palette; + rv <<= 1; rv |= naomi2; + rv <<= 1, rv |= !settings.platform.isNaomi2() && config::NativeDepthInterpolation; PipelineShader *shader = &gl.shaders[rv]; if (shader->program == 0) @@ -666,6 +673,7 @@ PipelineShader *GetProgram(bool cp_AlphaTest, bool pp_InsideClipping, shader->trilinear = trilinear; shader->palette = palette; shader->naomi2 = naomi2; + shader->divPosZ = !settings.platform.isNaomi2() && config::NativeDepthInterpolation; CompilePipelineShader(shader); } @@ -675,8 +683,9 @@ PipelineShader *GetProgram(bool cp_AlphaTest, bool pp_InsideClipping, class VertexSource : public OpenGlSource { public: - VertexSource(bool gouraud) : OpenGlSource() { + VertexSource(bool gouraud, bool divPosZ) : OpenGlSource() { addConstant("pp_Gouraud", gouraud); + addConstant("DIV_POS_Z", divPosZ); addSource(VertexCompatShader); addSource(GouraudSource); @@ -702,6 +711,7 @@ public: addConstant("FogClamping", s->fog_clamping); addConstant("pp_TriLinear", s->trilinear); addConstant("pp_Palette", s->palette); + addConstant("DIV_POS_Z", s->divPosZ); addSource(PixelCompatShader); addSource(GouraudSource); @@ -715,7 +725,7 @@ bool CompilePipelineShader(PipelineShader* s) if (s->naomi2) vertexShader = N2VertexSource(s->pp_Gouraud, false, s->pp_Texture).generate(); else - vertexShader = VertexSource(s->pp_Gouraud).generate(); + vertexShader = VertexSource(s->pp_Gouraud, s->divPosZ).generate(); FragmentShaderSource fragmentSource(s); s->program = gl_CompileAndLink(vertexShader.c_str(), fragmentSource.generate().c_str()); @@ -860,10 +870,11 @@ static void create_modvol_shader() { if (gl.modvol_shader.program != 0) return; - VertexSource vertexShader(false); + VertexSource vertexShader(false, config::NativeDepthInterpolation); OpenGlSource fragmentShader; fragmentShader.addConstant("pp_Gouraud", 0) + .addConstant("DIV_POS_Z", config::NativeDepthInterpolation) .addSource(PixelCompatShader) .addSource(GouraudSource) .addSource(ModifierVolumeShader); @@ -876,6 +887,7 @@ static void create_modvol_shader() if (gl.gl_major >= 3) { N2VertexSource n2vertexShader(false, true, false); + fragmentShader.setConstant("DIV_POS_Z", false); gl.n2ModVolShader.program = gl_CompileAndLink(n2vertexShader.generate().c_str(), fragmentShader.generate().c_str()); gl.n2ModVolShader.ndcMat = glGetUniformLocation(gl.n2ModVolShader.program, "ndcMat"); gl.n2ModVolShader.sp_ShaderColor = glGetUniformLocation(gl.n2ModVolShader.program, "sp_ShaderColor"); @@ -885,7 +897,7 @@ static void create_modvol_shader() } } -bool gl_create_resources() +static bool gl_create_resources() { if (gl.vbo.geometry != nullptr) // Assume the resources have already been created @@ -903,7 +915,6 @@ bool gl_create_resources() gl.vbo.idxs = std::unique_ptr(new GlBuffer(GL_ELEMENT_ARRAY_BUFFER)); gl.vbo.idxs2 = std::unique_ptr(new GlBuffer(GL_ELEMENT_ARRAY_BUFFER)); - create_modvol_shader(); initQuad(); return true; @@ -911,8 +922,6 @@ bool gl_create_resources() GLuint gl_CompileShader(const char* shader,GLuint type); -bool gl_create_resources(); - //setup #ifndef __APPLE__ @@ -1200,17 +1209,21 @@ bool RenderFrame(int width, int height) pvrrc.fog_clamp_min.getRGBAColor(ShaderUniforms.fog_clamp_min); pvrrc.fog_clamp_max.getRGBAColor(ShaderUniforms.fog_clamp_max); - glcache.UseProgram(gl.modvol_shader.program); - if (gl.modvol_shader.depth_scale != -1) - glUniform4fv(gl.modvol_shader.depth_scale, 1, ShaderUniforms.depth_coefs); - glUniformMatrix4fv(gl.modvol_shader.ndcMat, 1, GL_FALSE, &ShaderUniforms.ndcMat[0][0]); - glUniform1f(gl.modvol_shader.sp_ShaderColor, 1 - FPU_SHAD_SCALE.scale_factor / 256.f); + if (config::ModifierVolumes) + { + create_modvol_shader(); + glcache.UseProgram(gl.modvol_shader.program); + if (gl.modvol_shader.depth_scale != -1) + glUniform4fv(gl.modvol_shader.depth_scale, 1, ShaderUniforms.depth_coefs); + glUniformMatrix4fv(gl.modvol_shader.ndcMat, 1, GL_FALSE, &ShaderUniforms.ndcMat[0][0]); + glUniform1f(gl.modvol_shader.sp_ShaderColor, 1 - FPU_SHAD_SCALE.scale_factor / 256.f); - glcache.UseProgram(gl.n2ModVolShader.program); - if (gl.n2ModVolShader.depth_scale != -1) - glUniform4fv(gl.n2ModVolShader.depth_scale, 1, ShaderUniforms.depth_coefs); - glUniformMatrix4fv(gl.n2ModVolShader.ndcMat, 1, GL_FALSE, &ShaderUniforms.ndcMat[0][0]); - glUniform1f(gl.n2ModVolShader.sp_ShaderColor, 1 - FPU_SHAD_SCALE.scale_factor / 256.f); + glcache.UseProgram(gl.n2ModVolShader.program); + if (gl.n2ModVolShader.depth_scale != -1) + glUniform4fv(gl.n2ModVolShader.depth_scale, 1, ShaderUniforms.depth_coefs); + glUniformMatrix4fv(gl.n2ModVolShader.ndcMat, 1, GL_FALSE, &ShaderUniforms.ndcMat[0][0]); + glUniform1f(gl.n2ModVolShader.sp_ShaderColor, 1 - FPU_SHAD_SCALE.scale_factor / 256.f); + } ShaderUniforms.PT_ALPHA=(PT_ALPHA_REF&0xFF)/255.0f; diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index b905320ea..f37cc613f 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -110,6 +110,7 @@ struct PipelineShader bool trilinear; bool palette; bool naomi2; + bool divPosZ; }; class GlBuffer diff --git a/core/rend/gles/naomi2.cpp b/core/rend/gles/naomi2.cpp index 52fd1d6d9..752b68f1f 100644 --- a/core/rend/gles/naomi2.cpp +++ b/core/rend/gles/naomi2.cpp @@ -46,10 +46,10 @@ INTERPOLATION out highp vec4 vtx_offs; #if pp_TwoVolumes == 1 INTERPOLATION out vec4 vtx_base1; INTERPOLATION out vec4 vtx_offs1; -noperspective out vec2 vtx_uv1; +out vec2 vtx_uv1; #endif #endif -NOPERSPECTIVE out highp vec3 vtx_uv; +out highp vec3 vtx_uv; #ifdef OIT_RENDER flat out uint vtx_index; #endif diff --git a/core/rend/gui.cpp b/core/rend/gui.cpp index 6525961ab..2558f60ea 100644 --- a/core/rend/gui.cpp +++ b/core/rend/gui.cpp @@ -1731,25 +1731,30 @@ static void gui_display_settings() #ifndef TARGET_IPHONE OptionCheckbox("VSync", config::VSync, "Synchronizes the frame rate with the screen refresh rate. Recommended"); - ImGui::Indent(); - if (!config::VSync || !isVulkan(config::RendererType)) + if (isVulkan(config::RendererType)) { - ImGui::PushItemFlag(ImGuiItemFlags_Disabled, true); - ImGui::PushStyleVar(ImGuiStyleVar_Alpha, ImGui::GetStyle().Alpha * 0.5f); + ImGui::Indent(); + if (!config::VSync) + { + ImGui::PushItemFlag(ImGuiItemFlags_Disabled, true); + ImGui::PushStyleVar(ImGuiStyleVar_Alpha, ImGui::GetStyle().Alpha * 0.5f); + } + OptionCheckbox("Duplicate frames", config::DupeFrames, "Duplicate frames on high refresh rate monitors (120 Hz and higher)"); + if (!config::VSync) + { + ImGui::PopItemFlag(); + ImGui::PopStyleVar(); + } + ImGui::Unindent(); } - OptionCheckbox("Duplicate frames", config::DupeFrames, "Duplicate frames on high refresh rate monitors (120 Hz and higher)"); - if (!config::VSync || !isVulkan(config::RendererType)) - { - ImGui::PopItemFlag(); - ImGui::PopStyleVar(); - } - ImGui::Unindent(); #endif OptionCheckbox("Show FPS Counter", config::ShowFPS, "Show on-screen frame/sec counter"); OptionCheckbox("Show VMU In-game", config::FloatVMUs, "Show the VMU LCD screens while in-game"); OptionCheckbox("Rotate Screen 90°", config::Rotate90, "Rotate the screen 90° counterclockwise"); OptionCheckbox("Delay Frame Swapping", config::DelayFrameSwapping, "Useful to avoid flashing screen or glitchy videos. Not recommended on slow platforms"); + OptionCheckbox("Native Depth Interpolation", config::NativeDepthInterpolation, + "Helps with texture corruption and depth issues on AMD GPUs. Can also help Intel GPUs in some cases."); constexpr int apiCount = 0 #ifdef USE_VULKAN + 1 diff --git a/core/rend/vulkan/oit/oit_pipeline.cpp b/core/rend/vulkan/oit/oit_pipeline.cpp index 89dcf8b04..e8af79f6b 100644 --- a/core/rend/vulkan/oit/oit_pipeline.cpp +++ b/core/rend/vulkan/oit/oit_pipeline.cpp @@ -140,8 +140,9 @@ void OITPipelineManager::CreatePipeline(u32 listType, bool autosort, const PolyP vk::PipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo(vk::PipelineDynamicStateCreateFlags(), 2, dynamicStates); bool twoVolume = pp.tsp1.full != (u32)-1 || pp.tcw1.full != (u32)-1; + bool divPosZ = !settings.platform.isNaomi2() && config::NativeDepthInterpolation; vk::ShaderModule vertex_module = shaderManager->GetVertexShader( - OITShaderManager::VertexShaderParams{ pp.pcw.Gouraud == 1, pp.isNaomi2(), pass != Pass::Depth, twoVolume, pp.pcw.Texture == 1 }); + OITShaderManager::VertexShaderParams{ pp.pcw.Gouraud == 1, pp.isNaomi2(), pass != Pass::Depth, twoVolume, pp.pcw.Texture == 1, divPosZ }); OITShaderManager::FragmentShaderParams params = {}; params.alphaTest = listType == ListType_Punch_Through; params.bumpmap = pp.tcw.PixelFmt == PixelBumpMap; @@ -157,6 +158,7 @@ void OITPipelineManager::CreatePipeline(u32 listType, bool autosort, const PolyP params.pass = pass; params.twoVolume = twoVolume; params.palette = gpuPalette; + params.divPosZ = divPosZ; vk::ShaderModule fragment_module = shaderManager->GetFragmentShader(params); vk::PipelineShaderStageCreateInfo stages[] = { @@ -440,8 +442,8 @@ void OITPipelineManager::CreateModVolPipeline(ModVolMode mode, int cullMode, boo vk::DynamicState dynamicStates[2] = { vk::DynamicState::eViewport, vk::DynamicState::eScissor }; vk::PipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo(vk::PipelineDynamicStateCreateFlags(), 2, dynamicStates); - vk::ShaderModule vertex_module = shaderManager->GetModVolVertexShader(naomi2); - vk::ShaderModule fragment_module = shaderManager->GetModVolShader(); + vk::ShaderModule vertex_module = shaderManager->GetModVolVertexShader(OITShaderManager::ModVolShaderParams{ naomi2, !settings.platform.isNaomi2() && config::NativeDepthInterpolation }); + vk::ShaderModule fragment_module = shaderManager->GetModVolShader(!settings.platform.isNaomi2() && config::NativeDepthInterpolation); vk::PipelineShaderStageCreateInfo stages[] = { { vk::PipelineShaderStageCreateFlags(), vk::ShaderStageFlagBits::eVertex, vertex_module, "main" }, @@ -535,8 +537,9 @@ void OITPipelineManager::CreateTrModVolPipeline(ModVolMode mode, int cullMode, b vk::DynamicState dynamicStates[2] = { vk::DynamicState::eViewport, vk::DynamicState::eScissor }; vk::PipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo(vk::PipelineDynamicStateCreateFlags(), 2, dynamicStates); - vk::ShaderModule vertex_module = shaderManager->GetModVolVertexShader(naomi2); - vk::ShaderModule fragment_module = shaderManager->GetTrModVolShader(mode); + bool divPosZ = !settings.platform.isNaomi2() && config::NativeDepthInterpolation; + vk::ShaderModule vertex_module = shaderManager->GetModVolVertexShader(OITShaderManager::ModVolShaderParams{ naomi2, divPosZ }); + vk::ShaderModule fragment_module = shaderManager->GetTrModVolShader(OITShaderManager::TrModVolShaderParams{ mode, divPosZ }); vk::PipelineShaderStageCreateInfo stages[] = { { vk::PipelineShaderStageCreateFlags(), vk::ShaderStageFlagBits::eVertex, vertex_module, "main" }, diff --git a/core/rend/vulkan/oit/oit_pipeline.h b/core/rend/vulkan/oit/oit_pipeline.h index 538b3f3f0..7c51db72d 100644 --- a/core/rend/vulkan/oit/oit_pipeline.h +++ b/core/rend/vulkan/oit/oit_pipeline.h @@ -405,12 +405,13 @@ private: } hash |= (pp->isp.ZWriteDis << 20) | (pp->isp.CullMode << 21) | ((autosort ? 6 : pp->isp.DepthMode) << 23); hash |= ((u32)gpuPalette << 26) | ((u32)pass << 27) | ((u32)pp->isNaomi2() << 29); + hash |= (u32)(!settings.platform.isNaomi2() && config::NativeDepthInterpolation) << 30; return hash; } u32 hash(ModVolMode mode, int cullMode, bool naomi2) const { - return ((int)mode << 2) | cullMode | ((u32)naomi2 << 5); + return ((int)mode << 2) | cullMode | ((u32)naomi2 << 5) | ((u32)(!settings.platform.isNaomi2() && config::NativeDepthInterpolation) << 6); } vk::PipelineVertexInputStateCreateInfo GetMainVertexInputStateCreateInfo(bool full = true) const diff --git a/core/rend/vulkan/oit/oit_shaders.cpp b/core/rend/vulkan/oit/oit_shaders.cpp index d926cb6a4..df06af9f5 100644 --- a/core/rend/vulkan/oit/oit_shaders.cpp +++ b/core/rend/vulkan/oit/oit_shaders.cpp @@ -44,30 +44,38 @@ layout (location = 6) in mediump vec2 in_uv1; layout (location = 0) INTERPOLATION out highp vec4 vtx_base; layout (location = 1) INTERPOLATION out highp vec4 vtx_offs; -layout (location = 2) noperspective out highp vec3 vtx_uv; +layout (location = 2) out highp vec3 vtx_uv; layout (location = 3) INTERPOLATION out highp vec4 vtx_base1; // New for OIT, only for OP/PT with 2-volume layout (location = 4) INTERPOLATION out highp vec4 vtx_offs1; -layout (location = 5) noperspective out highp vec2 vtx_uv1; +layout (location = 5) out highp vec2 vtx_uv1; layout (location = 6) flat out uint vtx_index; void main() { vec4 vpos = uniformBuffer.ndcMat * in_pos; +#if DIV_POS_Z == 1 + vpos /= vpos.z; + vpos.z = vpos.w; +#endif vtx_base = vec4(in_base) / 255.0; vtx_offs = vec4(in_offs) / 255.0; - vtx_uv = vec3(in_uv * vpos.z, vpos.z); + vtx_uv = vec3(in_uv, vpos.z); vtx_base1 = vec4(in_base1) / 255.0; vtx_offs1 = vec4(in_offs1) / 255.0; - vtx_uv1 = in_uv1 * vpos.z; -#if pp_Gouraud == 1 + vtx_uv1 = in_uv1; +#if pp_Gouraud == 1 && DIV_POS_Z != 1 vtx_base *= vpos.z; vtx_offs *= vpos.z; vtx_base1 *= vpos.z; vtx_offs1 *= vpos.z; #endif vtx_index = uint(pushConstants.polyNumber) + uint(gl_VertexIndex); +#if DIV_POS_Z != 1 + vtx_uv.xy *= vpos.z; + vtx_uv1 *= vpos.z; vpos.w = 1.0; vpos.z = 0.0; +#endif gl_Position = vpos; } )"; @@ -179,10 +187,10 @@ layout (input_attachment_index = 0, set = 0, binding = 5) uniform subpassInput D // Vertex input layout (location = 0) INTERPOLATION in highp vec4 vtx_base; layout (location = 1) INTERPOLATION in highp vec4 vtx_offs; -layout (location = 2) noperspective in highp vec3 vtx_uv; +layout (location = 2) in highp vec3 vtx_uv; layout (location = 3) INTERPOLATION in highp vec4 vtx_base1; // new for OIT. Only if 2 vol layout (location = 4) INTERPOLATION in highp vec4 vtx_offs1; -layout (location = 5) noperspective in highp vec2 vtx_uv1; +layout (location = 5) in highp vec2 vtx_uv1; layout (location = 6) flat in uint vtx_index; #if pp_FogCtrl != 2 || pp_TwoVolumes == 1 @@ -190,7 +198,13 @@ layout (set = 0, binding = 2) uniform sampler2D fog_table; float fog_mode2(float w) { - float z = clamp(w * uniformBuffer.sp_FOG_DENSITY, 1.0, 255.9999); + float z = clamp( +#if DIV_POS_Z == 1 + uniformBuffer.sp_FOG_DENSITY / w +#else + uniformBuffer.sp_FOG_DENSITY * w +#endif + , 1.0, 255.9999); float exp = floor(log2(z)); float m = z * 16.0 / pow(2.0, exp) - 16.0; float idx = floor(m) + exp * 16.0 + 0.5; @@ -213,7 +227,12 @@ vec4 colorClamp(vec4 col) vec4 palettePixel(sampler2D tex, vec3 coords) { - vec4 c = vec4(textureProj(tex, coords).r * 255.0 / 1023.0 + pushConstants.palette_index, 0.5, 0.0, 0.0); +#if DIV_POS_Z == 1 + float texIdx = texture(tex, coords.xy).r; +#else + float texIdx = textureProj(tex, coords).r; +#endif + vec4 c = vec4(texIdx * 255.0 / 1023.0 + pushConstants.palette_index, 0.5, 0.0, 0.0); return texture(palette, c.xy); } @@ -261,7 +280,7 @@ void main() } #endif #endif - #if pp_Gouraud == 1 + #if pp_Gouraud == 1 && DIV_POS_Z != 1 color /= vtx_uv.z; offset /= vtx_uv.z; #endif @@ -280,14 +299,22 @@ void main() #if pp_TwoVolumes == 1 if (area1) #if pp_Palette == 0 - texcol = textureProj(tex1, vec3(vtx_uv1, vtx_uv.z)); + #if DIV_POS_Z == 1 + texcol = texture(tex1, vtx_uv1); + #else + texcol = textureProj(tex1, vec3(vtx_uv1, vtx_uv.z)); + #endif #else texcol = palettePixel(tex1, vec3(vtx_uv1, vtx_uv.z)); #endif else #endif #if pp_Palette == 0 + #if DIV_POS_Z == 1 + texcol = texture(tex0, vtx_uv.xy); + #else texcol = textureProj(tex0, vtx_uv); + #endif #else texcol = palettePixel(tex0, vtx_uv); #endif @@ -428,7 +455,7 @@ void main() Pixel pixel; pixel.color = packColors(clamp(color, vec4(0.0), vec4(1.0))); - pixel.depth = vtx_uv.z; + pixel.depth = gl_FragDepth; pixel.seq_num = vtx_index; pixel.next = atomicExchange(abufferPointer.pointers[coords.x + coords.y * uniformBuffer.viewportWidth], idx); PixelBuffer.pixels[idx] = pixel; @@ -438,7 +465,7 @@ void main() )"; static const char OITModifierVolumeShader[] = R"( -layout (location = 0) noperspective in highp float depth; +layout (location = 0) in highp float depth; void main() { @@ -614,7 +641,7 @@ void main(void) )"; static const char OITTranslucentModvolShaderSource[] = R"( -layout (location = 0) noperspective in highp float depth; +layout (location = 0) in highp float depth; // Must match ModifierVolumeMode enum values #define MV_XOR 0 @@ -624,6 +651,9 @@ layout (location = 0) noperspective in highp float depth; void main() { +#if MV_MODE == MV_XOR || MV_MODE == MV_OR + setFragDepth(depth); +#endif ivec2 coords = ivec2(gl_FragCoord.xy); uint idx = abufferPointer.pointers[coords.x + coords.y * uniformBuffer.viewportWidth]; @@ -635,10 +665,10 @@ void main() if (getShadowEnable(pp)) { #if MV_MODE == MV_XOR - if (depth >= pixel.depth) + if (gl_FragDepth >= pixel.depth) atomicXor(PixelBuffer.pixels[idx].seq_num, SHADOW_STENCIL); #elif MV_MODE == MV_OR - if (depth >= pixel.depth) + if (gl_FragDepth >= pixel.depth) atomicOr(PixelBuffer.pixels[idx].seq_num, SHADOW_STENCIL); #elif MV_MODE == MV_INCLUSION uint prev_val = atomicAnd(PixelBuffer.pixels[idx].seq_num, ~(SHADOW_STENCIL)); @@ -692,10 +722,10 @@ layout (location = 7) in vec3 in_normal; layout (location = 0) INTERPOLATION out highp vec4 vtx_base; layout (location = 1) INTERPOLATION out highp vec4 vtx_offs; -layout (location = 2) noperspective out highp vec3 vtx_uv; +layout (location = 2) out highp vec3 vtx_uv; layout (location = 3) INTERPOLATION out highp vec4 vtx_base1; layout (location = 4) INTERPOLATION out highp vec4 vtx_offs1; -layout (location = 5) noperspective out highp vec2 vtx_uv1; +layout (location = 5) out highp vec2 vtx_uv1; layout (location = 6) flat out uint vtx_index; void wDivide(inout vec4 vpos) @@ -779,7 +809,8 @@ vk::UniqueShaderModule OITShaderManager::compileShader(const VertexShaderParams& { VulkanSource src; src.addConstant("pp_Gouraud", (int)params.gouraud) - .addSource(GouraudSource); + .addConstant("DIV_POS_Z", (int)params.divPosZ) + .addSource(GouraudSource); if (params.naomi2) src.addConstant("pp_TwoVolumes", (int)params.twoVolume) .addConstant("LIGHT_ON", (int)params.lightOn) @@ -807,6 +838,7 @@ vk::UniqueShaderModule OITShaderManager::compileShader(const FragmentShaderParam .addConstant("pp_BumpMap", (int)params.bumpmap) .addConstant("ColorClamping", (int)params.clamping) .addConstant("pp_Palette", (int)params.palette) + .addConstant("DIV_POS_Z", (int)params.divPosZ) .addConstant("PASS", (int)params.pass) .addSource(GouraudSource) .addSource(OITShaderHeader) @@ -836,30 +868,31 @@ vk::UniqueShaderModule OITShaderManager::compileClearShader() return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment, src.generate()); } -vk::UniqueShaderModule OITShaderManager::compileModVolVertexShader(bool naomi2) +vk::UniqueShaderModule OITShaderManager::compileShader(const ModVolShaderParams& params) { VulkanSource src; - if (naomi2) + if (params.naomi2) src.addSource(N2ModVolVertexShaderSource); else - src.addSource(ModVolVertexShaderSource); + src.addConstant("DIV_POS_Z", (int)params.divPosZ) + .addSource(ModVolVertexShaderSource); return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eVertex, src.generate()); } -vk::UniqueShaderModule OITShaderManager::compileModVolFragmentShader() +vk::UniqueShaderModule OITShaderManager::compileModVolFragmentShader(bool divPosZ) { VulkanSource src; - src.addSource(OITShaderHeader) + src.addConstant("DIV_POS_Z", (int)divPosZ) + .addSource(OITShaderHeader) .addSource(OITModifierVolumeShader); return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment, src.generate()); } -void OITShaderManager::compileTrModVolFragmentShader(ModVolMode mode) +vk::UniqueShaderModule OITShaderManager::compileShader(const TrModVolShaderParams& params) { - if (trModVolShaders.empty()) - trModVolShaders.resize((size_t)ModVolMode::Final); VulkanSource src; src.addConstant("MAX_PIXELS_PER_FRAGMENT", config::PerPixelLayers) - .addConstant("MV_MODE", (int)mode) + .addConstant("MV_MODE", (int)params.mode) + .addConstant("DIV_POS_Z", (int)params.divPosZ) .addSource(OITShaderHeader) .addSource(OITTranslucentModvolShaderSource); - trModVolShaders[(size_t)mode] = ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment, src.generate()); + return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment, src.generate()); } diff --git a/core/rend/vulkan/oit/oit_shaders.h b/core/rend/vulkan/oit/oit_shaders.h index d241e7022..356cae7b2 100644 --- a/core/rend/vulkan/oit/oit_shaders.h +++ b/core/rend/vulkan/oit/oit_shaders.h @@ -35,9 +35,10 @@ public: bool lightOn; bool twoVolume; bool texture; + bool divPosZ; u32 hash() { return (u32)gouraud | ((u32)naomi2 << 1) | ((u32)lightOn << 2) - | ((u32)twoVolume << 3) | ((u32)texture << 4); } + | ((u32)twoVolume << 3) | ((u32)texture << 4) | ((u32)divPosZ << 5); } }; // alpha test, clip test, use alpha, texture, ignore alpha, shader instr, offset, fog, gouraud, bump, clamp @@ -56,6 +57,7 @@ public: bool clamping; bool twoVolume; bool palette; + bool divPosZ; Pass pass; u32 hash() @@ -64,39 +66,39 @@ public: | ((u32)texture << 3) | ((u32)ignoreTexAlpha << 4) | (shaderInstr << 5) | ((u32)offset << 7) | ((u32)fog << 8) | ((u32)gouraud << 10) | ((u32)bumpmap << 11) | ((u32)clamping << 12) | ((u32)twoVolume << 13) - | ((u32)palette << 14) | ((int)pass << 15); + | ((u32)palette << 14) | ((int)pass << 15) | ((u32)divPosZ << 17); } }; + struct ModVolShaderParams + { + bool naomi2; + bool divPosZ; + + u32 hash() { return (u32)naomi2 | ((u32)divPosZ << 1); } + }; + + struct TrModVolShaderParams + { + ModVolMode mode; + bool divPosZ; + + u32 hash() { return (u32)mode | ((u32)divPosZ << 3); } + }; + vk::ShaderModule GetVertexShader(const VertexShaderParams& params) { return getShader(vertexShaders, params); } vk::ShaderModule GetFragmentShader(const FragmentShaderParams& params) { return getShader(fragmentShaders, params); } - vk::ShaderModule GetModVolVertexShader(bool naomi2) - { - vk::UniqueShaderModule& shader = naomi2 ? n2ModVolVertexShader : modVolVertexShader; - if (!shader) - shader = compileModVolVertexShader(naomi2); - return *shader; - } - vk::ShaderModule GetModVolShader() + vk::ShaderModule GetModVolVertexShader(const ModVolShaderParams& params) { return getShader(modVolVertexShaders, params); } + + vk::ShaderModule GetModVolShader(bool divPosZ) { + auto& modVolShader = modVolShaders[divPosZ]; if (!modVolShader) - modVolShader = compileModVolFragmentShader(); + modVolShader = compileModVolFragmentShader(divPosZ); return *modVolShader; } - vk::ShaderModule GetTrModVolShader(ModVolMode mode) - { - if (trModVolShaders.empty() || !trModVolShaders[(size_t)mode] || maxLayers != config::PerPixelLayers) - { - if (maxLayers != config::PerPixelLayers) - { - trModVolShaders.clear(); - finalFragmentShader.reset(); - } - compileTrModVolFragmentShader(mode); - maxLayers = config::PerPixelLayers; - } - return *trModVolShaders[(size_t)mode]; - } + + vk::ShaderModule GetTrModVolShader(const TrModVolShaderParams& params) { return getShader(trModVolShaders, params); } vk::ShaderModule GetFinalShader() { @@ -126,27 +128,27 @@ private: template vk::ShaderModule getShader(std::map& map, T params) { - auto it = map.find(params.hash()); + u32 h = params.hash(); + auto it = map.find(h); if (it != map.end()) return it->second.get(); - map[params.hash()] = compileShader(params); - return map[params.hash()].get(); + map[h] = compileShader(params); + return map[h].get(); } vk::UniqueShaderModule compileShader(const VertexShaderParams& params); vk::UniqueShaderModule compileShader(const FragmentShaderParams& params); - vk::UniqueShaderModule compileModVolVertexShader(bool naomi2); - vk::UniqueShaderModule compileModVolFragmentShader(); - void compileTrModVolFragmentShader(ModVolMode mode); + vk::UniqueShaderModule compileShader(const ModVolShaderParams& params); + vk::UniqueShaderModule compileModVolFragmentShader(bool divPosZ); + vk::UniqueShaderModule compileShader(const TrModVolShaderParams& params); vk::UniqueShaderModule compileFinalShader(); vk::UniqueShaderModule compileFinalVertexShader(); vk::UniqueShaderModule compileClearShader(); std::map vertexShaders; std::map fragmentShaders; - vk::UniqueShaderModule modVolVertexShader; - vk::UniqueShaderModule n2ModVolVertexShader; - vk::UniqueShaderModule modVolShader; - std::vector trModVolShaders; + std::map modVolVertexShaders; + vk::UniqueShaderModule modVolShaders[2]; + std::map trModVolShaders; vk::UniqueShaderModule finalVertexShader; vk::UniqueShaderModule finalFragmentShader; diff --git a/core/rend/vulkan/pipeline.cpp b/core/rend/vulkan/pipeline.cpp index cf8adf16b..652bed89c 100644 --- a/core/rend/vulkan/pipeline.cpp +++ b/core/rend/vulkan/pipeline.cpp @@ -138,8 +138,9 @@ void PipelineManager::CreateModVolPipeline(ModVolMode mode, int cullMode, bool n vk::DynamicState dynamicStates[2] = { vk::DynamicState::eViewport, vk::DynamicState::eScissor }; vk::PipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo(vk::PipelineDynamicStateCreateFlags(), 2, dynamicStates); - vk::ShaderModule vertex_module = shaderManager->GetModVolVertexShader(naomi2); - vk::ShaderModule fragment_module = shaderManager->GetModVolShader(); + ModVolShaderParams shaderParams { naomi2, !settings.platform.isNaomi2() && config::NativeDepthInterpolation }; + vk::ShaderModule vertex_module = shaderManager->GetModVolVertexShader(shaderParams); + vk::ShaderModule fragment_module = shaderManager->GetModVolShader(!settings.platform.isNaomi2() && config::NativeDepthInterpolation); vk::PipelineShaderStageCreateInfo stages[] = { { vk::PipelineShaderStageCreateFlags(), vk::ShaderStageFlagBits::eVertex, vertex_module, "main" }, @@ -241,8 +242,9 @@ void PipelineManager::CreateDepthPassPipeline(int cullMode, bool naomi2) vk::DynamicState dynamicStates[2] = { vk::DynamicState::eViewport, vk::DynamicState::eScissor }; vk::PipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo(vk::PipelineDynamicStateCreateFlags(), 2, dynamicStates); - vk::ShaderModule vertex_module = shaderManager->GetModVolVertexShader(naomi2); - vk::ShaderModule fragment_module = shaderManager->GetModVolShader(); + ModVolShaderParams shaderParams { naomi2, !settings.platform.isNaomi2() && config::NativeDepthInterpolation }; + vk::ShaderModule vertex_module = shaderManager->GetModVolVertexShader(shaderParams); + vk::ShaderModule fragment_module = shaderManager->GetModVolShader(!settings.platform.isNaomi2() && config::NativeDepthInterpolation); vk::PipelineShaderStageCreateInfo stages[] = { { vk::PipelineShaderStageCreateFlags(), vk::ShaderStageFlagBits::eVertex, vertex_module, "main" }, @@ -392,7 +394,8 @@ void PipelineManager::CreatePipeline(u32 listType, bool sortTriangles, const Pol vk::DynamicState dynamicStates[2] = { vk::DynamicState::eViewport, vk::DynamicState::eScissor }; vk::PipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo(vk::PipelineDynamicStateCreateFlags(), 2, dynamicStates); - vk::ShaderModule vertex_module = shaderManager->GetVertexShader(VertexShaderParams{ pp.pcw.Gouraud == 1, pp.isNaomi2() }); + bool divPosZ = !settings.platform.isNaomi2() && config::NativeDepthInterpolation; + vk::ShaderModule vertex_module = shaderManager->GetVertexShader(VertexShaderParams { pp.pcw.Gouraud == 1, pp.isNaomi2(), divPosZ }); FragmentShaderParams params = {}; params.alphaTest = listType == ListType_Punch_Through; params.bumpmap = pp.tcw.PixelFmt == PixelBumpMap; @@ -407,6 +410,7 @@ void PipelineManager::CreatePipeline(u32 listType, bool sortTriangles, const Pol params.trilinear = pp.pcw.Texture && pp.tsp.FilterMode > 1 && listType != ListType_Punch_Through && pp.tcw.MipMapped == 1; params.useAlpha = pp.tsp.UseAlpha; params.palette = gpuPalette; + params.divPosZ = divPosZ; vk::ShaderModule fragment_module = shaderManager->GetFragmentShader(params); vk::PipelineShaderStageCreateInfo stages[] = { diff --git a/core/rend/vulkan/pipeline.h b/core/rend/vulkan/pipeline.h index 304d9e473..cf83b84fc 100644 --- a/core/rend/vulkan/pipeline.h +++ b/core/rend/vulkan/pipeline.h @@ -275,16 +275,17 @@ private: | (pp->tsp.DstInstr << 17); hash |= (pp->isp.ZWriteDis << 20) | (pp->isp.CullMode << 21) | (pp->isp.DepthMode << 23); hash |= ((u32)sortTriangles << 26) | ((u32)gpuPalette << 27) | ((u32)pp->isNaomi2() << 28); + hash |= (u32)(!settings.platform.isNaomi2() && config::NativeDepthInterpolation) << 29; return hash; } u32 hash(ModVolMode mode, int cullMode, bool naomi2) const { - return ((int)mode << 2) | cullMode | ((int)naomi2 << 5); + return ((int)mode << 2) | cullMode | ((int)naomi2 << 5) | ((int)(!settings.platform.isNaomi2() && config::NativeDepthInterpolation) << 6); } u32 hash(int cullMode, bool naomi2) const { - return cullMode | ((int)naomi2 << 2); + return cullMode | ((int)naomi2 << 2) | ((int)(!settings.platform.isNaomi2() && config::NativeDepthInterpolation) << 3); } vk::PipelineVertexInputStateCreateInfo GetMainVertexInputStateCreateInfo(bool full = true) const diff --git a/core/rend/vulkan/shaders.cpp b/core/rend/vulkan/shaders.cpp index 55e4a2fa5..75e327602 100644 --- a/core/rend/vulkan/shaders.cpp +++ b/core/rend/vulkan/shaders.cpp @@ -36,20 +36,28 @@ layout (location = 3) in mediump vec2 in_uv; layout (location = 0) INTERPOLATION out highp vec4 vtx_base; layout (location = 1) INTERPOLATION out highp vec4 vtx_offs; -layout (location = 2) noperspective out highp vec3 vtx_uv; +layout (location = 2) out highp vec3 vtx_uv; void main() { vec4 vpos = uniformBuffer.ndcMat * in_pos; +#if DIV_POS_Z == 1 + vpos /= vpos.z; + vpos.z = vpos.w; +#endif vtx_base = vec4(in_base) / 255.0; vtx_offs = vec4(in_offs) / 255.0; - vtx_uv = vec3(in_uv * vpos.z, vpos.z); -#if pp_Gouraud == 1 + vtx_uv = vec3(in_uv, vpos.z); +#if pp_Gouraud == 1 && DIV_POS_Z != 1 vtx_base *= vpos.z; vtx_offs *= vpos.z; #endif + +#if DIV_POS_Z != 1 + vtx_uv.xy *= vpos.z; vpos.w = 1.0; vpos.z = 0.0; +#endif gl_Position = vpos; } )"; @@ -87,14 +95,20 @@ layout (set = 0, binding = 3) uniform sampler2D palette; // Vertex input layout (location = 0) INTERPOLATION in highp vec4 vtx_base; layout (location = 1) INTERPOLATION in highp vec4 vtx_offs; -layout (location = 2) noperspective in highp vec3 vtx_uv; +layout (location = 2) in highp vec3 vtx_uv; #if pp_FogCtrl != 2 layout (set = 0, binding = 2) uniform sampler2D fog_table; float fog_mode2(float w) { - float z = clamp(w * uniformBuffer.sp_FOG_DENSITY, 1.0, 255.9999); + float z = clamp( +#if DIV_POS_Z == 1 + uniformBuffer.sp_FOG_DENSITY / w +#else + uniformBuffer.sp_FOG_DENSITY * w +#endif + , 1.0, 255.9999); float exp = floor(log2(z)); float m = z * 16.0 / pow(2.0, exp) - 16.0; float idx = floor(m) + exp * 16.0 + 0.5; @@ -116,7 +130,12 @@ vec4 colorClamp(vec4 col) vec4 palettePixel(sampler2D tex, vec3 coords) { - vec4 c = vec4(textureProj(tex, coords).r * 255.0 / 1023.0 + pushConstants.palette_index, 0.5, 0.0, 0.0); +#if DIV_POS_Z == 1 + float texIdx = texture(tex, coords.xy).r; +#else + float texIdx = textureProj(tex, coords).r; +#endif + vec4 c = vec4(texIdx * 255.0 / 1023.0 + pushConstants.palette_index, 0.5, 0.0, 0.0); return texture(palette, c.xy); } @@ -133,7 +152,7 @@ void main() highp vec4 color = vtx_base; highp vec4 offset = vtx_offs; - #if pp_Gouraud == 1 + #if pp_Gouraud == 1 && DIV_POS_Z != 1 color /= vtx_uv.z; offset /= vtx_uv.z; #endif @@ -141,12 +160,16 @@ void main() color.a = 1.0; #endif #if pp_FogCtrl == 3 - color = vec4(uniformBuffer.sp_FOG_COL_RAM.rgb, fog_mode2(gl_FragCoord.w)); + color = vec4(uniformBuffer.sp_FOG_COL_RAM.rgb, fog_mode2(vtx_uv.z)); #endif #if pp_Texture == 1 { #if pp_Palette == 0 - vec4 texcol = textureProj(tex, vtx_uv); + #if DIV_POS_Z == 1 + vec4 texcol = texture(tex, vtx_uv.xy); + #else + vec4 texcol = textureProj(tex, vtx_uv); + #endif #else vec4 texcol = palettePixel(tex, vtx_uv); #endif @@ -216,7 +239,11 @@ void main() //color.rgb = vec3(gl_FragCoord.w * uniformBuffer.sp_FOG_DENSITY / 128.0); - highp float w = vtx_uv.z * 100000.0; +#if DIV_POS_Z == 1 + highp float w = 100000.0 / vtx_uv.z; +#else + highp float w = 100000.0 * vtx_uv.z; +#endif gl_FragDepth = log2(1.0 + w) / 34.0; gl_FragColor = color; @@ -230,20 +257,26 @@ layout (std140, set = 0, binding = 0) uniform VertexShaderUniforms } uniformBuffer; layout (location = 0) in vec4 in_pos; -layout (location = 0) noperspective out highp float depth; +layout (location = 0) out highp float depth; void main() { vec4 vpos = uniformBuffer.ndcMat * in_pos; +#if DIV_POS_Z == 1 + vpos /= vpos.z; + vpos.z = vpos.w; + depth = vpos.w; +#else depth = vpos.z; vpos.w = 1.0; vpos.z = 0.0; +#endif gl_Position = vpos; } )"; static const char ModVolFragmentShaderSource[] = R"( -layout (location = 0) noperspective in highp float depth; +layout (location = 0) in highp float depth; layout (location = 0) out vec4 FragColor; layout (push_constant) uniform pushBlock @@ -253,7 +286,11 @@ layout (push_constant) uniform pushBlock void main() { - highp float w = depth * 100000.0; +#if DIV_POS_Z == 1 + highp float w = 100000.0 / depth; +#else + highp float w = 100000.0 * depth; +#endif gl_FragDepth = log2(1.0 + w) / 34.0; FragColor = vec4(0.0, 0.0, 0.0, pushConstants.sp_ShaderColor); } @@ -570,7 +607,7 @@ layout (location = 4) in vec3 in_normal; layout (location = 0) INTERPOLATION out highp vec4 vtx_base; layout (location = 1) INTERPOLATION out highp vec4 vtx_offs; -layout (location = 2) noperspective out highp vec3 vtx_uv; +layout (location = 2) out highp vec3 vtx_uv; void wDivide(inout vec4 vpos) { @@ -633,7 +670,7 @@ layout (std140, set = 1, binding = 2) uniform N2VertexShaderUniforms } n2Uniform; layout (location = 0) in vec4 in_pos; -layout (location = 0) noperspective out highp float depth; +layout (location = 0) out highp float depth; void wDivide(inout vec4 vpos) { @@ -660,6 +697,7 @@ vk::UniqueShaderModule ShaderManager::compileShader(const VertexShaderParams& pa if (!params.naomi2) { src.addConstant("pp_Gouraud", (int)params.gouraud) + .addConstant("DIV_POS_Z", (int)params.divPosZ) .addSource(GouraudSource) .addSource(VertexShaderSource); } @@ -689,20 +727,24 @@ vk::UniqueShaderModule ShaderManager::compileShader(const FragmentShaderParams& .addConstant("ColorClamping", (int)params.clamping) .addConstant("pp_TriLinear", (int)params.trilinear) .addConstant("pp_Palette", (int)params.palette) + .addConstant("DIV_POS_Z", (int)params.divPosZ) .addSource(GouraudSource) .addSource(FragmentShaderSource); return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment, src.generate()); } -vk::UniqueShaderModule ShaderManager::compileModVolVertexShader(bool naomi2) +vk::UniqueShaderModule ShaderManager::compileShader(const ModVolShaderParams& params) { return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eVertex, - VulkanSource().addSource(naomi2 ? N2ModVolVertexShaderSource : ModVolVertexShaderSource).generate()); + VulkanSource().addConstant("DIV_POS_Z", (int)params.divPosZ) + .addSource(params.naomi2 ? N2ModVolVertexShaderSource : ModVolVertexShaderSource).generate()); } -vk::UniqueShaderModule ShaderManager::compileModVolFragmentShader() +vk::UniqueShaderModule ShaderManager::compileModVolFragmentShader(bool divPosZ) { - return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment, VulkanSource().addSource(ModVolFragmentShaderSource).generate()); + return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment, + VulkanSource().addConstant("DIV_POS_Z", (int)divPosZ) + .addSource(ModVolFragmentShaderSource).generate()); } vk::UniqueShaderModule ShaderManager::compileQuadVertexShader(bool rotate) diff --git a/core/rend/vulkan/shaders.h b/core/rend/vulkan/shaders.h index 971bca47c..f58ac0d44 100644 --- a/core/rend/vulkan/shaders.h +++ b/core/rend/vulkan/shaders.h @@ -29,8 +29,9 @@ struct VertexShaderParams { bool gouraud; bool naomi2; + bool divPosZ; - u32 hash() { return (u32)gouraud | ((u32)naomi2 << 1); } + u32 hash() { return (u32)gouraud | ((u32)naomi2 << 1) | ((u32)divPosZ << 2); } }; // alpha test, clip test, use alpha, texture, ignore alpha, shader instr, offset, fog, gouraud, bump, clamp, trilinear @@ -49,6 +50,7 @@ struct FragmentShaderParams bool clamping; bool trilinear; bool palette; + bool divPosZ; u32 hash() { @@ -56,10 +58,18 @@ struct FragmentShaderParams | ((u32)texture << 3) | ((u32)ignoreTexAlpha << 4) | (shaderInstr << 5) | ((u32)offset << 7) | ((u32)fog << 8) | ((u32)gouraud << 10) | ((u32)bumpmap << 11) | ((u32)clamping << 12) | ((u32)trilinear << 13) - | ((u32)palette << 14); + | ((u32)palette << 14) | ((u32)divPosZ << 15); } }; +struct ModVolShaderParams +{ + bool naomi2; + bool divPosZ; + + u32 hash() { return (u32)naomi2 | ((u32)divPosZ << 1); } +}; + // std140 alignment required struct VertexShaderUniforms { @@ -96,17 +106,13 @@ class ShaderManager public: vk::ShaderModule GetVertexShader(const VertexShaderParams& params) { return getShader(vertexShaders, params); } vk::ShaderModule GetFragmentShader(const FragmentShaderParams& params) { return getShader(fragmentShaders, params); } - vk::ShaderModule GetModVolVertexShader(bool naomi2) - { - vk::UniqueShaderModule& shader = naomi2 ? n2ModVolVertexShader : modVolVertexShader; - if (!shader) - shader = compileModVolVertexShader(naomi2); - return *shader; - } - vk::ShaderModule GetModVolShader() + vk::ShaderModule GetModVolVertexShader(const ModVolShaderParams& params) { return getShader(modVolVertexShaders, params); } + + vk::ShaderModule GetModVolShader(bool divPosZ) { + auto& modVolShader = modVolShaders[divPosZ]; if (!modVolShader) - modVolShader = compileModVolFragmentShader(); + modVolShader = compileModVolFragmentShader(divPosZ); return *modVolShader; } vk::ShaderModule GetQuadVertexShader(bool rotate = false) @@ -156,16 +162,17 @@ private: template vk::ShaderModule getShader(std::map& map, T params) { - auto it = map.find(params.hash()); + u32 h = params.hash(); + auto it = map.find(h); if (it != map.end()) return it->second.get(); - map[params.hash()] = compileShader(params); - return map[params.hash()].get(); + map[h] = compileShader(params); + return map[h].get(); } vk::UniqueShaderModule compileShader(const VertexShaderParams& params); vk::UniqueShaderModule compileShader(const FragmentShaderParams& params); - vk::UniqueShaderModule compileModVolVertexShader(bool naomi2); - vk::UniqueShaderModule compileModVolFragmentShader(); + vk::UniqueShaderModule compileShader(const ModVolShaderParams& params); + vk::UniqueShaderModule compileModVolFragmentShader(bool divPosZ); vk::UniqueShaderModule compileQuadVertexShader(bool rotate); vk::UniqueShaderModule compileQuadFragmentShader(bool ignoreTexAlpha); vk::UniqueShaderModule compileOSDVertexShader(); @@ -173,9 +180,8 @@ private: std::map vertexShaders; std::map fragmentShaders; - vk::UniqueShaderModule modVolVertexShader; - vk::UniqueShaderModule n2ModVolVertexShader; - vk::UniqueShaderModule modVolShader; + std::map modVolVertexShaders; + vk::UniqueShaderModule modVolShaders[2]; vk::UniqueShaderModule quadVertexShader; vk::UniqueShaderModule quadRotateVertexShader; vk::UniqueShaderModule quadFragmentShader; diff --git a/core/rend/vulkan/utils.h b/core/rend/vulkan/utils.h index 4d563cdc9..863affc0e 100644 --- a/core/rend/vulkan/utils.h +++ b/core/rend/vulkan/utils.h @@ -82,7 +82,7 @@ static const char GouraudSource[] = R"( #if pp_Gouraud == 0 #define INTERPOLATION flat #else -#define INTERPOLATION noperspective +#define INTERPOLATION #endif )"; diff --git a/shell/libretro/libretro_core_options.h b/shell/libretro/libretro_core_options.h index 7d479efc4..6d7d56993 100644 --- a/shell/libretro/libretro_core_options.h +++ b/shell/libretro/libretro_core_options.h @@ -534,6 +534,20 @@ struct retro_core_option_v2_definition option_defs_us[] = { "256", }, #endif + { + CORE_OPTION_NAME "_native_depth_interpolation", + "Native Depth Interpolation", + NULL, + "Helps with texture corruption and depth issues on AMD GPUs. Can also help Intel GPUs in some cases.", + NULL, + "video", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled", + }, { CORE_OPTION_NAME "_threaded_rendering", "Threaded Rendering", diff --git a/shell/libretro/option.cpp b/shell/libretro/option.cpp index 15ab858aa..564282128 100644 --- a/shell/libretro/option.cpp +++ b/shell/libretro/option.cpp @@ -90,6 +90,7 @@ Option TextureFiltering(CORE_OPTION_NAME "_texture_filtering"); Option PowerVR2Filter(CORE_OPTION_NAME "_pvr2_filtering"); Option PixelBufferSize("", 512 * 1024 * 1024); IntOption PerPixelLayers(CORE_OPTION_NAME "_oit_layers"); +Option NativeDepthInterpolation(CORE_OPTION_NAME "_native_depth_interpolation"); // Misc