Native depth interpolation option to help AMD GPUs

When enabled, set 1/z in pos.w and use the GPU's native interpolation for
colors, UVs and depth.
Remove the manual modifier-volume (modvol) clipping in DX11.
This commit is contained in:
flyinghead 2022-05-12 13:43:43 +02:00 committed by flyinghead
parent ddcd3861d3
commit bf6c73060b
30 changed files with 658 additions and 528 deletions

View File

@ -101,6 +101,7 @@ Option<int> TextureFiltering("rend.TextureFiltering", 0); // Default
Option<bool> ThreadedRendering("rend.ThreadedRendering", true);
Option<bool> DupeFrames("rend.DupeFrames", false);
Option<int> PerPixelLayers("rend.PerPixelLayers", 32);
Option<bool> NativeDepthInterpolation("rend.NativeDepthInterpolation", false);
// Misc

View File

@ -459,6 +459,7 @@ extern Option<int> AnisotropicFiltering;
extern Option<int> TextureFiltering; // 0: default, 1: force nearest, 2: force linear
extern Option<bool> ThreadedRendering;
extern Option<bool> DupeFrames;
extern Option<bool> NativeDepthInterpolation;
// Misc

View File

@ -318,154 +318,6 @@ bool DX11Renderer::Process(TA_context* ctx)
}
}
//
// Efficient Triangle and Quadrilateral Clipping within Shaders. M. McGuire
// Journal of Graphics GPU and Game Tools - November 2011
//
// Returns a point on segment A-B, weighting each endpoint by the absolute plane
// distance of the *other* endpoint. Adist and Bdist are the distances of A and B
// from the clipping plane as computed by the caller.
// The result is divided by |Adist| + |Bdist| with no zero check.
// NOTE(review): callers appear to pass endpoints on opposite sides of the plane,
// which keeps that sum non-zero - confirm before reusing elsewhere.
static glm::vec3 intersect(const glm::vec3& A, float Adist, const glm::vec3& B, float Bdist)
{
    const float weightA = std::abs(Bdist);
    const float weightB = std::abs(Adist);
    const glm::vec3 weightedSum = A * weightA + B * weightB;
    return weightedSum / (weightB + weightA);
}
// Clip the triangle 'trig' with respect to the plane defined by the given point and normal vector.
//
// Only the x/y components of each vertex are tested against the plane
// (dot(v.xy - point, normal) >= 0 means "kept"); z is carried along and
// interpolated when an edge is cut.
//
// 'trig' is modified in place. 'newTrig' is written only when the clipped
// result is a quadrilateral, in which case it holds the second triangle.
// Only the nine x/y/z coordinate fields are assigned here.
//
// Return value:
//   0 - every vertex is on the clipped side; 'trig' should be dropped
//   3 - the result is a single triangle stored in 'trig' (possibly unchanged)
//   4 - the result is a quad: 'trig' and 'newTrig' together cover it
static int sutherlandHodgmanClip(const glm::vec2& point, const glm::vec2& normal, ModTriangle& trig, ModTriangle& newTrig)
{
// Both tolerances are currently zero; the commented-out values are earlier candidates.
constexpr float clipEpsilon = 0.f; //0.00001;
constexpr float clipEpsilon2 = 0.f; //0.01;
glm::vec3 v0(trig.x0, trig.y0, trig.z0);
glm::vec3 v1(trig.x1, trig.y1, trig.z1);
glm::vec3 v2(trig.x2, trig.y2, trig.z2);
// Per-vertex distance from the plane along 'normal' (x/y only), one component per vertex.
glm::vec3 dist = glm::vec3(
glm::dot(glm::vec2(v0) - point, normal),
glm::dot(glm::vec2(v1) - point, normal),
glm::dot(glm::vec2(v2) - point, normal));
if (!glm::any(glm::greaterThanEqual(dist , glm::vec3(clipEpsilon2))))
// all clipped
return 0;
if (glm::all(glm::greaterThanEqual(dist , glm::vec3(-clipEpsilon))))
// none clipped
return 3;
// There are either 1 or 2 vertices above the clipping plane.
glm::bvec3 above = glm::greaterThanEqual(dist, glm::vec3(0.f));
bool nextIsAbove;
glm::vec3 v3;
// Find the CCW-most vertex above the plane.
// After this step v0 is above the plane and v2 is below it; nextIsAbove
// tells whether v1 (the vertex following v0) is above as well.
if (above[1] && !above[0])
{
// Cycle once CCW. Use v3 as a temp
nextIsAbove = above[2];
v3 = v0;
v0 = v1;
v1 = v2;
v2 = v3;
// Keep the distances in step with the rotated vertices.
dist = glm::vec3(dist.y, dist.z, dist.x);
}
else if (above[2] && !above[1])
{
// Cycle once CW. Use v3 as a temp.
nextIsAbove = above[0];
v3 = v2;
v2 = v1;
v1 = v0;
v0 = v3;
// Keep the distances in step with the rotated vertices.
dist = glm::vec3(dist.z, dist.x, dist.y);
}
else
nextIsAbove = above[1];
// v0 is kept as-is in both outcomes below.
trig.x0 = v0.x;
trig.y0 = v0.y;
trig.z0 = v0.z;
// We always need to clip v2-v0.
v3 = intersect(v0, dist[0], v2, dist[2]);
if (nextIsAbove)
{
// Two vertices (v0, v1) are kept: the result is the quad v0, v1, v2', v3,
// where v2' is the cut point on edge v1-v2. It is emitted as the two
// triangles (v0, v1, v2') and (v0, v2', v3).
v2 = intersect(v1, dist[1], v2, dist[2]);
trig.x1 = v1.x;
trig.y1 = v1.y;
trig.z1 = v1.z;
trig.x2 = v2.x;
trig.y2 = v2.y;
trig.z2 = v2.z;
newTrig.x0 = v0.x;
newTrig.y0 = v0.y;
newTrig.z0 = v0.z;
newTrig.x1 = v2.x;
newTrig.y1 = v2.y;
newTrig.z1 = v2.z;
newTrig.x2 = v3.x;
newTrig.y2 = v3.y;
newTrig.z2 = v3.z;
return 4;
}
else
{
// Only v0 is kept: the result is the triangle v0, v1', v3, where v1' is
// the cut point on edge v0-v1. 'newTrig' is left untouched.
v1 = intersect(v0, dist[0], v1, dist[1]);
trig.x1 = v1.x;
trig.y1 = v1.y;
trig.z1 = v1.z;
trig.x2 = v3.x;
trig.y2 = v3.y;
trig.z2 = v3.z;
return 3;
}
}
// Clips the triangles of every modifier volume in 'params' against four fixed
// axis-aligned planes (left, top, right, bottom) and appends the survivors to
// 'triangles'. Each param's 'first' and 'count' are rewritten to describe its
// range inside 'triangles' instead of inside pvrrc.modtrig.
static void clipModVols(List<ModifierVolumeParam>& params, std::vector<ModTriangle>& triangles)
{
    // A clipping plane in x/y: a point on the plane and the normal pointing to the kept side.
    struct ClipPlane
    {
        glm::vec2 point;
        glm::vec2 normal;
    };
    const ClipPlane clipPlanes[] = {
        { glm::vec2(-6400.f, 0.f), glm::vec2(1.f, 0.f) },   // left
        { glm::vec2(0.f, -4800.f), glm::vec2(0.f, 1.f) },   // top
        { glm::vec2(7040.f, 0.f), glm::vec2(-1.f, 0.f) },   // right
        { glm::vec2(-0.f, 5280.f), glm::vec2(0.f, -1.f) },  // bottom
    };

    for (ModifierVolumeParam& param : params)
    {
        // Work on a private copy of this volume's triangles.
        const ModTriangle *volumeBegin = &pvrrc.modtrig.head()[param.first];
        const ModTriangle *volumeEnd = &pvrrc.modtrig.head()[param.first + param.count];
        std::vector<ModTriangle> current(volumeBegin, volumeEnd);
        std::vector<ModTriangle> clipped;
        clipped.reserve(current.size());

        // Clip against one plane at a time; the output of a pass feeds the next one.
        for (const ClipPlane& plane : clipPlanes)
        {
            for (ModTriangle& trig : current)
            {
                ModTriangle extraTrig;
                const int vertexCount = sutherlandHodgmanClip(plane.point, plane.normal, trig, extraTrig);
                if (vertexCount <= 0)
                    continue;   // fully outside: drop it
                clipped.push_back(trig);
                if (vertexCount == 4)
                    clipped.push_back(extraTrig);   // a quad yields a second triangle
            }
            std::swap(current, clipped);
            clipped.clear();
        }

        param.first = (u32)triangles.size();
        param.count = (u32)current.size();
        triangles.insert(triangles.end(), current.begin(), current.end());
    }
}
void DX11Renderer::configVertexShader()
{
matrices.CalcMatrices(&pvrrc, width, height);
@ -522,33 +374,12 @@ void DX11Renderer::uploadGeometryBuffers()
if (config::ModifierVolumes && pvrrc.modtrig.used())
{
const ModTriangle *data = nullptr;
u32 size = 0;
std::vector<ModTriangle> modVolTriangles;
if (!settings.platform.isNaomi2()) // TODO for naomi2 as well?
{
// clip triangles
modVolTriangles.reserve(pvrrc.modtrig.used());
clipModVols(pvrrc.global_param_mvo, modVolTriangles);
clipModVols(pvrrc.global_param_mvo_tr, modVolTriangles);
if (!modVolTriangles.empty())
{
size = (u32)(modVolTriangles.size() * sizeof(ModTriangle));
data = modVolTriangles.data();
}
}
else
{
size = pvrrc.modtrig.bytes();
data = pvrrc.modtrig.head();
}
if (size > 0)
{
verify(ensureBufferSize(modvolBuffer, D3D11_BIND_VERTEX_BUFFER, modvolBufferSize, size));
deviceContext->Map(modvolBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedSubres);
memcpy(mappedSubres.pData, data, size);
deviceContext->Unmap(modvolBuffer, 0);
}
const ModTriangle *data = pvrrc.modtrig.head();
u32 size = pvrrc.modtrig.bytes();
verify(ensureBufferSize(modvolBuffer, D3D11_BIND_VERTEX_BUFFER, modvolBufferSize, size));
deviceContext->Map(modvolBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedSubres);
memcpy(mappedSubres.pData, data, size);
deviceContext->Unmap(modvolBuffer, 0);
}
unsigned int stride = sizeof(Vertex);
unsigned int offset = 0;
@ -812,7 +643,6 @@ void DX11Renderer::setRenderState(const PolyParam *gp)
linearFiltering = false;
else
linearFiltering = true;
auto sampler = samplers->getSampler(linearFiltering, gp->tsp.ClampU, gp->tsp.ClampV, gp->tsp.FlipU, gp->tsp.FlipV);
deviceContext->PSSetSamplers(0, 1, &sampler.get());
}

View File

@ -59,18 +59,26 @@ VertexOut main(in VertexIn vin)
{
VertexOut vo;
vo.pos = mul(transMatrix, float4(vin.pos.xyz, 1.f));
#if pp_Gouraud == 1
vo.col = vin.col * vo.pos.z;
vo.spec = vin.spec * vo.pos.z;
#else
// flat shading: no interpolation
#if DIV_POS_Z == 1
vo.pos /= vo.pos.z;
vo.pos.z = vo.pos.w;
#endif
vo.col = vin.col;
vo.spec = vin.spec;
#if pp_Gouraud == 1 && DIV_POS_Z != 1
vo.col *= vo.pos.z;
vo.spec *= vo.pos.z;
#endif
vo.uv = float4(vin.uv * vo.pos.z, 0.f, vo.pos.z);
vo.uv.xyz = float3(vin.uv, 0.f);
#if DIV_POS_Z == 1
vo.uv.w = vo.pos.w;
#else
vo.uv.xy *= vo.pos.z;
vo.uv.w = vo.pos.z;
vo.pos.w = 1.f;
vo.pos.z = 0.f;
#endif
return vo;
}
@ -102,11 +110,15 @@ VertexOut main(in VertexIn vin)
{
VertexOut vo;
vo.pos = mul(transMatrix, float4(vin.pos.xyz, 1.f));
#if DIV_POS_Z == 1
vo.pos /= vo.pos.z;
vo.pos.z = vo.pos.w;
vo.uv = float4(0.f, 0.f, 0.f, vo.pos.w);
#else
vo.uv = float4(0.f, 0.f, 0.f, vo.pos.z);
vo.pos.w = 1.f;
vo.pos.z = 0.f;
#endif
return vo;
}
@ -159,7 +171,13 @@ cbuffer polyConstantBuffer : register(b1)
float fog_mode2(float w)
{
float z = clamp(w * fogDensity, 1.0f, 255.9999f);
float z = clamp(
#if DIV_POS_Z == 1
fogDensity / w
#else
fogDensity * w
#endif
, 1.0f, 255.9999f);
float exp = floor(log2(z));
float m = z * 16.0f / pow(2.0, exp) - 16.0f;
float idx = floor(m) + exp * 16.0f + 0.5f;
@ -201,17 +219,16 @@ PSO main(in Pixel inpix)
&& inpix.pos.y >= clipTest.y && inpix.pos.y <= clipTest.w)
discard;
#endif
#if pp_Gouraud == 1
float4 color = inpix.col / inpix.uv.w;
#if pp_BumpMap == 1 || pp_Offset == 1
float4 specular = inpix.spec / inpix.uv.w;
#endif
#else
float4 color = inpix.col;
#if pp_BumpMap == 1 || pp_Offset == 1
float4 specular = inpix.spec;
#endif
#endif
#if pp_Gouraud == 1 && DIV_POS_Z != 1
color /= inpix.uv.w;
#if pp_BumpMap == 1 || pp_Offset == 1
specular /= inpix.uv.w;
#endif
#endif
#if pp_UseAlpha == 0
color.a = 1.0f;
#endif
@ -220,7 +237,10 @@ PSO main(in Pixel inpix)
#endif
#if pp_Texture == 1
{
float2 uv = inpix.uv.xy / inpix.uv.w;
float2 uv = inpix.uv.xy;
#if DIV_POS_Z != 1
uv /= inpix.uv.w;
#endif
#if NearestWrapFix == 1
uv = min(fmod(uv, 1.f), 0.9997f);
#endif
@ -279,7 +299,11 @@ PSO main(in Pixel inpix)
#endif
PSO pso;
float w = inpix.uv.w * 100000.0f;
#if DIV_POS_Z == 1
float w = 100000.0f / inpix.uv.w;
#else
float w = 100000.0f * inpix.uv.w;
#endif
pso.z = log2(1.0f + w) / 34.0f;
pso.col = color;
@ -295,7 +319,11 @@ struct MVPixel
PSO modifierVolume(in MVPixel inpix)
{
PSO pso;
float w = inpix.uv.w * 100000.0f;
#if DIV_POS_Z == 1
float w = 100000.0f / inpix.uv.w;
#else
float w = 100000.0f * inpix.uv.w;
#endif
pso.z = log2(1.0f + w) / 34.0f;
pso.col = float4(0, 0, 0, 1.f - shadowScale);
@ -354,9 +382,18 @@ float4 main(in VertexIn vin) : SV_Target
const char * const MacroValues[] { "0", "1", "2", "3" };
enum VertexMacroEnum {
MacroGouraud,
MacroDivPosZ,
MacroPositionOnly,
MacroTwoVolumes,
MacroLightOn,
};
static D3D_SHADER_MACRO VertexMacros[]
{
{ "pp_Gouraud", "1" },
{ "DIV_POS_Z", "0" },
{ "POSITION_ONLY", "0" },
{ "pp_TwoVolumes", "0" },
{ "LIGHT_ON", "1" },
@ -364,8 +401,7 @@ static D3D_SHADER_MACRO VertexMacros[]
};
enum PixelMacroEnum {
MacroGouraud,
MacroTexture,
MacroTexture = 2,
MacroUseAlpha,
MacroIgnoreTexA,
MacroShadInstr,
@ -383,6 +419,7 @@ enum PixelMacroEnum {
static D3D_SHADER_MACRO PixelMacros[]
{
{ "pp_Gouraud", "1" },
{ "DIV_POS_Z", "0" },
{ "pp_Texture", "0" },
{ "pp_UseAlpha", "0" },
{ "pp_IgnoreTexA", "0" },
@ -403,6 +440,7 @@ const ComPtr<ID3D11PixelShader>& DX11Shaders::getShader(bool pp_Texture, bool pp
bool pp_Offset, u32 pp_FogCtrl, bool pp_BumpMap, bool fog_clamping,
bool trilinear, bool palette, bool gouraud, bool alphaTest, bool clipInside, bool nearestWrapFix)
{
bool divPosZ = !settings.platform.isNaomi2() && config::NativeDepthInterpolation;
const u32 hash = (int)pp_Texture
| (pp_UseAlpha << 1)
| (pp_IgnoreTexA << 2)
@ -416,7 +454,8 @@ const ComPtr<ID3D11PixelShader>& DX11Shaders::getShader(bool pp_Texture, bool pp
| (gouraud << 12)
| (alphaTest << 13)
| (clipInside << 14)
| (nearestWrapFix << 15);
| (nearestWrapFix << 15)
| (divPosZ << 16);
auto& shader = shaders[hash];
if (shader == nullptr)
{
@ -436,6 +475,7 @@ const ComPtr<ID3D11PixelShader>& DX11Shaders::getShader(bool pp_Texture, bool pp
PixelMacros[MacroAlphaTest].Definition = MacroValues[alphaTest];
PixelMacros[MacroClipInside].Definition = MacroValues[clipInside];
PixelMacros[MacroNearestWrapFix].Definition = MacroValues[nearestWrapFix];
PixelMacros[MacroDivPosZ].Definition = MacroValues[divPosZ];
shader = compilePS(PixelShader, "main", PixelMacros);
verify(shader != nullptr);
@ -445,20 +485,22 @@ const ComPtr<ID3D11PixelShader>& DX11Shaders::getShader(bool pp_Texture, bool pp
const ComPtr<ID3D11VertexShader>& DX11Shaders::getVertexShader(bool gouraud, bool naomi2)
{
int index = (int)gouraud | ((int)naomi2 << 1);
bool divPosZ = !settings.platform.isNaomi2() && config::NativeDepthInterpolation;
int index = (int)gouraud | ((int)naomi2 << 1) | ((int)divPosZ << 2);
ComPtr<ID3D11VertexShader>& vertexShader = vertexShaders[index];
if (!vertexShader)
{
VertexMacros[0].Definition = MacroValues[gouraud];
VertexMacros[MacroGouraud].Definition = MacroValues[gouraud];
if (!naomi2)
{
VertexMacros[MacroDivPosZ].Definition = MacroValues[divPosZ];
vertexShader = compileVS(VertexShader, "main", VertexMacros);
}
else
{
VertexMacros[1].Definition = MacroValues[false];
VertexMacros[2].Definition = MacroValues[false];
VertexMacros[3].Definition = MacroValues[true];
VertexMacros[MacroPositionOnly].Definition = MacroValues[false];
VertexMacros[MacroTwoVolumes].Definition = MacroValues[false];
VertexMacros[MacroLightOn].Definition = MacroValues[true];
std::string source(DX11N2VertexShader);
source += std::string("\n") + DX11N2ColorShader;
vertexShader = compileVS(source.c_str(), "main", VertexMacros);
@ -470,21 +512,26 @@ const ComPtr<ID3D11VertexShader>& DX11Shaders::getVertexShader(bool gouraud, boo
const ComPtr<ID3D11VertexShader>& DX11Shaders::getMVVertexShader(bool naomi2)
{
if (!modVolVertexShaders[naomi2])
bool divPosZ = !settings.platform.isNaomi2() && config::NativeDepthInterpolation;
int index = (int)naomi2 | ((int)divPosZ << 1);
if (!modVolVertexShaders[index])
{
if (!naomi2)
modVolVertexShaders[0] = compileVS(ModVolVertexShader, "main", nullptr);
{
VertexMacros[MacroDivPosZ].Definition = MacroValues[divPosZ];
modVolVertexShaders[index] = compileVS(ModVolVertexShader, "main", VertexMacros);
}
else
{
VertexMacros[0].Definition = MacroValues[false];
VertexMacros[1].Definition = MacroValues[true];
VertexMacros[2].Definition = MacroValues[false];
VertexMacros[3].Definition = MacroValues[false];
modVolVertexShaders[1] = compileVS(DX11N2VertexShader, "main", VertexMacros);
VertexMacros[MacroGouraud].Definition = MacroValues[false];
VertexMacros[MacroPositionOnly].Definition = MacroValues[true];
VertexMacros[MacroTwoVolumes].Definition = MacroValues[false];
VertexMacros[MacroLightOn].Definition = MacroValues[false];
modVolVertexShaders[index] = compileVS(DX11N2VertexShader, "main", VertexMacros);
}
}
return modVolVertexShaders[naomi2];
return modVolVertexShaders[index];
}
const ComPtr<ID3D11PixelShader>& DX11Shaders::getModVolShader()
@ -564,10 +611,10 @@ ComPtr<ID3D11PixelShader> DX11Shaders::compilePS(const char* source, const char*
ComPtr<ID3DBlob> DX11Shaders::getVertexShaderBlob()
{
VertexMacros[0].Definition = MacroValues[true];
VertexMacros[MacroGouraud].Definition = MacroValues[true];
// FIXME code dup
VertexMacros[1].Definition = MacroValues[false];
VertexMacros[2].Definition = MacroValues[false];
VertexMacros[MacroPositionOnly].Definition = MacroValues[false];
VertexMacros[MacroTwoVolumes].Definition = MacroValues[false];
std::string source(DX11N2VertexShader);
source += std::string("\n") + DX11N2ColorShader;
return compileShader(source.c_str(), "main", "vs_4_0", VertexMacros);
@ -576,9 +623,9 @@ ComPtr<ID3DBlob> DX11Shaders::getVertexShaderBlob()
ComPtr<ID3DBlob> DX11Shaders::getMVVertexShaderBlob()
{
// FIXME code dup
VertexMacros[0].Definition = MacroValues[false];
VertexMacros[1].Definition = MacroValues[true];
VertexMacros[2].Definition = MacroValues[false];
VertexMacros[MacroGouraud].Definition = MacroValues[false];
VertexMacros[MacroPositionOnly].Definition = MacroValues[true];
VertexMacros[MacroTwoVolumes].Definition = MacroValues[false];
return compileShader(DX11N2VertexShader, "main", "vs_4_0", VertexMacros);
}

View File

@ -70,9 +70,9 @@ private:
ComPtr<ID3D11Device> device;
std::unordered_map<u32, ComPtr<ID3D11PixelShader>> shaders;
ComPtr<ID3D11VertexShader> vertexShaders[4];
ComPtr<ID3D11VertexShader> vertexShaders[8];
ComPtr<ID3D11PixelShader> modVolShader;
ComPtr<ID3D11VertexShader> modVolVertexShaders[2];
ComPtr<ID3D11VertexShader> modVolVertexShaders[4];
ComPtr<ID3D11PixelShader> quadPixelShader;
ComPtr<ID3D11VertexShader> quadVertexShader;
ComPtr<ID3D11VertexShader> quadRotateVertexShader;

View File

@ -71,24 +71,33 @@ VertexOut main(in VertexIn vin)
{
VertexOut vo;
vo.pos = mul(transMatrix, float4(vin.pos.xyz, 1.f));
#if pp_Gouraud == 1
vo.col = vin.col * vo.pos.z;
vo.spec = vin.spec * vo.pos.z;
vo.col1 = vin.col1 * vo.pos.z;
vo.spec1 = vin.spec1 * vo.pos.z;
#else
// flat shading: no interpolation
#if DIV_POS_Z == 1
vo.pos /= vo.pos.z;
vo.pos.z = vo.pos.w;
#endif
vo.col = vin.col;
vo.spec = vin.spec;
vo.col1 = vin.col1;
vo.spec1 = vin.spec1;
#if pp_Gouraud == 1 && DIV_POS_Z != 1
vo.col *= vo.pos.z;
vo.spec *= vo.pos.z;
vo.col1 *= vo.pos.z;
vo.spec1 *= vo.pos.z;
#endif
vo.uv = float4(vin.uv * vo.pos.z, 0.f, vo.pos.z);
vo.uv1 = vin.uv1 * vo.pos.z;
vo.uv.xyz = float3(vin.uv, 0.f);
vo.uv1 = vin.uv1;
vo.index = uint(polyNumber) + vin.vertexId;
#if DIV_POS_Z == 1
vo.uv.w = vo.pos.w;
#else
vo.uv.xy *= vo.pos.z;
vo.uv.w = vo.pos.z;
vo.uv1 *= vo.pos.z;
vo.pos.w = 1.f;
vo.pos.z = 0.f;
#endif
return vo;
}
@ -131,7 +140,11 @@ struct Pixel {
float getFragDepth(float z)
{
#if DIV_POS_Z == 1
float w = 100000.0 / z;
#else
float w = 100000.0 * z;
#endif
return log2(1.0 + w) / 34.0;
}
@ -329,7 +342,13 @@ cbuffer polyConstantBuffer : register(b1)
float fog_mode2(float w)
{
float z = clamp(w * fogDensity, 1.0f, 255.9999f);
float z = clamp(
#if DIV_POS_Z == 1
fogDensity / w
#else
fogDensity * w
#endif
, 1.0f, 255.9999f);
float exp = floor(log2(z));
float m = z * 16.0f / pow(2.0, exp) - 16.0f;
float idx = floor(m) + exp * 16.0f + 0.5f;
@ -404,7 +423,7 @@ PSO main(in VertexIn inpix)
}
#endif
#endif
#if pp_Gouraud == 1
#if pp_Gouraud == 1 && DIV_POS_Z != 1
color /= inpix.uv.w;
specular /= inpix.uv.w;
#endif
@ -421,10 +440,14 @@ PSO main(in VertexIn inpix)
float2 uv;
#if pp_TwoVolumes == 1
if (area1)
uv = inpix.uv1 / inpix.uv.w;
uv = inpix.uv1;
else
#endif
uv = inpix.uv.xy / inpix.uv.w;
uv = inpix.uv.xy;
#if DIV_POS_Z != 1
uv /= inpix.uv.w;
#endif
#if NearestWrapFix == 1
uv = min(fmod(uv, 1.f), 0.9997f);
#endif
@ -564,7 +587,7 @@ PSO main(in VertexIn inpix)
Pixel pixel;
pixel.color = packColors(clamp(color, 0.f, 1.f));
pixel.depth = inpix.uv.w;
pixel.depth = pso.z;
pixel.seq_num = inpix.index;
InterlockedExchange(abufferPointers[coords], idx, pixel.next);
Pixels[idx] = pixel;
@ -829,18 +852,26 @@ struct IncludeManager : public ID3DInclude
const char * const MacroValues[] { "0", "1", "2", "3" };
enum VertexMacroEnum {
MacroGouraud,
MacroTwoVolumes,
MacroDivPosZ,
MacroPositionOnly,
MacroLightOn,
};
static D3D_SHADER_MACRO VertexMacros[]
{
{ "pp_Gouraud", "1" },
{ "POSITION_ONLY", "0" },
{ "pp_TwoVolumes", "0" },
{ "DIV_POS_Z", "0" },
{ "POSITION_ONLY", "0" },
{ "LIGHT_ON", "1" },
{ nullptr, nullptr }
};
enum PixelMacroEnum {
MacroGouraud,
MacroTexture,
MacroTexture = 3,
MacroUseAlpha,
MacroIgnoreTexA,
MacroShadInstr,
@ -852,13 +883,14 @@ enum PixelMacroEnum {
MacroAlphaTest,
MacroClipInside,
MacroNearestWrapFix,
MacroTwoVolumes,
MacroPass
};
static D3D_SHADER_MACRO PixelMacros[]
{
{ "pp_Gouraud", "1" },
{ "pp_TwoVolumes", "0" },
{ "DIV_POS_Z", "0" },
{ "pp_Texture", "0" },
{ "pp_UseAlpha", "0" },
{ "pp_IgnoreTexA", "0" },
@ -871,7 +903,6 @@ static D3D_SHADER_MACRO PixelMacros[]
{ "cp_AlphaTest", "0" },
{ "pp_ClipInside", "0" },
{ "NearestWrapFix", "0" },
{ "pp_TwoVolumes", "0" },
{ "PASS", "0" },
{ nullptr, nullptr }
};
@ -880,6 +911,7 @@ const ComPtr<ID3D11PixelShader>& DX11OITShaders::getShader(bool pp_Texture, bool
bool pp_Offset, u32 pp_FogCtrl, bool pp_BumpMap, bool fog_clamping,
bool palette, bool gouraud, bool alphaTest, bool clipInside, bool nearestWrapFix, bool twoVolumes, Pass pass)
{
bool divPosZ = !settings.platform.isNaomi2() && config::NativeDepthInterpolation;
const u32 hash = (int)pp_Texture
| (pp_UseAlpha << 1)
| (pp_IgnoreTexA << 2)
@ -894,7 +926,8 @@ const ComPtr<ID3D11PixelShader>& DX11OITShaders::getShader(bool pp_Texture, bool
| (clipInside << 13)
| (nearestWrapFix << 14)
| (twoVolumes << 15)
| (pass << 16);
| (pass << 16)
| (divPosZ << 18);
auto& shader = shaders[hash];
if (shader == nullptr)
{
@ -915,6 +948,7 @@ const ComPtr<ID3D11PixelShader>& DX11OITShaders::getShader(bool pp_Texture, bool
PixelMacros[MacroClipInside].Definition = MacroValues[clipInside];
PixelMacros[MacroNearestWrapFix].Definition = MacroValues[nearestWrapFix];
PixelMacros[MacroTwoVolumes].Definition = MacroValues[twoVolumes];
PixelMacros[MacroDivPosZ].Definition = MacroValues[divPosZ];
PixelMacros[MacroPass].Definition = MacroValues[pass];
shader = compilePS(PixelShader, "main", PixelMacros);
@ -925,24 +959,28 @@ const ComPtr<ID3D11PixelShader>& DX11OITShaders::getShader(bool pp_Texture, bool
const ComPtr<ID3D11VertexShader>& DX11OITShaders::getVertexShader(bool gouraud, bool naomi2, bool positionOnly, bool lightOn, bool twoVolumes)
{
bool divPosZ = !settings.platform.isNaomi2() && config::NativeDepthInterpolation;
const u32 hash = (int)gouraud
| ((int)naomi2 << 1)
| ((int)positionOnly << 2)
| ((int)lightOn << 3)
| ((int)twoVolumes << 4);
| ((int)twoVolumes << 4)
| ((int)divPosZ << 5);
auto& shader = vertexShaders[hash];
if (shader == nullptr)
{
VertexMacros[0].Definition = MacroValues[gouraud];
VertexMacros[MacroGouraud].Definition = MacroValues[gouraud];
if (!naomi2)
{
VertexMacros[MacroDivPosZ].Definition = MacroValues[divPosZ];
shader = compileVS(VertexShader, "main", VertexMacros);
}
else
{
VertexMacros[1].Definition = MacroValues[positionOnly];
VertexMacros[2].Definition = MacroValues[twoVolumes];
VertexMacros[3].Definition = MacroValues[lightOn];
VertexMacros[MacroDivPosZ].Definition = MacroValues[false];
VertexMacros[MacroPositionOnly].Definition = MacroValues[positionOnly];
VertexMacros[MacroTwoVolumes].Definition = MacroValues[twoVolumes];
VertexMacros[MacroLightOn].Definition = MacroValues[lightOn];
std::string source(DX11N2VertexShader);
if (!positionOnly && lightOn)
source += std::string("\n") + DX11N2ColorShader;
@ -955,27 +993,37 @@ const ComPtr<ID3D11VertexShader>& DX11OITShaders::getVertexShader(bool gouraud,
const ComPtr<ID3D11VertexShader>& DX11OITShaders::getMVVertexShader(bool naomi2)
{
if (!modVolVertexShaders[naomi2])
bool divPosZ = !settings.platform.isNaomi2() && config::NativeDepthInterpolation;
auto& mvVertexShader = modVolVertexShaders[(int)naomi2 | ((int)divPosZ << 1)];
if (!mvVertexShader)
{
if (!naomi2)
modVolVertexShaders[0] = compileVS(ModVolVertexShader, "main", nullptr);
{
VertexMacros[MacroDivPosZ].Definition = MacroValues[divPosZ];
mvVertexShader = compileVS(ModVolVertexShader, "main", VertexMacros);
}
else
{
VertexMacros[0].Definition = MacroValues[false];
VertexMacros[1].Definition = MacroValues[true];
VertexMacros[2].Definition = MacroValues[false];
VertexMacros[3].Definition = MacroValues[false];
modVolVertexShaders[1] = compileVS(DX11N2VertexShader, "main", VertexMacros);
VertexMacros[MacroGouraud].Definition = MacroValues[false];
VertexMacros[MacroPositionOnly].Definition = MacroValues[true];
VertexMacros[MacroTwoVolumes].Definition = MacroValues[false];
VertexMacros[MacroLightOn].Definition = MacroValues[false];
mvVertexShader = compileVS(DX11N2VertexShader, "main", VertexMacros);
}
}
return modVolVertexShaders[naomi2];
return mvVertexShader;
}
// Returns the "modifierVolume" pixel shader variant matching the current depth
// interpolation mode, compiling and caching it on first use.
// DIV_POS_Z is enabled only when the platform is not Naomi 2 and the
// NativeDepthInterpolation option is set.
const ComPtr<ID3D11PixelShader>& DX11OITShaders::getModVolShader()
{
    const bool nativeDepth = !settings.platform.isNaomi2() && config::NativeDepthInterpolation;
    ComPtr<ID3D11PixelShader>& cachedShader = modVolShaders[nativeDepth];
    if (!cachedShader)
    {
        // Only DIV_POS_Z is updated here; the other PixelMacros entries keep
        // whatever values they currently hold.
        PixelMacros[MacroDivPosZ].Definition = MacroValues[nativeDepth];
        cachedShader = compilePS(PixelShader, "modifierVolume", PixelMacros);
    }
    return cachedShader;
}
@ -1082,10 +1130,10 @@ ComPtr<ID3D11PixelShader> DX11OITShaders::compilePS(const char* source, const ch
ComPtr<ID3DBlob> DX11OITShaders::getVertexShaderBlob()
{
VertexMacros[0].Definition = MacroValues[true];
VertexMacros[MacroGouraud].Definition = MacroValues[true];
// FIXME code dup
VertexMacros[1].Definition = MacroValues[false];
VertexMacros[2].Definition = MacroValues[true];
VertexMacros[MacroPositionOnly].Definition = MacroValues[false];
VertexMacros[MacroTwoVolumes].Definition = MacroValues[true];
std::string source(DX11N2VertexShader);
source += std::string("\n") + DX11N2ColorShader;
return compileShader(source.c_str(), "main", "vs_5_0", VertexMacros);
@ -1094,9 +1142,9 @@ ComPtr<ID3DBlob> DX11OITShaders::getVertexShaderBlob()
ComPtr<ID3DBlob> DX11OITShaders::getMVVertexShaderBlob()
{
// FIXME code dup
VertexMacros[0].Definition = MacroValues[false];
VertexMacros[1].Definition = MacroValues[true];
VertexMacros[2].Definition = MacroValues[false];
VertexMacros[MacroGouraud].Definition = MacroValues[false];
VertexMacros[MacroPositionOnly].Definition = MacroValues[true];
VertexMacros[MacroTwoVolumes].Definition = MacroValues[false];
return compileShader(DX11N2VertexShader, "main", "vs_5_0", VertexMacros);
}

View File

@ -35,7 +35,7 @@ public:
const ComPtr<ID3D11PixelShader>& getShader(bool pp_Texture, bool pp_UseAlpha, bool pp_IgnoreTexA, u32 pp_ShadInstr,
bool pp_Offset, u32 pp_FogCtrl, bool pp_BumpMap, bool fog_clamping,
bool palette, bool gouraud, bool alphaTest, bool clipInside, bool nearestWrapFix, bool twoVolumes, Pass pass);
const ComPtr<ID3D11VertexShader>& getVertexShader(bool gouraud, bool naomi2, bool positionOnly, bool lightOn, bool twoVolumes = true);
const ComPtr<ID3D11VertexShader>& getVertexShader(bool gouraud, bool naomi2, bool positionOnly, bool lightOn, bool twoVolumes = false);
const ComPtr<ID3D11PixelShader>& getModVolShader();
const ComPtr<ID3D11VertexShader>& getMVVertexShader(bool naomi2);
const ComPtr<ID3D11PixelShader>& getFinalShader();
@ -47,9 +47,10 @@ public:
saveCache(CacheFile);
shaders.clear();
vertexShaders.clear();
modVolShader.reset();
for (auto& shader : modVolVertexShaders)
shader.reset();
for (auto& shader : modVolShaders)
shader.reset();
for (auto& shader : trModVolShaders)
shader.reset();
finalShader.reset();
@ -69,8 +70,8 @@ private:
ComPtr<ID3D11Device> device;
std::unordered_map<u32, ComPtr<ID3D11PixelShader>> shaders;
std::unordered_map<u32, ComPtr<ID3D11VertexShader>> vertexShaders;
ComPtr<ID3D11PixelShader> modVolShader;
ComPtr<ID3D11VertexShader> modVolVertexShaders[2];
ComPtr<ID3D11PixelShader> modVolShaders[2];
ComPtr<ID3D11VertexShader> modVolVertexShaders[4];
ComPtr<ID3D11PixelShader> trModVolShaders[4];
ComPtr<ID3D11PixelShader> finalShader;

View File

@ -17,6 +17,7 @@
along with Flycast. If not, see <https://www.gnu.org/licenses/>.
*/
#include "d3d_shaders.h"
#include "cfg/option.h"
#define SHADER_DEBUG 0 // D3DXSHADER_DEBUG|D3DXSHADER_SKIPOPTIMIZATION
@ -43,18 +44,23 @@ VertexOut main(in VertexIn vin)
{
VertexOut vo;
vo.pos = mul(transMatrix, float4(vin.pos.xyz, 1.f));
#if pp_Gouraud == 1
vo.col = vin.col * vo.pos.z;
vo.spec = vin.spec * vo.pos.z;
#else
// flat shading: no interpolation
#if DIV_POS_Z == 1
vo.pos /= vo.pos.z;
vo.pos.z = vo.pos.w;
#endif
vo.col = vin.col;
vo.spec = vin.spec;
#if pp_Gouraud == 1 && DIV_POS_Z != 1
vo.col *= vo.pos.z;
vo.spec *= vo.pos.z;
#endif
vo.uv = float4(vin.uv * vo.pos.z, 0.f, vo.pos.z);
vo.uv = float4(vin.uv, 0.f, vo.pos.z);
#if DIV_POS_Z != 1
vo.uv.xy *= vo.pos.z;
vo.pos.w = 1.f;
vo.pos.z = 0.f;
#endif
return vo;
}
@ -75,7 +81,7 @@ struct pixel
#endif
};
sampler2D samplr : register(s0);
sampler2D tex_pal : register(s1);
sampler2D fog_table : register(s2);
@ -91,7 +97,13 @@ float4 colorClampMax : register(c7);
float fog_mode2(float w)
{
float z = clamp(w * FOG_DENSITY_SCALE.x, 1.0f, 255.9999f);
float z = clamp(
#if DIV_POS_Z == 1
FOG_DENSITY_SCALE.x / w
#else
FOG_DENSITY_SCALE.x * w
#endif
, 1.0f, 255.9999f);
float exp = floor(log2(z));
float m = z * 16.0f / pow(2.0, exp) - 16.0f;
float idx = floor(m) + exp * 16.0f + 0.5f;
@ -112,7 +124,12 @@ float4 clampColor(float4 color)
float4 palettePixel(float4 coords)
{
int colorIdx = int(floor(tex2Dproj(samplr, coords).a * 255.0f + 0.5f) + paletteIndex.x);
#if DIV_POS_Z == 1
float texColIdx = tex2D(samplr, coords.xy).a;
#else
float texColIdx = tex2Dproj(samplr, coords).a;
#endif
int colorIdx = int(floor(texColIdx * 255.0f + 0.5f) + paletteIndex.x);
float2 c = float2((fmod(float(colorIdx), 32.0f) * 2.0f + 1.0f) / 64.0f, (float(colorIdx / 32) * 2.0f + 1.0f) / 64.0f);
return tex2D(tex_pal, c);
}
@ -134,17 +151,16 @@ PSO main(in pixel inpix)
discard;
#endif
#if pp_Gouraud == 1
float4 color = inpix.col / inpix.uv.w;
#if pp_BumpMap == 1 || pp_Offset == 1
float4 specular = inpix.spec / inpix.uv.w;
#endif
#else
float4 color = inpix.col;
#if pp_BumpMap == 1 || pp_Offset == 1
float4 specular = inpix.spec;
#endif
#endif
#if pp_Gouraud == 1 && DIV_POS_Z != 1
color /= inpix.uv.w;
#if pp_BumpMap == 1 || pp_Offset == 1
specular /= inpix.uv.w;
#endif
#endif
#if pp_UseAlpha == 0
color.a = 1.0f;
#endif
@ -154,7 +170,11 @@ PSO main(in pixel inpix)
#if pp_Texture == 1
{
#if pp_Palette == 0
float4 texcol = tex2Dproj(samplr, inpix.uv);
#if DIV_POS_Z == 1
float4 texcol = tex2D(samplr, inpix.uv.xy);
#else
float4 texcol = tex2Dproj(samplr, inpix.uv);
#endif
#else
float4 texcol = palettePixel(inpix.uv);
#endif
@ -204,7 +224,11 @@ PSO main(in pixel inpix)
//color.rgb = float3(inpix.uv.w * FOG_DENSITY_SCALE.x / 128.0f);
PSO pso;
float w = inpix.uv.w * 100000.0f;
#if DIV_POS_Z == 1
float w = 100000.0f / inpix.uv.w;
#else
float w = 100000.0f * inpix.uv.w;
#endif
pso.z = log2(1.0f + w) / 34.0f;
pso.col = color;
@ -214,7 +238,11 @@ PSO main(in pixel inpix)
PSO modifierVolume(float4 uv : TEXCOORD0)
{
PSO pso;
float w = uv.w * 100000.0f;
#if DIV_POS_Z == 1
float w = 100000.0f / uv.w;
#else
float w = 100000.0f * uv.w;
#endif
pso.z = log2(1.0f + w) / 34.0f;
pso.col = float4(0, 0, 0, FOG_DENSITY_SCALE.y);
@ -227,24 +255,30 @@ const char * const MacroValues[] { "0", "1", "2", "3" };
static D3DXMACRO VertexMacros[]
{
{ "pp_Gouraud", "1" },
{ "DIV_POS_Z", "0" },
{ 0, 0 }
};
constexpr u32 MacroTexture = 0;
constexpr u32 MacroOffset = 1;
constexpr u32 MacroShadInstr = 2;
constexpr u32 MacroIgnoreTexA = 3;
constexpr u32 MacroUseAlpha = 4;
constexpr u32 MacroFogCtrl = 5;
constexpr u32 MacroFogClamping = 6;
constexpr u32 MacroPalette = 7;
constexpr u32 MacroBumpMap = 8;
constexpr u32 MacroTriLinear = 9;
constexpr u32 MacroGouraud = 10;
constexpr u32 MacroClipInside = 11;
enum ShaderMacros {
MacroGouraud,
MacroDivPosZ,
MacroTexture,
MacroOffset,
MacroShadInstr,
MacroIgnoreTexA,
MacroUseAlpha,
MacroFogCtrl,
MacroFogClamping,
MacroPalette,
MacroBumpMap,
MacroTriLinear,
MacroClipInside,
};
static D3DXMACRO PixelMacros[]
{
{ "pp_Gouraud", "1" },
{ "DIV_POS_Z", "0" },
{ "pp_Texture", "0" },
{ "pp_Offset", "0" },
{ "pp_ShadInstr", "0" },
@ -255,7 +289,6 @@ static D3DXMACRO PixelMacros[]
{ "pp_Palette", "0" },
{ "pp_BumpMap", "0" },
{ "pp_TriLinear", "0" },
{ "pp_Gouraud", "1" },
{ "pp_ClipInside", "0" },
{0, 0}
};
@ -275,7 +308,8 @@ const ComPtr<IDirect3DPixelShader9>& D3DShaders::getShader(bool pp_Texture, bool
| (trilinear << 10)
| (palette << 11)
| (gouraud << 12)
| (clipInside << 13);
| (clipInside << 13)
| ((int)config::NativeDepthInterpolation << 14);
auto it = shaders.find(hash);
if (it == shaders.end())
{
@ -293,6 +327,7 @@ const ComPtr<IDirect3DPixelShader9>& D3DShaders::getShader(bool pp_Texture, bool
PixelMacros[MacroPalette].Definition = MacroValues[palette];
PixelMacros[MacroGouraud].Definition = MacroValues[gouraud];
PixelMacros[MacroClipInside].Definition = MacroValues[clipInside];
PixelMacros[MacroDivPosZ].Definition = MacroValues[config::NativeDepthInterpolation];
ComPtr<IDirect3DPixelShader9> shader = compilePS(PixelShader, "main", PixelMacros);
verify((bool )shader);
it = shaders.insert(std::make_pair(hash, shader)).first;
@ -302,10 +337,11 @@ const ComPtr<IDirect3DPixelShader9>& D3DShaders::getShader(bool pp_Texture, bool
const ComPtr<IDirect3DVertexShader9>& D3DShaders::getVertexShader(bool gouraud)
{
ComPtr<IDirect3DVertexShader9>& vertexShader = gouraud ? gouraudVertexShader : flatVertexShader;
ComPtr<IDirect3DVertexShader9>& vertexShader = vertexShaders[(int)gouraud | ((int)config::NativeDepthInterpolation << 1)];
if (!vertexShader)
{
VertexMacros[0].Definition = MacroValues[gouraud];
VertexMacros[MacroGouraud].Definition = MacroValues[gouraud];
VertexMacros[MacroDivPosZ].Definition = MacroValues[config::NativeDepthInterpolation];
vertexShader = compileVS(VertexShader, "main", VertexMacros);
}
@ -314,8 +350,12 @@ const ComPtr<IDirect3DVertexShader9>& D3DShaders::getVertexShader(bool gouraud)
// Returns the "modifierVolume" pixel shader for the current value of the
// NativeDepthInterpolation option, compiling and caching it on first use.
// One cache slot exists per option value.
const ComPtr<IDirect3DPixelShader9>& D3DShaders::getModVolShader()
{
    ComPtr<IDirect3DPixelShader9>& cachedShader = modVolShaders[config::NativeDepthInterpolation];
    if (!cachedShader)
    {
        // Only DIV_POS_Z is updated here; the other PixelMacros entries keep
        // whatever values they currently hold.
        PixelMacros[MacroDivPosZ].Definition = MacroValues[config::NativeDepthInterpolation];
        cachedShader = compilePS(PixelShader, "modifierVolume", PixelMacros);
    }
    return cachedShader;
}

View File

@ -36,9 +36,10 @@ public:
const ComPtr<IDirect3DPixelShader9>& getModVolShader();
void term() {
shaders.clear();
gouraudVertexShader.reset();
flatVertexShader.reset();
modVolShader.reset();
for (auto& shader : vertexShaders)
shader.reset();
for (auto& shader : modVolShaders)
shader.reset();
device.reset();
}
@ -49,7 +50,6 @@ private:
ComPtr<IDirect3DDevice9> device;
std::unordered_map<u32, ComPtr<IDirect3DPixelShader9>> shaders;
ComPtr<IDirect3DVertexShader9> gouraudVertexShader;
ComPtr<IDirect3DVertexShader9> flatVertexShader;
ComPtr<IDirect3DPixelShader9> modVolShader;
ComPtr<IDirect3DVertexShader9> vertexShaders[4];
ComPtr<IDirect3DPixelShader9> modVolShaders[2];
};

View File

@ -198,7 +198,7 @@ void main(void)
)";
static const char *tr_modvol_shader_source = R"(
noperspective in vec3 vtx_uv;
in vec3 vtx_uv;
// Must match ModifierVolumeMode enum values
#define MV_XOR 0

View File

@ -100,6 +100,7 @@ struct gl4PipelineShader
bool fog_clamping;
bool palette;
bool naomi2;
bool divPosZ;
};

View File

@ -25,14 +25,14 @@ static const char *gouraudSource = R"(
#if pp_Gouraud == 0
#define INTERPOLATION flat
#else
#define INTERPOLATION noperspective
#define INTERPOLATION
#endif
#define NOPERSPECTIVE noperspective
)";
N2Vertex4Source::N2Vertex4Source(const gl4PipelineShader* shader) : OpenGl4Source()
{
addConstant("OIT_RENDER");
addConstant("DIV_POS_Z", false);
if (shader == nullptr)
{
addConstant("POSITION_ONLY", 1);

View File

@ -54,6 +54,7 @@ static gl4PipelineShader *gl4GetProgram(bool cp_AlphaTest, bool pp_InsideClippin
rv <<= 1; rv |= (int)palette;
rv <<= 1; rv |= (int)naomi2;
rv <<= 2; rv |= (int)pass;
rv <<= 1; rv |= (int)(!settings.platform.isNaomi2() && config::NativeDepthInterpolation);
gl4PipelineShader *shader = &gl4.shaders[rv];
if (shader->program == 0)
@ -73,6 +74,7 @@ static gl4PipelineShader *gl4GetProgram(bool cp_AlphaTest, bool pp_InsideClippin
shader->palette = palette;
shader->naomi2 = naomi2;
shader->pass = pass;
shader->divPosZ = !settings.platform.isNaomi2() && config::NativeDepthInterpolation;
gl4CompilePipelineShader(shader);
}

View File

@ -55,7 +55,7 @@ static const char* VertexShaderSource = R"(
#if pp_Gouraud == 0
#define INTERPOLATION flat
#else
#define INTERPOLATION noperspective
#define INTERPOLATION
#endif
// Uniforms
@ -74,31 +74,39 @@ in vec2 in_uv1;
// Output
INTERPOLATION out vec4 vtx_base;
INTERPOLATION out vec4 vtx_offs;
noperspective out vec3 vtx_uv;
out vec3 vtx_uv;
INTERPOLATION out vec4 vtx_base1;
INTERPOLATION out vec4 vtx_offs1;
noperspective out vec2 vtx_uv1;
out vec2 vtx_uv1;
flat out uint vtx_index;
void main()
{
vec4 vpos = ndcMat * in_pos;
#if DIV_POS_Z == 1
vpos /= vpos.z;
vpos.z = vpos.w;
#endif
vtx_base = in_base;
vtx_offs = in_offs;
vtx_uv = vec3(in_uv * vpos.z, vpos.z);
vtx_uv = vec3(in_uv, vpos.z);
vtx_base1 = in_base1;
vtx_offs1 = in_offs1;
vtx_uv1 = in_uv1 * vpos.z;
vtx_uv1 = in_uv1;
vtx_index = uint(pp_Number) + uint(gl_VertexID);
#if pp_Gouraud == 1
vtx_base *= vpos.z;
vtx_offs *= vpos.z;
vtx_base1 *= vpos.z;
vtx_offs1 *= vpos.z;
#endif
#if pp_Gouraud == 1 && DIV_POS_Z != 1
vtx_base *= vpos.z;
vtx_offs *= vpos.z;
vtx_base1 *= vpos.z;
vtx_offs1 *= vpos.z;
#endif
vpos.w = 1.0;
vpos.z = 0.0;
#if DIV_POS_Z != 1
vtx_uv.xy *= vpos.z;
vtx_uv1 *= vpos.z;
vpos.w = 1.0;
vpos.z = 0.0;
#endif
gl_Position = vpos;
}
)";
@ -123,7 +131,7 @@ out vec4 FragColor;
#if pp_Gouraud == 0
#define INTERPOLATION flat
#else
#define INTERPOLATION noperspective
#define INTERPOLATION
#endif
// Uniforms
@ -155,15 +163,21 @@ uniform int fog_control[2];
// Input
INTERPOLATION in vec4 vtx_base;
INTERPOLATION in vec4 vtx_offs;
noperspective in vec3 vtx_uv;
in vec3 vtx_uv;
INTERPOLATION in vec4 vtx_base1;
INTERPOLATION in vec4 vtx_offs1;
noperspective in vec2 vtx_uv1;
in vec2 vtx_uv1;
flat in uint vtx_index;
float fog_mode2(float w)
{
float z = clamp(w * sp_FOG_DENSITY, 1.0, 255.9999);
float z = clamp(
#if DIV_POS_Z == 1
sp_FOG_DENSITY / w
#else
sp_FOG_DENSITY * w
#endif
, 1.0, 255.9999);
float exp = floor(log2(z));
float m = z * 16.0 / pow(2.0, exp) - 16.0;
float idx = floor(m) + exp * 16.0 + 0.5;
@ -184,7 +198,12 @@ vec4 fog_clamp(vec4 col)
vec4 palettePixel(sampler2D tex, vec3 coords)
{
int color_idx = int(floor(textureProj(tex, coords).r * 255.0 + 0.5)) + palette_index;
#if DIV_POS_Z == 1
float colIdx = texture(tex, coords.xy).r;
#else
float colIdx = textureProj(tex, coords).r;
#endif
int color_idx = int(floor(colIdx * 255.0 + 0.5)) + palette_index;
ivec2 c = ivec2(color_idx % 32, color_idx / 32);
return texelFetch(palette, c, 0);
}
@ -233,7 +252,7 @@ void main()
}
#endif
#endif
#if pp_Gouraud == 1
#if pp_Gouraud == 1 && DIV_POS_Z != 1
color /= vtx_uv.z;
offset /= vtx_uv.z;
#endif
@ -250,10 +269,17 @@ void main()
{
vec4 texcol;
#if pp_Palette == 0
if (area1)
texcol = textureProj(tex1, vec3(vtx_uv1.xy, vtx_uv.z));
else
texcol = textureProj(tex0, vtx_uv);
#if DIV_POS_Z == 1
if (area1)
texcol = texture(tex1, vtx_uv1);
else
texcol = texture(tex0, vtx_uv.xy);
#else
if (area1)
texcol = textureProj(tex1, vec3(vtx_uv1.xy, vtx_uv.z));
else
texcol = textureProj(tex0, vtx_uv);
#endif
#else
if (area1)
texcol = palettePixel(tex1, vec3(vtx_uv1.xy, vtx_uv.z));
@ -415,7 +441,7 @@ void main()
)";
static const char* ModifierVolumeShader = R"(
noperspective in vec3 vtx_uv;
in vec3 vtx_uv;
void main()
{
@ -426,8 +452,9 @@ void main()
class Vertex4Source : public OpenGl4Source
{
public:
Vertex4Source(bool gouraud) : OpenGl4Source() {
Vertex4Source(bool gouraud, bool divPosZ) : OpenGl4Source() {
addConstant("pp_Gouraud", gouraud);
addConstant("DIV_POS_Z", divPosZ);
addSource(VertexShaderSource);
}
@ -453,6 +480,7 @@ public:
addConstant("pp_Palette", s->palette);
addConstant("NOUVEAU", gl.mesa_nouveau);
addConstant("PASS", (int)s->pass);
addConstant("DIV_POS_Z", s->divPosZ);
addSource(ShaderHeader);
addSource(gl4PixelPipelineShader);
@ -472,7 +500,7 @@ bool gl4CompilePipelineShader(gl4PipelineShader* s, const char *fragment_source
if (s->naomi2)
vertexSource = N2Vertex4Source(s).generate();
else
vertexSource = Vertex4Source(s->pp_Gouraud).generate();
vertexSource = Vertex4Source(s->pp_Gouraud, s->divPosZ).generate();
Fragment4ShaderSource fragmentSource(s);
s->program = gl_CompileAndLink(vertex_source != nullptr ? vertex_source : vertexSource.c_str(),
@ -588,15 +616,17 @@ static void create_modvol_shader()
{
if (gl4.modvol_shader.program != 0)
return;
Vertex4Source vertexShader(false);
Vertex4Source vertexShader(false, config::NativeDepthInterpolation);
OpenGl4Source fragmentShader;
fragmentShader.addSource(ShaderHeader)
fragmentShader.addConstant("DIV_POS_Z", config::NativeDepthInterpolation)
.addSource(ShaderHeader)
.addSource(ModifierVolumeShader);
gl4.modvol_shader.program = gl_CompileAndLink(vertexShader.generate().c_str(), fragmentShader.generate().c_str());
gl4.modvol_shader.ndcMat = glGetUniformLocation(gl4.modvol_shader.program, "ndcMat");
N2Vertex4Source n2VertexShader;
fragmentShader.setConstant("DIV_POS_Z", false);
gl4.n2ModVolShader.program = gl_CompileAndLink(n2VertexShader.generate().c_str(), fragmentShader.generate().c_str());
gl4.n2ModVolShader.ndcMat = glGetUniformLocation(gl4.n2ModVolShader.program, "ndcMat");
gl4.n2ModVolShader.mvMat = glGetUniformLocation(gl4.n2ModVolShader.program, "mvMat");
@ -626,8 +656,6 @@ static bool gl_create_resources()
gl4SetupModvolVBO();
}
create_modvol_shader();
initQuad();
glCheck();
@ -757,8 +785,9 @@ static bool RenderFrame(int width, int height)
pvrrc.fog_clamp_min.getRGBAColor(gl4ShaderUniforms.fog_clamp_min);
pvrrc.fog_clamp_max.getRGBAColor(gl4ShaderUniforms.fog_clamp_max);
if (config::Fog)
if (config::ModifierVolumes)
{
create_modvol_shader();
glcache.UseProgram(gl4.modvol_shader.program);
glUniformMatrix4fv(gl4.modvol_shader.ndcMat, 1, GL_FALSE, &gl4ShaderUniforms.ndcMat[0][0]);

View File

@ -40,7 +40,11 @@ struct Pixel { \n\
\n\
void setFragDepth(float z) \n\
{ \n\
#if DIV_POS_Z == 1 \n\
float w = 100000.0 / z; \n\
#else \n\
float w = 100000.0 * z; \n\
#endif \n\
gl_FragDepth = log2(1.0 + w) / 34.0; \n\
} \n\
\n\

View File

@ -69,22 +69,9 @@ const char *PixelCompatShader = R"(
)";
const char* GouraudSource = R"(
#if TARGET_GL == GL3
#define NOPERSPECTIVE noperspective
#if pp_Gouraud == 0
#define INTERPOLATION flat
#else
#define INTERPOLATION noperspective
#endif
#elif TARGET_GL == GLES3
#define NOPERSPECTIVE
#if pp_Gouraud == 0
#define INTERPOLATION flat
#else
#define INTERPOLATION
#endif
#if (TARGET_GL == GL3 || TARGET_GL == GLES3) && pp_Gouraud == 0
#define INTERPOLATION flat
#else
#define NOPERSPECTIVE
#define INTERPOLATION
#endif
)";
@ -103,7 +90,7 @@ in highp vec2 in_uv;
/* output */
INTERPOLATION out highp vec4 vtx_base;
INTERPOLATION out highp vec4 vtx_offs;
NOPERSPECTIVE out highp vec3 vtx_uv;
out highp vec3 vtx_uv;
void main()
{
@ -116,13 +103,20 @@ void main()
vpos.z = depth_scale.x + depth_scale.y * vpos.w;
vpos.xy *= vpos.w;
#else
#if pp_Gouraud == 1
vtx_base *= vpos.z;
vtx_offs *= vpos.z;
#endif
vtx_uv = vec3(in_uv * vpos.z, vpos.z);
vpos.w = 1.0;
vpos.z = 0.0;
#if DIV_POS_Z == 1
vpos /= vpos.z;
vpos.z = vpos.w;
#endif
#if pp_Gouraud == 1 && DIV_POS_Z != 1
vtx_base *= vpos.z;
vtx_offs *= vpos.z;
#endif
vtx_uv = vec3(in_uv, vpos.z);
#if DIV_POS_Z != 1
vtx_uv.xy *= vpos.z;
vpos.w = 1.0;
vpos.z = 0.0;
#endif
#endif
gl_Position = vpos;
}
@ -149,15 +143,19 @@ uniform mediump int palette_index;
/* Vertex input*/
INTERPOLATION in highp vec4 vtx_base;
INTERPOLATION in highp vec4 vtx_offs;
NOPERSPECTIVE in highp vec3 vtx_uv;
in highp vec3 vtx_uv;
lowp float fog_mode2(highp float w)
{
highp float z = clamp(
#if TARGET_GL == GLES2
highp float z = clamp(vtx_uv.z, 1.0, 255.9999);
vtx_uv.z
#elif DIV_POS_Z == 1
sp_FOG_DENSITY / w
#else
highp float z = clamp(w * sp_FOG_DENSITY, 1.0, 255.9999);
sp_FOG_DENSITY * w
#endif
, 1.0, 255.9999);
mediump float exp = floor(log2(z));
highp float m = z * 16.0 / pow(2.0, exp) - 16.0;
mediump float idx = floor(m) + exp * 16.0 + 0.5;
@ -178,7 +176,7 @@ highp vec4 fog_clamp(lowp vec4 col)
lowp vec4 palettePixel(highp vec3 coords)
{
#if TARGET_GL == GLES2 || TARGET_GL == GL2
#if TARGET_GL == GLES2 || TARGET_GL == GL2 || DIV_POS_Z == 1
highp int color_idx = int(floor(texture(tex, coords.xy).FOG_CHANNEL * 255.0 + 0.5)) + palette_index;
highp vec2 c = vec2((mod(float(color_idx), 32.0) * 2.0 + 1.0) / 64.0, (float(color_idx / 32) * 2.0 + 1.0) / 64.0);
return texture(palette, c);
@ -208,7 +206,7 @@ void main()
highp vec4 color = vtx_base;
highp vec4 offset = vtx_offs;
#if pp_Gouraud == 1 && TARGET_GL != GLES2
#if pp_Gouraud == 1 && TARGET_GL != GLES2 && DIV_POS_Z != 1
color /= vtx_uv.z;
offset /= vtx_uv.z;
#endif
@ -221,7 +219,7 @@ void main()
#if pp_Texture==1
{
#if pp_Palette == 0
#if TARGET_GL == GLES2 || TARGET_GL == GL2
#if TARGET_GL == GLES2 || TARGET_GL == GL2 || DIV_POS_Z == 1
lowp vec4 texcol = texture(tex, vtx_uv.xy);
#else
lowp vec4 texcol = textureProj(tex, vtx_uv);
@ -289,7 +287,11 @@ void main()
//color.rgb = vec3(vtx_uv.z * sp_FOG_DENSITY / 128.0);
#if TARGET_GL != GLES2
highp float w = vtx_uv.z * 100000.0;
#if DIV_POS_Z == 1
highp float w = 100000.0 / vtx_uv.z;
#else
highp float w = 100000.0 * vtx_uv.z;
#endif
gl_FragDepth = log2(1.0 + w) / 34.0;
#endif
gl_FragColor = color;
@ -300,12 +302,16 @@ static const char* ModifierVolumeShader = R"(
uniform lowp float sp_ShaderColor;
/* Vertex input*/
NOPERSPECTIVE in highp vec3 vtx_uv;
in highp vec3 vtx_uv;
void main()
{
#if TARGET_GL != GLES2
highp float w = vtx_uv.z * 100000.0;
#if DIV_POS_Z == 1
highp float w = 100000.0 / vtx_uv.z;
#else
highp float w = 100000.0 * vtx_uv.z;
#endif
gl_FragDepth = log2(1.0 + w) / 34.0;
#endif
gl_FragColor=vec4(0.0, 0.0, 0.0, sp_ShaderColor);
@ -635,19 +641,20 @@ PipelineShader *GetProgram(bool cp_AlphaTest, bool pp_InsideClipping,
u32 rv=0;
rv |= pp_InsideClipping;
rv<<=1; rv|=cp_AlphaTest;
rv<<=1; rv|=pp_Texture;
rv<<=1; rv|=pp_UseAlpha;
rv<<=1; rv|=pp_IgnoreTexA;
rv<<=2; rv|=pp_ShadInstr;
rv<<=1; rv|=pp_Offset;
rv<<=2; rv|=pp_FogCtrl;
rv<<=1; rv|=pp_Gouraud;
rv<<=1; rv|=pp_BumpMap;
rv<<=1; rv|=fog_clamping;
rv<<=1; rv|=trilinear;
rv<<=1; rv|=palette;
rv<<=1; rv|=naomi2;
rv <<= 1; rv |= cp_AlphaTest;
rv <<= 1; rv |= pp_Texture;
rv <<= 1; rv |= pp_UseAlpha;
rv <<= 1; rv |= pp_IgnoreTexA;
rv <<= 2; rv |= pp_ShadInstr;
rv <<= 1; rv |= pp_Offset;
rv <<= 2; rv |= pp_FogCtrl;
rv <<= 1; rv |= pp_Gouraud;
rv <<= 1; rv |= pp_BumpMap;
rv <<= 1; rv |= fog_clamping;
rv <<= 1; rv |= trilinear;
rv <<= 1; rv |= palette;
rv <<= 1; rv |= naomi2;
rv <<= 1, rv |= !settings.platform.isNaomi2() && config::NativeDepthInterpolation;
PipelineShader *shader = &gl.shaders[rv];
if (shader->program == 0)
@ -666,6 +673,7 @@ PipelineShader *GetProgram(bool cp_AlphaTest, bool pp_InsideClipping,
shader->trilinear = trilinear;
shader->palette = palette;
shader->naomi2 = naomi2;
shader->divPosZ = !settings.platform.isNaomi2() && config::NativeDepthInterpolation;
CompilePipelineShader(shader);
}
@ -675,8 +683,9 @@ PipelineShader *GetProgram(bool cp_AlphaTest, bool pp_InsideClipping,
class VertexSource : public OpenGlSource
{
public:
VertexSource(bool gouraud) : OpenGlSource() {
VertexSource(bool gouraud, bool divPosZ) : OpenGlSource() {
addConstant("pp_Gouraud", gouraud);
addConstant("DIV_POS_Z", divPosZ);
addSource(VertexCompatShader);
addSource(GouraudSource);
@ -702,6 +711,7 @@ public:
addConstant("FogClamping", s->fog_clamping);
addConstant("pp_TriLinear", s->trilinear);
addConstant("pp_Palette", s->palette);
addConstant("DIV_POS_Z", s->divPosZ);
addSource(PixelCompatShader);
addSource(GouraudSource);
@ -715,7 +725,7 @@ bool CompilePipelineShader(PipelineShader* s)
if (s->naomi2)
vertexShader = N2VertexSource(s->pp_Gouraud, false, s->pp_Texture).generate();
else
vertexShader = VertexSource(s->pp_Gouraud).generate();
vertexShader = VertexSource(s->pp_Gouraud, s->divPosZ).generate();
FragmentShaderSource fragmentSource(s);
s->program = gl_CompileAndLink(vertexShader.c_str(), fragmentSource.generate().c_str());
@ -860,10 +870,11 @@ static void create_modvol_shader()
{
if (gl.modvol_shader.program != 0)
return;
VertexSource vertexShader(false);
VertexSource vertexShader(false, config::NativeDepthInterpolation);
OpenGlSource fragmentShader;
fragmentShader.addConstant("pp_Gouraud", 0)
.addConstant("DIV_POS_Z", config::NativeDepthInterpolation)
.addSource(PixelCompatShader)
.addSource(GouraudSource)
.addSource(ModifierVolumeShader);
@ -876,6 +887,7 @@ static void create_modvol_shader()
if (gl.gl_major >= 3)
{
N2VertexSource n2vertexShader(false, true, false);
fragmentShader.setConstant("DIV_POS_Z", false);
gl.n2ModVolShader.program = gl_CompileAndLink(n2vertexShader.generate().c_str(), fragmentShader.generate().c_str());
gl.n2ModVolShader.ndcMat = glGetUniformLocation(gl.n2ModVolShader.program, "ndcMat");
gl.n2ModVolShader.sp_ShaderColor = glGetUniformLocation(gl.n2ModVolShader.program, "sp_ShaderColor");
@ -885,7 +897,7 @@ static void create_modvol_shader()
}
}
bool gl_create_resources()
static bool gl_create_resources()
{
if (gl.vbo.geometry != nullptr)
// Assume the resources have already been created
@ -903,7 +915,6 @@ bool gl_create_resources()
gl.vbo.idxs = std::unique_ptr<GlBuffer>(new GlBuffer(GL_ELEMENT_ARRAY_BUFFER));
gl.vbo.idxs2 = std::unique_ptr<GlBuffer>(new GlBuffer(GL_ELEMENT_ARRAY_BUFFER));
create_modvol_shader();
initQuad();
return true;
@ -911,8 +922,6 @@ bool gl_create_resources()
GLuint gl_CompileShader(const char* shader,GLuint type);
bool gl_create_resources();
//setup
#ifndef __APPLE__
@ -1200,17 +1209,21 @@ bool RenderFrame(int width, int height)
pvrrc.fog_clamp_min.getRGBAColor(ShaderUniforms.fog_clamp_min);
pvrrc.fog_clamp_max.getRGBAColor(ShaderUniforms.fog_clamp_max);
glcache.UseProgram(gl.modvol_shader.program);
if (gl.modvol_shader.depth_scale != -1)
glUniform4fv(gl.modvol_shader.depth_scale, 1, ShaderUniforms.depth_coefs);
glUniformMatrix4fv(gl.modvol_shader.ndcMat, 1, GL_FALSE, &ShaderUniforms.ndcMat[0][0]);
glUniform1f(gl.modvol_shader.sp_ShaderColor, 1 - FPU_SHAD_SCALE.scale_factor / 256.f);
if (config::ModifierVolumes)
{
create_modvol_shader();
glcache.UseProgram(gl.modvol_shader.program);
if (gl.modvol_shader.depth_scale != -1)
glUniform4fv(gl.modvol_shader.depth_scale, 1, ShaderUniforms.depth_coefs);
glUniformMatrix4fv(gl.modvol_shader.ndcMat, 1, GL_FALSE, &ShaderUniforms.ndcMat[0][0]);
glUniform1f(gl.modvol_shader.sp_ShaderColor, 1 - FPU_SHAD_SCALE.scale_factor / 256.f);
glcache.UseProgram(gl.n2ModVolShader.program);
if (gl.n2ModVolShader.depth_scale != -1)
glUniform4fv(gl.n2ModVolShader.depth_scale, 1, ShaderUniforms.depth_coefs);
glUniformMatrix4fv(gl.n2ModVolShader.ndcMat, 1, GL_FALSE, &ShaderUniforms.ndcMat[0][0]);
glUniform1f(gl.n2ModVolShader.sp_ShaderColor, 1 - FPU_SHAD_SCALE.scale_factor / 256.f);
glcache.UseProgram(gl.n2ModVolShader.program);
if (gl.n2ModVolShader.depth_scale != -1)
glUniform4fv(gl.n2ModVolShader.depth_scale, 1, ShaderUniforms.depth_coefs);
glUniformMatrix4fv(gl.n2ModVolShader.ndcMat, 1, GL_FALSE, &ShaderUniforms.ndcMat[0][0]);
glUniform1f(gl.n2ModVolShader.sp_ShaderColor, 1 - FPU_SHAD_SCALE.scale_factor / 256.f);
}
ShaderUniforms.PT_ALPHA=(PT_ALPHA_REF&0xFF)/255.0f;

View File

@ -110,6 +110,7 @@ struct PipelineShader
bool trilinear;
bool palette;
bool naomi2;
bool divPosZ;
};
class GlBuffer

View File

@ -46,10 +46,10 @@ INTERPOLATION out highp vec4 vtx_offs;
#if pp_TwoVolumes == 1
INTERPOLATION out vec4 vtx_base1;
INTERPOLATION out vec4 vtx_offs1;
noperspective out vec2 vtx_uv1;
out vec2 vtx_uv1;
#endif
#endif
NOPERSPECTIVE out highp vec3 vtx_uv;
out highp vec3 vtx_uv;
#ifdef OIT_RENDER
flat out uint vtx_index;
#endif

View File

@ -1731,25 +1731,30 @@ static void gui_display_settings()
#ifndef TARGET_IPHONE
OptionCheckbox("VSync", config::VSync, "Synchronizes the frame rate with the screen refresh rate. Recommended");
ImGui::Indent();
if (!config::VSync || !isVulkan(config::RendererType))
if (isVulkan(config::RendererType))
{
ImGui::PushItemFlag(ImGuiItemFlags_Disabled, true);
ImGui::PushStyleVar(ImGuiStyleVar_Alpha, ImGui::GetStyle().Alpha * 0.5f);
ImGui::Indent();
if (!config::VSync)
{
ImGui::PushItemFlag(ImGuiItemFlags_Disabled, true);
ImGui::PushStyleVar(ImGuiStyleVar_Alpha, ImGui::GetStyle().Alpha * 0.5f);
}
OptionCheckbox("Duplicate frames", config::DupeFrames, "Duplicate frames on high refresh rate monitors (120 Hz and higher)");
if (!config::VSync)
{
ImGui::PopItemFlag();
ImGui::PopStyleVar();
}
ImGui::Unindent();
}
OptionCheckbox("Duplicate frames", config::DupeFrames, "Duplicate frames on high refresh rate monitors (120 Hz and higher)");
if (!config::VSync || !isVulkan(config::RendererType))
{
ImGui::PopItemFlag();
ImGui::PopStyleVar();
}
ImGui::Unindent();
#endif
OptionCheckbox("Show FPS Counter", config::ShowFPS, "Show on-screen frame/sec counter");
OptionCheckbox("Show VMU In-game", config::FloatVMUs, "Show the VMU LCD screens while in-game");
OptionCheckbox("Rotate Screen 90°", config::Rotate90, "Rotate the screen 90° counterclockwise");
OptionCheckbox("Delay Frame Swapping", config::DelayFrameSwapping,
"Useful to avoid flashing screen or glitchy videos. Not recommended on slow platforms");
OptionCheckbox("Native Depth Interpolation", config::NativeDepthInterpolation,
"Helps with texture corruption and depth issues on AMD GPUs. Can also help Intel GPUs in some cases.");
constexpr int apiCount = 0
#ifdef USE_VULKAN
+ 1

View File

@ -140,8 +140,9 @@ void OITPipelineManager::CreatePipeline(u32 listType, bool autosort, const PolyP
vk::PipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo(vk::PipelineDynamicStateCreateFlags(), 2, dynamicStates);
bool twoVolume = pp.tsp1.full != (u32)-1 || pp.tcw1.full != (u32)-1;
bool divPosZ = !settings.platform.isNaomi2() && config::NativeDepthInterpolation;
vk::ShaderModule vertex_module = shaderManager->GetVertexShader(
OITShaderManager::VertexShaderParams{ pp.pcw.Gouraud == 1, pp.isNaomi2(), pass != Pass::Depth, twoVolume, pp.pcw.Texture == 1 });
OITShaderManager::VertexShaderParams{ pp.pcw.Gouraud == 1, pp.isNaomi2(), pass != Pass::Depth, twoVolume, pp.pcw.Texture == 1, divPosZ });
OITShaderManager::FragmentShaderParams params = {};
params.alphaTest = listType == ListType_Punch_Through;
params.bumpmap = pp.tcw.PixelFmt == PixelBumpMap;
@ -157,6 +158,7 @@ void OITPipelineManager::CreatePipeline(u32 listType, bool autosort, const PolyP
params.pass = pass;
params.twoVolume = twoVolume;
params.palette = gpuPalette;
params.divPosZ = divPosZ;
vk::ShaderModule fragment_module = shaderManager->GetFragmentShader(params);
vk::PipelineShaderStageCreateInfo stages[] = {
@ -440,8 +442,8 @@ void OITPipelineManager::CreateModVolPipeline(ModVolMode mode, int cullMode, boo
vk::DynamicState dynamicStates[2] = { vk::DynamicState::eViewport, vk::DynamicState::eScissor };
vk::PipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo(vk::PipelineDynamicStateCreateFlags(), 2, dynamicStates);
vk::ShaderModule vertex_module = shaderManager->GetModVolVertexShader(naomi2);
vk::ShaderModule fragment_module = shaderManager->GetModVolShader();
vk::ShaderModule vertex_module = shaderManager->GetModVolVertexShader(OITShaderManager::ModVolShaderParams{ naomi2, !settings.platform.isNaomi2() && config::NativeDepthInterpolation });
vk::ShaderModule fragment_module = shaderManager->GetModVolShader(!settings.platform.isNaomi2() && config::NativeDepthInterpolation);
vk::PipelineShaderStageCreateInfo stages[] = {
{ vk::PipelineShaderStageCreateFlags(), vk::ShaderStageFlagBits::eVertex, vertex_module, "main" },
@ -535,8 +537,9 @@ void OITPipelineManager::CreateTrModVolPipeline(ModVolMode mode, int cullMode, b
vk::DynamicState dynamicStates[2] = { vk::DynamicState::eViewport, vk::DynamicState::eScissor };
vk::PipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo(vk::PipelineDynamicStateCreateFlags(), 2, dynamicStates);
vk::ShaderModule vertex_module = shaderManager->GetModVolVertexShader(naomi2);
vk::ShaderModule fragment_module = shaderManager->GetTrModVolShader(mode);
bool divPosZ = !settings.platform.isNaomi2() && config::NativeDepthInterpolation;
vk::ShaderModule vertex_module = shaderManager->GetModVolVertexShader(OITShaderManager::ModVolShaderParams{ naomi2, divPosZ });
vk::ShaderModule fragment_module = shaderManager->GetTrModVolShader(OITShaderManager::TrModVolShaderParams{ mode, divPosZ });
vk::PipelineShaderStageCreateInfo stages[] = {
{ vk::PipelineShaderStageCreateFlags(), vk::ShaderStageFlagBits::eVertex, vertex_module, "main" },

View File

@ -405,12 +405,13 @@ private:
}
hash |= (pp->isp.ZWriteDis << 20) | (pp->isp.CullMode << 21) | ((autosort ? 6 : pp->isp.DepthMode) << 23);
hash |= ((u32)gpuPalette << 26) | ((u32)pass << 27) | ((u32)pp->isNaomi2() << 29);
hash |= (u32)(!settings.platform.isNaomi2() && config::NativeDepthInterpolation) << 30;
return hash;
}
u32 hash(ModVolMode mode, int cullMode, bool naomi2) const
{
return ((int)mode << 2) | cullMode | ((u32)naomi2 << 5);
return ((int)mode << 2) | cullMode | ((u32)naomi2 << 5) | ((u32)(!settings.platform.isNaomi2() && config::NativeDepthInterpolation) << 6);
}
vk::PipelineVertexInputStateCreateInfo GetMainVertexInputStateCreateInfo(bool full = true) const

View File

@ -44,30 +44,38 @@ layout (location = 6) in mediump vec2 in_uv1;
layout (location = 0) INTERPOLATION out highp vec4 vtx_base;
layout (location = 1) INTERPOLATION out highp vec4 vtx_offs;
layout (location = 2) noperspective out highp vec3 vtx_uv;
layout (location = 2) out highp vec3 vtx_uv;
layout (location = 3) INTERPOLATION out highp vec4 vtx_base1; // New for OIT, only for OP/PT with 2-volume
layout (location = 4) INTERPOLATION out highp vec4 vtx_offs1;
layout (location = 5) noperspective out highp vec2 vtx_uv1;
layout (location = 5) out highp vec2 vtx_uv1;
layout (location = 6) flat out uint vtx_index;
void main()
{
vec4 vpos = uniformBuffer.ndcMat * in_pos;
#if DIV_POS_Z == 1
vpos /= vpos.z;
vpos.z = vpos.w;
#endif
vtx_base = vec4(in_base) / 255.0;
vtx_offs = vec4(in_offs) / 255.0;
vtx_uv = vec3(in_uv * vpos.z, vpos.z);
vtx_uv = vec3(in_uv, vpos.z);
vtx_base1 = vec4(in_base1) / 255.0;
vtx_offs1 = vec4(in_offs1) / 255.0;
vtx_uv1 = in_uv1 * vpos.z;
#if pp_Gouraud == 1
vtx_uv1 = in_uv1;
#if pp_Gouraud == 1 && DIV_POS_Z != 1
vtx_base *= vpos.z;
vtx_offs *= vpos.z;
vtx_base1 *= vpos.z;
vtx_offs1 *= vpos.z;
#endif
vtx_index = uint(pushConstants.polyNumber) + uint(gl_VertexIndex);
#if DIV_POS_Z != 1
vtx_uv.xy *= vpos.z;
vtx_uv1 *= vpos.z;
vpos.w = 1.0;
vpos.z = 0.0;
#endif
gl_Position = vpos;
}
)";
@ -179,10 +187,10 @@ layout (input_attachment_index = 0, set = 0, binding = 5) uniform subpassInput D
// Vertex input
layout (location = 0) INTERPOLATION in highp vec4 vtx_base;
layout (location = 1) INTERPOLATION in highp vec4 vtx_offs;
layout (location = 2) noperspective in highp vec3 vtx_uv;
layout (location = 2) in highp vec3 vtx_uv;
layout (location = 3) INTERPOLATION in highp vec4 vtx_base1; // new for OIT. Only if 2 vol
layout (location = 4) INTERPOLATION in highp vec4 vtx_offs1;
layout (location = 5) noperspective in highp vec2 vtx_uv1;
layout (location = 5) in highp vec2 vtx_uv1;
layout (location = 6) flat in uint vtx_index;
#if pp_FogCtrl != 2 || pp_TwoVolumes == 1
@ -190,7 +198,13 @@ layout (set = 0, binding = 2) uniform sampler2D fog_table;
float fog_mode2(float w)
{
float z = clamp(w * uniformBuffer.sp_FOG_DENSITY, 1.0, 255.9999);
float z = clamp(
#if DIV_POS_Z == 1
uniformBuffer.sp_FOG_DENSITY / w
#else
uniformBuffer.sp_FOG_DENSITY * w
#endif
, 1.0, 255.9999);
float exp = floor(log2(z));
float m = z * 16.0 / pow(2.0, exp) - 16.0;
float idx = floor(m) + exp * 16.0 + 0.5;
@ -213,7 +227,12 @@ vec4 colorClamp(vec4 col)
vec4 palettePixel(sampler2D tex, vec3 coords)
{
vec4 c = vec4(textureProj(tex, coords).r * 255.0 / 1023.0 + pushConstants.palette_index, 0.5, 0.0, 0.0);
#if DIV_POS_Z == 1
float texIdx = texture(tex, coords.xy).r;
#else
float texIdx = textureProj(tex, coords).r;
#endif
vec4 c = vec4(texIdx * 255.0 / 1023.0 + pushConstants.palette_index, 0.5, 0.0, 0.0);
return texture(palette, c.xy);
}
@ -261,7 +280,7 @@ void main()
}
#endif
#endif
#if pp_Gouraud == 1
#if pp_Gouraud == 1 && DIV_POS_Z != 1
color /= vtx_uv.z;
offset /= vtx_uv.z;
#endif
@ -280,14 +299,22 @@ void main()
#if pp_TwoVolumes == 1
if (area1)
#if pp_Palette == 0
texcol = textureProj(tex1, vec3(vtx_uv1, vtx_uv.z));
#if DIV_POS_Z == 1
texcol = texture(tex1, vtx_uv1);
#else
texcol = textureProj(tex1, vec3(vtx_uv1, vtx_uv.z));
#endif
#else
texcol = palettePixel(tex1, vec3(vtx_uv1, vtx_uv.z));
#endif
else
#endif
#if pp_Palette == 0
#if DIV_POS_Z == 1
texcol = texture(tex0, vtx_uv.xy);
#else
texcol = textureProj(tex0, vtx_uv);
#endif
#else
texcol = palettePixel(tex0, vtx_uv);
#endif
@ -428,7 +455,7 @@ void main()
Pixel pixel;
pixel.color = packColors(clamp(color, vec4(0.0), vec4(1.0)));
pixel.depth = vtx_uv.z;
pixel.depth = gl_FragDepth;
pixel.seq_num = vtx_index;
pixel.next = atomicExchange(abufferPointer.pointers[coords.x + coords.y * uniformBuffer.viewportWidth], idx);
PixelBuffer.pixels[idx] = pixel;
@ -438,7 +465,7 @@ void main()
)";
static const char OITModifierVolumeShader[] = R"(
layout (location = 0) noperspective in highp float depth;
layout (location = 0) in highp float depth;
void main()
{
@ -614,7 +641,7 @@ void main(void)
)";
static const char OITTranslucentModvolShaderSource[] = R"(
layout (location = 0) noperspective in highp float depth;
layout (location = 0) in highp float depth;
// Must match ModifierVolumeMode enum values
#define MV_XOR 0
@ -624,6 +651,9 @@ layout (location = 0) noperspective in highp float depth;
void main()
{
#if MV_MODE == MV_XOR || MV_MODE == MV_OR
setFragDepth(depth);
#endif
ivec2 coords = ivec2(gl_FragCoord.xy);
uint idx = abufferPointer.pointers[coords.x + coords.y * uniformBuffer.viewportWidth];
@ -635,10 +665,10 @@ void main()
if (getShadowEnable(pp))
{
#if MV_MODE == MV_XOR
if (depth >= pixel.depth)
if (gl_FragDepth >= pixel.depth)
atomicXor(PixelBuffer.pixels[idx].seq_num, SHADOW_STENCIL);
#elif MV_MODE == MV_OR
if (depth >= pixel.depth)
if (gl_FragDepth >= pixel.depth)
atomicOr(PixelBuffer.pixels[idx].seq_num, SHADOW_STENCIL);
#elif MV_MODE == MV_INCLUSION
uint prev_val = atomicAnd(PixelBuffer.pixels[idx].seq_num, ~(SHADOW_STENCIL));
@ -692,10 +722,10 @@ layout (location = 7) in vec3 in_normal;
layout (location = 0) INTERPOLATION out highp vec4 vtx_base;
layout (location = 1) INTERPOLATION out highp vec4 vtx_offs;
layout (location = 2) noperspective out highp vec3 vtx_uv;
layout (location = 2) out highp vec3 vtx_uv;
layout (location = 3) INTERPOLATION out highp vec4 vtx_base1;
layout (location = 4) INTERPOLATION out highp vec4 vtx_offs1;
layout (location = 5) noperspective out highp vec2 vtx_uv1;
layout (location = 5) out highp vec2 vtx_uv1;
layout (location = 6) flat out uint vtx_index;
void wDivide(inout vec4 vpos)
@ -779,7 +809,8 @@ vk::UniqueShaderModule OITShaderManager::compileShader(const VertexShaderParams&
{
VulkanSource src;
src.addConstant("pp_Gouraud", (int)params.gouraud)
.addSource(GouraudSource);
.addConstant("DIV_POS_Z", (int)params.divPosZ)
.addSource(GouraudSource);
if (params.naomi2)
src.addConstant("pp_TwoVolumes", (int)params.twoVolume)
.addConstant("LIGHT_ON", (int)params.lightOn)
@ -807,6 +838,7 @@ vk::UniqueShaderModule OITShaderManager::compileShader(const FragmentShaderParam
.addConstant("pp_BumpMap", (int)params.bumpmap)
.addConstant("ColorClamping", (int)params.clamping)
.addConstant("pp_Palette", (int)params.palette)
.addConstant("DIV_POS_Z", (int)params.divPosZ)
.addConstant("PASS", (int)params.pass)
.addSource(GouraudSource)
.addSource(OITShaderHeader)
@ -836,30 +868,31 @@ vk::UniqueShaderModule OITShaderManager::compileClearShader()
return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment, src.generate());
}
vk::UniqueShaderModule OITShaderManager::compileModVolVertexShader(bool naomi2)
vk::UniqueShaderModule OITShaderManager::compileShader(const ModVolShaderParams& params)
{
VulkanSource src;
if (naomi2)
if (params.naomi2)
src.addSource(N2ModVolVertexShaderSource);
else
src.addSource(ModVolVertexShaderSource);
src.addConstant("DIV_POS_Z", (int)params.divPosZ)
.addSource(ModVolVertexShaderSource);
return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eVertex, src.generate());
}
vk::UniqueShaderModule OITShaderManager::compileModVolFragmentShader()
vk::UniqueShaderModule OITShaderManager::compileModVolFragmentShader(bool divPosZ)
{
VulkanSource src;
src.addSource(OITShaderHeader)
src.addConstant("DIV_POS_Z", (int)divPosZ)
.addSource(OITShaderHeader)
.addSource(OITModifierVolumeShader);
return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment, src.generate());
}
void OITShaderManager::compileTrModVolFragmentShader(ModVolMode mode)
vk::UniqueShaderModule OITShaderManager::compileShader(const TrModVolShaderParams& params)
{
if (trModVolShaders.empty())
trModVolShaders.resize((size_t)ModVolMode::Final);
VulkanSource src;
src.addConstant("MAX_PIXELS_PER_FRAGMENT", config::PerPixelLayers)
.addConstant("MV_MODE", (int)mode)
.addConstant("MV_MODE", (int)params.mode)
.addConstant("DIV_POS_Z", (int)params.divPosZ)
.addSource(OITShaderHeader)
.addSource(OITTranslucentModvolShaderSource);
trModVolShaders[(size_t)mode] = ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment, src.generate());
return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment, src.generate());
}

View File

@ -35,9 +35,10 @@ public:
bool lightOn;
bool twoVolume;
bool texture;
bool divPosZ;
u32 hash() { return (u32)gouraud | ((u32)naomi2 << 1) | ((u32)lightOn << 2)
| ((u32)twoVolume << 3) | ((u32)texture << 4); }
| ((u32)twoVolume << 3) | ((u32)texture << 4) | ((u32)divPosZ << 5); }
};
// alpha test, clip test, use alpha, texture, ignore alpha, shader instr, offset, fog, gouraud, bump, clamp
@ -56,6 +57,7 @@ public:
bool clamping;
bool twoVolume;
bool palette;
bool divPosZ;
Pass pass;
u32 hash()
@ -64,39 +66,39 @@ public:
| ((u32)texture << 3) | ((u32)ignoreTexAlpha << 4) | (shaderInstr << 5)
| ((u32)offset << 7) | ((u32)fog << 8) | ((u32)gouraud << 10)
| ((u32)bumpmap << 11) | ((u32)clamping << 12) | ((u32)twoVolume << 13)
| ((u32)palette << 14) | ((int)pass << 15);
| ((u32)palette << 14) | ((int)pass << 15) | ((u32)divPosZ << 17);
}
};
// Parameters selecting a modifier volume vertex shader variant.
// Kept as a plain aggregate so it can be brace-initialized as
// { naomi2, divPosZ }.
struct ModVolShaderParams
{
	bool naomi2;
	bool divPosZ;

	// Packs the two flags into a cache key: bit 0 = naomi2, bit 1 = divPosZ.
	u32 hash()
	{
		u32 key = naomi2 ? 1u : 0u;
		if (divPosZ)
			key |= 1u << 1;
		return key;
	}
};
// Parameters identifying one variant of the translucent modifier-volume fragment
// shader (passed to GetTrModVolShader and used as the cache key via hash()).
struct TrModVolShaderParams
{
	ModVolMode mode;	// emitted as the MV_MODE shader constant
	bool divPosZ;		// emitted as the DIV_POS_Z shader constant

	// Packs the parameters into a lookup key: mode in the low bits, divPosZ at bit 3.
	// NOTE(review): assumes every ModVolMode value fits in 3 bits so the fields
	// cannot overlap -- confirm against the enum definition.
	// const because computing the key never modifies the parameters.
	u32 hash() const { return static_cast<u32>(mode) | (static_cast<u32>(divPosZ) << 3); }
};
vk::ShaderModule GetVertexShader(const VertexShaderParams& params) { return getShader(vertexShaders, params); }
vk::ShaderModule GetFragmentShader(const FragmentShaderParams& params) { return getShader(fragmentShaders, params); }
// Returns the modifier-volume vertex shader for the requested flavour,
// compiling and caching it the first time that flavour is asked for.
vk::ShaderModule GetModVolVertexShader(bool naomi2)
{
	// Each flavour has its own cached module; choose the slot first.
	vk::UniqueShaderModule *slot = &modVolVertexShader;
	if (naomi2)
		slot = &n2ModVolVertexShader;

	// An empty slot means this flavour has not been compiled yet.
	if (!*slot)
		*slot = compileModVolVertexShader(naomi2);
	return **slot;
}
vk::ShaderModule GetModVolShader()
vk::ShaderModule GetModVolVertexShader(const ModVolShaderParams& params) { return getShader(modVolVertexShaders, params); }
// Returns the modifier-volume fragment shader for the given divPosZ setting.
// modVolShaders is indexed directly by the bool, and an entry that is still empty is
// compiled on demand before being returned.
// NOTE(review): the two assignment lines look like the before/after sides of a diff (the
// argument-less compileModVolFragmentShader() call matches the older declaration); only one
// of them is expected to be live -- confirm against the repository.
vk::ShaderModule GetModVolShader(bool divPosZ)
{
auto& modVolShader = modVolShaders[divPosZ];
if (!modVolShader)
modVolShader = compileModVolFragmentShader();
modVolShader = compileModVolFragmentShader(divPosZ);
return *modVolShader;
}
// Returns the translucent modifier-volume shader for `mode`, compiling it when it is
// missing. When config::PerPixelLayers no longer matches maxLayers, every cached
// translucent modvol shader and the final fragment shader are dropped first.
vk::ShaderModule GetTrModVolShader(ModVolMode mode)
{
	const size_t index = (size_t)mode;

	// Missing entry (or nothing compiled at all) => compile.
	bool needCompile = trModVolShaders.empty() || !trModVolShaders[index];

	// A changed layer count invalidates everything compiled so far.
	if (maxLayers != config::PerPixelLayers)
	{
		trModVolShaders.clear();
		finalFragmentShader.reset();
		needCompile = true;
	}

	if (needCompile)
	{
		compileTrModVolFragmentShader(mode);
		maxLayers = config::PerPixelLayers;
	}
	return *trModVolShaders[index];
}
vk::ShaderModule GetTrModVolShader(const TrModVolShaderParams& params) { return getShader(trModVolShaders, params); }
vk::ShaderModule GetFinalShader()
{
@ -126,27 +128,27 @@ private:
// Generic shader cache lookup.
// Looks up the module stored in `map` under params.hash(); on a miss, compiles it through the
// compileShader overload matching T, stores it under the same key and returns the raw handle
// (the map keeps ownership through vk::UniqueShaderModule).
// NOTE(review): two versions are shown interleaved, apparently the before/after sides of a diff:
// one calls params.hash() at every use, the other computes it once into `h`. As written the
// block declares `it` twice -- confirm which lines are current.
template<typename T>
vk::ShaderModule getShader(std::map<u32, vk::UniqueShaderModule>& map, T params)
{
auto it = map.find(params.hash());
u32 h = params.hash();
auto it = map.find(h);
if (it != map.end())
return it->second.get();
map[params.hash()] = compileShader(params);
return map[params.hash()].get();
map[h] = compileShader(params);
return map[h].get();
}
vk::UniqueShaderModule compileShader(const VertexShaderParams& params);
vk::UniqueShaderModule compileShader(const FragmentShaderParams& params);
vk::UniqueShaderModule compileModVolVertexShader(bool naomi2);
vk::UniqueShaderModule compileModVolFragmentShader();
void compileTrModVolFragmentShader(ModVolMode mode);
vk::UniqueShaderModule compileShader(const ModVolShaderParams& params);
vk::UniqueShaderModule compileModVolFragmentShader(bool divPosZ);
vk::UniqueShaderModule compileShader(const TrModVolShaderParams& params);
vk::UniqueShaderModule compileFinalShader();
vk::UniqueShaderModule compileFinalVertexShader();
vk::UniqueShaderModule compileClearShader();
std::map<u32, vk::UniqueShaderModule> vertexShaders;
std::map<u32, vk::UniqueShaderModule> fragmentShaders;
vk::UniqueShaderModule modVolVertexShader;
vk::UniqueShaderModule n2ModVolVertexShader;
vk::UniqueShaderModule modVolShader;
std::vector<vk::UniqueShaderModule> trModVolShaders;
std::map<u32, vk::UniqueShaderModule> modVolVertexShaders;
vk::UniqueShaderModule modVolShaders[2];
std::map<u32, vk::UniqueShaderModule> trModVolShaders;
vk::UniqueShaderModule finalVertexShader;
vk::UniqueShaderModule finalFragmentShader;

View File

@ -138,8 +138,9 @@ void PipelineManager::CreateModVolPipeline(ModVolMode mode, int cullMode, bool n
vk::DynamicState dynamicStates[2] = { vk::DynamicState::eViewport, vk::DynamicState::eScissor };
vk::PipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo(vk::PipelineDynamicStateCreateFlags(), 2, dynamicStates);
vk::ShaderModule vertex_module = shaderManager->GetModVolVertexShader(naomi2);
vk::ShaderModule fragment_module = shaderManager->GetModVolShader();
ModVolShaderParams shaderParams { naomi2, !settings.platform.isNaomi2() && config::NativeDepthInterpolation };
vk::ShaderModule vertex_module = shaderManager->GetModVolVertexShader(shaderParams);
vk::ShaderModule fragment_module = shaderManager->GetModVolShader(!settings.platform.isNaomi2() && config::NativeDepthInterpolation);
vk::PipelineShaderStageCreateInfo stages[] = {
{ vk::PipelineShaderStageCreateFlags(), vk::ShaderStageFlagBits::eVertex, vertex_module, "main" },
@ -241,8 +242,9 @@ void PipelineManager::CreateDepthPassPipeline(int cullMode, bool naomi2)
vk::DynamicState dynamicStates[2] = { vk::DynamicState::eViewport, vk::DynamicState::eScissor };
vk::PipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo(vk::PipelineDynamicStateCreateFlags(), 2, dynamicStates);
vk::ShaderModule vertex_module = shaderManager->GetModVolVertexShader(naomi2);
vk::ShaderModule fragment_module = shaderManager->GetModVolShader();
ModVolShaderParams shaderParams { naomi2, !settings.platform.isNaomi2() && config::NativeDepthInterpolation };
vk::ShaderModule vertex_module = shaderManager->GetModVolVertexShader(shaderParams);
vk::ShaderModule fragment_module = shaderManager->GetModVolShader(!settings.platform.isNaomi2() && config::NativeDepthInterpolation);
vk::PipelineShaderStageCreateInfo stages[] = {
{ vk::PipelineShaderStageCreateFlags(), vk::ShaderStageFlagBits::eVertex, vertex_module, "main" },
@ -392,7 +394,8 @@ void PipelineManager::CreatePipeline(u32 listType, bool sortTriangles, const Pol
vk::DynamicState dynamicStates[2] = { vk::DynamicState::eViewport, vk::DynamicState::eScissor };
vk::PipelineDynamicStateCreateInfo pipelineDynamicStateCreateInfo(vk::PipelineDynamicStateCreateFlags(), 2, dynamicStates);
vk::ShaderModule vertex_module = shaderManager->GetVertexShader(VertexShaderParams{ pp.pcw.Gouraud == 1, pp.isNaomi2() });
bool divPosZ = !settings.platform.isNaomi2() && config::NativeDepthInterpolation;
vk::ShaderModule vertex_module = shaderManager->GetVertexShader(VertexShaderParams { pp.pcw.Gouraud == 1, pp.isNaomi2(), divPosZ });
FragmentShaderParams params = {};
params.alphaTest = listType == ListType_Punch_Through;
params.bumpmap = pp.tcw.PixelFmt == PixelBumpMap;
@ -407,6 +410,7 @@ void PipelineManager::CreatePipeline(u32 listType, bool sortTriangles, const Pol
params.trilinear = pp.pcw.Texture && pp.tsp.FilterMode > 1 && listType != ListType_Punch_Through && pp.tcw.MipMapped == 1;
params.useAlpha = pp.tsp.UseAlpha;
params.palette = gpuPalette;
params.divPosZ = divPosZ;
vk::ShaderModule fragment_module = shaderManager->GetFragmentShader(params);
vk::PipelineShaderStageCreateInfo stages[] = {

View File

@ -275,16 +275,17 @@ private:
| (pp->tsp.DstInstr << 17);
hash |= (pp->isp.ZWriteDis << 20) | (pp->isp.CullMode << 21) | (pp->isp.DepthMode << 23);
hash |= ((u32)sortTriangles << 26) | ((u32)gpuPalette << 27) | ((u32)pp->isNaomi2() << 28);
hash |= (u32)(!settings.platform.isNaomi2() && config::NativeDepthInterpolation) << 29;
return hash;
}
// Packs the modifier-volume pipeline parameters into a single key:
// cullMode is OR-ed in unshifted, mode is shifted left by 2, naomi2 occupies bit 5.
// The longer return additionally sets bit 6 when native depth interpolation is enabled and the
// platform is not Naomi 2.
// NOTE(review): the two return statements look like the before/after sides of a diff; as
// literal code only the first would execute -- confirm which one is current.
// NOTE(review): assumes cullMode < 4 and mode < 8 so the fields do not overlap -- verify.
u32 hash(ModVolMode mode, int cullMode, bool naomi2) const
{
return ((int)mode << 2) | cullMode | ((int)naomi2 << 5);
return ((int)mode << 2) | cullMode | ((int)naomi2 << 5) | ((int)(!settings.platform.isNaomi2() && config::NativeDepthInterpolation) << 6);
}
// Packs cull mode and Naomi 2 flag into a single key:
// cullMode is OR-ed in unshifted and naomi2 occupies bit 2.
// The longer return additionally sets bit 3 when native depth interpolation is enabled and the
// platform is not Naomi 2.
// NOTE(review): the two return statements look like the before/after sides of a diff; as
// literal code only the first would execute -- confirm which one is current.
// NOTE(review): assumes cullMode < 4 so it cannot collide with the flag bits -- verify.
u32 hash(int cullMode, bool naomi2) const
{
return cullMode | ((int)naomi2 << 2);
return cullMode | ((int)naomi2 << 2) | ((int)(!settings.platform.isNaomi2() && config::NativeDepthInterpolation) << 3);
}
vk::PipelineVertexInputStateCreateInfo GetMainVertexInputStateCreateInfo(bool full = true) const

View File

@ -36,20 +36,28 @@ layout (location = 3) in mediump vec2 in_uv;
layout (location = 0) INTERPOLATION out highp vec4 vtx_base;
layout (location = 1) INTERPOLATION out highp vec4 vtx_offs;
layout (location = 2) noperspective out highp vec3 vtx_uv;
layout (location = 2) out highp vec3 vtx_uv;
void main()
{
vec4 vpos = uniformBuffer.ndcMat * in_pos;
#if DIV_POS_Z == 1
vpos /= vpos.z;
vpos.z = vpos.w;
#endif
vtx_base = vec4(in_base) / 255.0;
vtx_offs = vec4(in_offs) / 255.0;
vtx_uv = vec3(in_uv * vpos.z, vpos.z);
#if pp_Gouraud == 1
vtx_uv = vec3(in_uv, vpos.z);
#if pp_Gouraud == 1 && DIV_POS_Z != 1
vtx_base *= vpos.z;
vtx_offs *= vpos.z;
#endif
#if DIV_POS_Z != 1
vtx_uv.xy *= vpos.z;
vpos.w = 1.0;
vpos.z = 0.0;
#endif
gl_Position = vpos;
}
)";
@ -87,14 +95,20 @@ layout (set = 0, binding = 3) uniform sampler2D palette;
// Vertex input
layout (location = 0) INTERPOLATION in highp vec4 vtx_base;
layout (location = 1) INTERPOLATION in highp vec4 vtx_offs;
layout (location = 2) noperspective in highp vec3 vtx_uv;
layout (location = 2) in highp vec3 vtx_uv;
#if pp_FogCtrl != 2
layout (set = 0, binding = 2) uniform sampler2D fog_table;
float fog_mode2(float w)
{
float z = clamp(w * uniformBuffer.sp_FOG_DENSITY, 1.0, 255.9999);
float z = clamp(
#if DIV_POS_Z == 1
uniformBuffer.sp_FOG_DENSITY / w
#else
uniformBuffer.sp_FOG_DENSITY * w
#endif
, 1.0, 255.9999);
float exp = floor(log2(z));
float m = z * 16.0 / pow(2.0, exp) - 16.0;
float idx = floor(m) + exp * 16.0 + 0.5;
@ -116,7 +130,12 @@ vec4 colorClamp(vec4 col)
vec4 palettePixel(sampler2D tex, vec3 coords)
{
vec4 c = vec4(textureProj(tex, coords).r * 255.0 / 1023.0 + pushConstants.palette_index, 0.5, 0.0, 0.0);
#if DIV_POS_Z == 1
float texIdx = texture(tex, coords.xy).r;
#else
float texIdx = textureProj(tex, coords).r;
#endif
vec4 c = vec4(texIdx * 255.0 / 1023.0 + pushConstants.palette_index, 0.5, 0.0, 0.0);
return texture(palette, c.xy);
}
@ -133,7 +152,7 @@ void main()
highp vec4 color = vtx_base;
highp vec4 offset = vtx_offs;
#if pp_Gouraud == 1
#if pp_Gouraud == 1 && DIV_POS_Z != 1
color /= vtx_uv.z;
offset /= vtx_uv.z;
#endif
@ -141,12 +160,16 @@ void main()
color.a = 1.0;
#endif
#if pp_FogCtrl == 3
color = vec4(uniformBuffer.sp_FOG_COL_RAM.rgb, fog_mode2(gl_FragCoord.w));
color = vec4(uniformBuffer.sp_FOG_COL_RAM.rgb, fog_mode2(vtx_uv.z));
#endif
#if pp_Texture == 1
{
#if pp_Palette == 0
vec4 texcol = textureProj(tex, vtx_uv);
#if DIV_POS_Z == 1
vec4 texcol = texture(tex, vtx_uv.xy);
#else
vec4 texcol = textureProj(tex, vtx_uv);
#endif
#else
vec4 texcol = palettePixel(tex, vtx_uv);
#endif
@ -216,7 +239,11 @@ void main()
//color.rgb = vec3(gl_FragCoord.w * uniformBuffer.sp_FOG_DENSITY / 128.0);
highp float w = vtx_uv.z * 100000.0;
#if DIV_POS_Z == 1
highp float w = 100000.0 / vtx_uv.z;
#else
highp float w = 100000.0 * vtx_uv.z;
#endif
gl_FragDepth = log2(1.0 + w) / 34.0;
gl_FragColor = color;
@ -230,20 +257,26 @@ layout (std140, set = 0, binding = 0) uniform VertexShaderUniforms
} uniformBuffer;
layout (location = 0) in vec4 in_pos;
layout (location = 0) noperspective out highp float depth;
layout (location = 0) out highp float depth;
void main()
{
vec4 vpos = uniformBuffer.ndcMat * in_pos;
#if DIV_POS_Z == 1
vpos /= vpos.z;
vpos.z = vpos.w;
depth = vpos.w;
#else
depth = vpos.z;
vpos.w = 1.0;
vpos.z = 0.0;
#endif
gl_Position = vpos;
}
)";
static const char ModVolFragmentShaderSource[] = R"(
layout (location = 0) noperspective in highp float depth;
layout (location = 0) in highp float depth;
layout (location = 0) out vec4 FragColor;
layout (push_constant) uniform pushBlock
@ -253,7 +286,11 @@ layout (push_constant) uniform pushBlock
void main()
{
highp float w = depth * 100000.0;
#if DIV_POS_Z == 1
highp float w = 100000.0 / depth;
#else
highp float w = 100000.0 * depth;
#endif
gl_FragDepth = log2(1.0 + w) / 34.0;
FragColor = vec4(0.0, 0.0, 0.0, pushConstants.sp_ShaderColor);
}
@ -570,7 +607,7 @@ layout (location = 4) in vec3 in_normal;
layout (location = 0) INTERPOLATION out highp vec4 vtx_base;
layout (location = 1) INTERPOLATION out highp vec4 vtx_offs;
layout (location = 2) noperspective out highp vec3 vtx_uv;
layout (location = 2) out highp vec3 vtx_uv;
void wDivide(inout vec4 vpos)
{
@ -633,7 +670,7 @@ layout (std140, set = 1, binding = 2) uniform N2VertexShaderUniforms
} n2Uniform;
layout (location = 0) in vec4 in_pos;
layout (location = 0) noperspective out highp float depth;
layout (location = 0) out highp float depth;
void wDivide(inout vec4 vpos)
{
@ -660,6 +697,7 @@ vk::UniqueShaderModule ShaderManager::compileShader(const VertexShaderParams& pa
if (!params.naomi2)
{
src.addConstant("pp_Gouraud", (int)params.gouraud)
.addConstant("DIV_POS_Z", (int)params.divPosZ)
.addSource(GouraudSource)
.addSource(VertexShaderSource);
}
@ -689,20 +727,24 @@ vk::UniqueShaderModule ShaderManager::compileShader(const FragmentShaderParams&
.addConstant("ColorClamping", (int)params.clamping)
.addConstant("pp_TriLinear", (int)params.trilinear)
.addConstant("pp_Palette", (int)params.palette)
.addConstant("DIV_POS_Z", (int)params.divPosZ)
.addSource(GouraudSource)
.addSource(FragmentShaderSource);
return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment, src.generate());
}
// Compiles the modifier-volume vertex shader.
// The Naomi 2 flag picks N2ModVolVertexShaderSource over ModVolVertexShaderSource, and the
// params-based form also defines the DIV_POS_Z constant from params.divPosZ before the source.
// NOTE(review): two signatures and two VulkanSource chains are shown interleaved, apparently the
// before/after sides of a diff (`compileModVolVertexShader(bool)` vs
// `compileShader(const ModVolShaderParams&)`) -- confirm which lines are current.
vk::UniqueShaderModule ShaderManager::compileModVolVertexShader(bool naomi2)
vk::UniqueShaderModule ShaderManager::compileShader(const ModVolShaderParams& params)
{
return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eVertex,
VulkanSource().addSource(naomi2 ? N2ModVolVertexShaderSource : ModVolVertexShaderSource).generate());
VulkanSource().addConstant("DIV_POS_Z", (int)params.divPosZ)
.addSource(params.naomi2 ? N2ModVolVertexShaderSource : ModVolVertexShaderSource).generate());
}
// Compiles the modifier-volume fragment shader from ModVolFragmentShaderSource.
// The form taking divPosZ defines the DIV_POS_Z constant (divPosZ cast to int) ahead of the
// shader source so the GLSL can branch on it at preprocessing time.
// NOTE(review): two signatures and two return statements are shown interleaved, apparently the
// before/after sides of a diff -- confirm which lines are current.
vk::UniqueShaderModule ShaderManager::compileModVolFragmentShader()
vk::UniqueShaderModule ShaderManager::compileModVolFragmentShader(bool divPosZ)
{
return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment, VulkanSource().addSource(ModVolFragmentShaderSource).generate());
return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment,
VulkanSource().addConstant("DIV_POS_Z", (int)divPosZ)
.addSource(ModVolFragmentShaderSource).generate());
}
vk::UniqueShaderModule ShaderManager::compileQuadVertexShader(bool rotate)

View File

@ -29,8 +29,9 @@ struct VertexShaderParams
{
bool gouraud;
bool naomi2;
bool divPosZ;
u32 hash() { return (u32)gouraud | ((u32)naomi2 << 1); }
u32 hash() { return (u32)gouraud | ((u32)naomi2 << 1) | ((u32)divPosZ << 2); }
};
// alpha test, clip test, use alpha, texture, ignore alpha, shader instr, offset, fog, gouraud, bump, clamp, trilinear
@ -49,6 +50,7 @@ struct FragmentShaderParams
bool clamping;
bool trilinear;
bool palette;
bool divPosZ;
u32 hash()
{
@ -56,10 +58,18 @@ struct FragmentShaderParams
| ((u32)texture << 3) | ((u32)ignoreTexAlpha << 4) | (shaderInstr << 5)
| ((u32)offset << 7) | ((u32)fog << 8) | ((u32)gouraud << 10)
| ((u32)bumpmap << 11) | ((u32)clamping << 12) | ((u32)trilinear << 13)
| ((u32)palette << 14);
| ((u32)palette << 14) | ((u32)divPosZ << 15);
}
};
// Parameters identifying one variant of the modifier-volume vertex shader
// (passed to ShaderManager::GetModVolVertexShader and used as the cache key via hash()).
struct ModVolShaderParams
{
	bool naomi2;	// selects N2ModVolVertexShaderSource instead of ModVolVertexShaderSource
	bool divPosZ;	// value emitted as the DIV_POS_Z shader constant

	// Packs the flags into a lookup key: bit 0 = naomi2, bit 1 = divPosZ.
	// const because computing the key never modifies the parameters, which lets
	// callers hash through a const reference.
	u32 hash() const { return static_cast<u32>(naomi2) | (static_cast<u32>(divPosZ) << 1); }
};
// std140 alignment required
struct VertexShaderUniforms
{
@ -96,17 +106,13 @@ class ShaderManager
public:
vk::ShaderModule GetVertexShader(const VertexShaderParams& params) { return getShader(vertexShaders, params); }
vk::ShaderModule GetFragmentShader(const FragmentShaderParams& params) { return getShader(fragmentShaders, params); }
// Returns the modifier-volume vertex shader for the requested flavour,
// compiling and caching it the first time that flavour is asked for.
vk::ShaderModule GetModVolVertexShader(bool naomi2)
{
	// Each flavour has its own cached module; choose the slot first.
	vk::UniqueShaderModule *slot = &modVolVertexShader;
	if (naomi2)
		slot = &n2ModVolVertexShader;

	// An empty slot means this flavour has not been compiled yet.
	if (!*slot)
		*slot = compileModVolVertexShader(naomi2);
	return **slot;
}
vk::ShaderModule GetModVolShader()
vk::ShaderModule GetModVolVertexShader(const ModVolShaderParams& params) { return getShader(modVolVertexShaders, params); }
// Returns the modifier-volume fragment shader for the given divPosZ setting.
// modVolShaders is indexed directly by the bool, and an entry that is still empty is
// compiled on demand before being returned.
// NOTE(review): the two assignment lines look like the before/after sides of a diff (the
// argument-less compileModVolFragmentShader() call matches the older declaration); only one
// of them is expected to be live -- confirm against the repository.
vk::ShaderModule GetModVolShader(bool divPosZ)
{
auto& modVolShader = modVolShaders[divPosZ];
if (!modVolShader)
modVolShader = compileModVolFragmentShader();
modVolShader = compileModVolFragmentShader(divPosZ);
return *modVolShader;
}
vk::ShaderModule GetQuadVertexShader(bool rotate = false)
@ -156,16 +162,17 @@ private:
// Generic shader cache lookup.
// Looks up the module stored in `map` under params.hash(); on a miss, compiles it through the
// compileShader overload matching T, stores it under the same key and returns the raw handle
// (the map keeps ownership through vk::UniqueShaderModule).
// NOTE(review): two versions are shown interleaved, apparently the before/after sides of a diff:
// one calls params.hash() at every use, the other computes it once into `h`. As written the
// block declares `it` twice -- confirm which lines are current.
template<typename T>
vk::ShaderModule getShader(std::map<u32, vk::UniqueShaderModule>& map, T params)
{
auto it = map.find(params.hash());
u32 h = params.hash();
auto it = map.find(h);
if (it != map.end())
return it->second.get();
map[params.hash()] = compileShader(params);
return map[params.hash()].get();
map[h] = compileShader(params);
return map[h].get();
}
vk::UniqueShaderModule compileShader(const VertexShaderParams& params);
vk::UniqueShaderModule compileShader(const FragmentShaderParams& params);
vk::UniqueShaderModule compileModVolVertexShader(bool naomi2);
vk::UniqueShaderModule compileModVolFragmentShader();
vk::UniqueShaderModule compileShader(const ModVolShaderParams& params);
vk::UniqueShaderModule compileModVolFragmentShader(bool divPosZ);
vk::UniqueShaderModule compileQuadVertexShader(bool rotate);
vk::UniqueShaderModule compileQuadFragmentShader(bool ignoreTexAlpha);
vk::UniqueShaderModule compileOSDVertexShader();
@ -173,9 +180,8 @@ private:
std::map<u32, vk::UniqueShaderModule> vertexShaders;
std::map<u32, vk::UniqueShaderModule> fragmentShaders;
vk::UniqueShaderModule modVolVertexShader;
vk::UniqueShaderModule n2ModVolVertexShader;
vk::UniqueShaderModule modVolShader;
std::map<u32, vk::UniqueShaderModule> modVolVertexShaders;
vk::UniqueShaderModule modVolShaders[2];
vk::UniqueShaderModule quadVertexShader;
vk::UniqueShaderModule quadRotateVertexShader;
vk::UniqueShaderModule quadFragmentShader;

View File

@ -82,7 +82,7 @@ static const char GouraudSource[] = R"(
#if pp_Gouraud == 0
#define INTERPOLATION flat
#else
#define INTERPOLATION noperspective
#define INTERPOLATION
#endif
)";

View File

@ -534,6 +534,20 @@ struct retro_core_option_v2_definition option_defs_us[] = {
"256",
},
#endif
{
CORE_OPTION_NAME "_native_depth_interpolation",
"Native Depth Interpolation",
NULL,
"Helps with texture corruption and depth issues on AMD GPUs. Can also help Intel GPUs in some cases.",
NULL,
"video",
{
{ "disabled", NULL },
{ "enabled", NULL },
{ NULL, NULL },
},
"disabled",
},
{
CORE_OPTION_NAME "_threaded_rendering",
"Threaded Rendering",

View File

@ -90,6 +90,7 @@ Option<int> TextureFiltering(CORE_OPTION_NAME "_texture_filtering");
Option<bool> PowerVR2Filter(CORE_OPTION_NAME "_pvr2_filtering");
Option<int64_t> PixelBufferSize("", 512 * 1024 * 1024);
IntOption PerPixelLayers(CORE_OPTION_NAME "_oit_layers");
Option<bool> NativeDepthInterpolation(CORE_OPTION_NAME "_native_depth_interpolation");
// Misc