naomi2: dx11 support. fix ambient color. no depth write in per-strip

dx11: naomi2 support
n2: ambient color was swapping red and blue
gl, dx11: don't write depth in per-strip to avoid missing geometry,
prefer wrong depth order/blending for now
optimize naomi2 per-triangle sorting
This commit is contained in:
flyinghead 2022-02-13 21:01:45 +01:00
parent a14086d484
commit 8d071bf4ea
17 changed files with 2682 additions and 1899 deletions

View File

@ -1079,6 +1079,8 @@ if(WIN32)
core/rend/dx11/dx11context_lr.cpp
core/rend/dx11/dx11context_lr.h
core/rend/dx11/dx11_driver.h
core/rend/dx11/dx11_naomi2.cpp
core/rend/dx11/dx11_naomi2.h
core/rend/dx11/oit/dx11_oitbuffers.h
core/rend/dx11/oit/dx11_oitrenderer.cpp
core/rend/dx11/oit/dx11_oitshaders.cpp

File diff suppressed because it is too large Load Diff

View File

@ -1561,8 +1561,10 @@ bool ta_parse_vdrc(TA_context* ctx)
bgpp->envMapping[1] = false;
}
const bool mergeTranslucent = !config::PerStripSorting || config::RendererType == RenderType::OpenGL_OIT;
// TODO || config::RendererType == RenderType::Vulkan_OIT || config::RendererType == RenderType::DirectX11_OIT
const bool mergeTranslucent = !config::PerStripSorting
|| config::RendererType == RenderType::OpenGL_OIT
|| config::RendererType == RenderType::DirectX11_OIT;
// TODO || config::RendererType == RenderType::Vulkan_OIT
TA_context *childCtx = ctx;
while (childCtx != nullptr)
{
@ -1680,8 +1682,10 @@ bool ta_parse_naomi2(TA_context* ctx)
int op_count = 0;
int pt_count = 0;
int tr_count = 0;
const bool mergeTranslucent = !config::PerStripSorting || config::RendererType == RenderType::OpenGL_OIT;
// TODO || config::RendererType == RenderType::Vulkan_OIT || config::RendererType == RenderType::DirectX11_OIT;
const bool mergeTranslucent = !config::PerStripSorting
|| config::RendererType == RenderType::OpenGL_OIT
|| config::RendererType == RenderType::DirectX11_OIT;
// TODO || config::RendererType == RenderType::Vulkan_OIT
for (const RenderPass& pass : ctx->rend.render_passes)
{
make_index(&ctx->rend.global_param_op, op_count, pass.op_count, true, &ctx->rend);

View File

@ -0,0 +1,387 @@
/*
Copyright 2022 flyinghead
This file is part of Flycast.
Flycast is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
Flycast is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Flycast. If not, see <https://www.gnu.org/licenses/>.
*/
#include "dx11_naomi2.h"
// HLSL source of the Naomi 2 vertex shader.
//
// Preprocessor switches read by this source: pp_Gouraud, POSITION_ONLY,
// pp_TwoVolumes, LIGHT_ON and pp_Texture.
// NOTE(review): pp_Texture is tested with "#if pp_Texture == 0" but it is not one of
// the macros this source defines itself - confirm that the compile-time macro list
// supplies it, otherwise the "col += spec" branches are always taken.
//
// Constant buffers: shaderConstants (b0) and polyConstants (b1) are declared here.
// computeColors(), computeEnvMap() and computeBumpMap() are only prototyped in this
// string; their definitions live in DX11N2ColorShader, which therefore has to be
// appended to this source whenever POSITION_ONLY == 0 and LIGHT_ON == 1.
//
// main() transforms the position by mvMat then projMat, performs the perspective
// divide itself, applies ndcMat, stores 1/w-derived depth in uv.w and finally forces
// pos.z = 0 and pos.w = 1.
const char * const DX11N2VertexShader = R"(
#if pp_Gouraud == 1
#define INTERPOLATION
#else
#define INTERPOLATION nointerpolation
#endif
struct VertexIn
{
float4 pos : POSITION;
#if POSITION_ONLY == 0
float4 col : COLOR0;
float4 spec : COLOR1;
float2 uv : TEXCOORD0;
#if pp_TwoVolumes == 1
float4 col1 : COLOR2;
float4 spec1 : COLOR3;
float2 uv1 : TEXCOORD1;
#endif
float3 normal: NORMAL;
uint vertexId : SV_VertexID;
#endif
};
struct VertexOut
{
float4 pos : SV_POSITION;
float4 uv : TEXCOORD0;
#if POSITION_ONLY == 0
INTERPOLATION float4 col : COLOR0;
INTERPOLATION float4 spec : COLOR1;
#if pp_TwoVolumes == 1
float2 uv1 : TEXCOORD1;
INTERPOLATION float4 col1 : COLOR2;
INTERPOLATION float4 spec1 : COLOR3;
#endif
nointerpolation uint index : BLENDINDICES0;
#endif
};
cbuffer shaderConstants : register(b0)
{
float4x4 ndcMat;
float4 leftPlane;
float4 topPlane;
float4 rightPlane;
float4 bottomPlane;
};
cbuffer polyConstants : register(b1)
{
float4x4 mvMat;
float4x4 normalMat;
float4x4 projMat;
int envMapping0;
int envMapping1;
int bumpMapping;
int polyNumber;
float4 glossCoef;
int4 constantColor;
int4 model_diff_spec; // diffuse0, diffuse1, specular0, specular1
};
void computeColors(inout float4 baseCol, inout float4 offsetCol, in int volIdx, in float3 position, in float3 normal);
void computeEnvMap(inout float2 uv, in float3 normal);
void computeBumpMap(inout float4 color0, in float4 color1, in float3 position, in float3 normal, in float4x4 normalMat);
[clipplanes(leftPlane, topPlane, rightPlane, bottomPlane)]
VertexOut main(in VertexIn vin)
{
VertexOut vo;
vo.pos = mul(mvMat, float4(vin.pos.xyz, 1.f));
#if POSITION_ONLY == 0
vo.col = vin.col;
vo.spec = vin.spec;
#if LIGHT_ON == 1
float4 vnorm = normalize(mul(normalMat, float4(vin.normal, 0.f)));
#endif
#if pp_TwoVolumes == 1
vo.col1 = vin.col1;
vo.spec1 = vin.spec1;
vo.uv1 = vin.uv1;
#if LIGHT_ON == 1
// FIXME need offset0 and offset1 for bump maps
if (bumpMapping == 1)
computeBumpMap(vo.spec, vo.spec1, vo.pos.xyz, vnorm.xyz, normalMat);
else
{
computeColors(vo.col1, vo.spec1, 1, vo.pos.xyz, vnorm.xyz);
#if pp_Texture == 0
vo.col1 += vo.spec1;
#endif
}
if (envMapping1 == 1)
computeEnvMap(vo.uv1.xy, vnorm.xyz);
#endif
#endif
#if LIGHT_ON == 1
if (bumpMapping == 0)
{
computeColors(vo.col, vo.spec, 0, vo.pos.xyz, vnorm.xyz);
#if pp_Texture == 0
vo.col += vo.spec;
#endif
}
#endif
vo.uv.xy = vin.uv;
#if LIGHT_ON == 1
if (envMapping0 == 1)
computeEnvMap(vo.uv.xy, vnorm.xyz);
#endif
vo.index = (uint(polyNumber) << 18) + vin.vertexId;
#endif
vo.pos = mul(projMat, vo.pos);
vo.pos = float4(vo.pos.xy / vo.pos.w, 1.f / vo.pos.w, 1.f);
vo.pos = mul(ndcMat, vo.pos);
#if POSITION_ONLY == 1
vo.uv = float4(0.f, 0.f, 0.f, vo.pos.z);
#else
#if pp_Gouraud == 1
vo.col *= vo.pos.z;
vo.spec *= vo.pos.z;
#if pp_TwoVolumes == 1
vo.col1 *= vo.pos.z;
vo.spec1 *= vo.pos.z;
#endif
#endif
vo.uv = float4(vo.uv.xy * vo.pos.z, 0.f, vo.pos.z);
#if pp_TwoVolumes == 1
vo.uv1 *= vo.pos.z;
#endif
#endif
vo.pos.w = 1.f;
vo.pos.z = 0.f;
return vo;
}
)";
// HLSL source of the Naomi 2 lighting helpers used by the vertex shader.
//
// Defines the N2Light structure, the lightConstants cbuffer (register b2) and the
// bodies of computeColors(), computeEnvMap() and computeBumpMap().
// This source is not self-contained: computeColors() reads constantColor, glossCoef
// and model_diff_spec, which are declared in the polyConstants cbuffer of
// DX11N2VertexShader, so it is meant to be appended after that source.
//
// computeColors() accumulates per-light diffuse/specular contributions according to
// each light's routing and dmode/smode, then applies ambient terms and, when
// useBaseOver == 1, adds the part of the base colour above 1.0 to the offset colour.
// Only the ROUTING_*_ADD base/offset routings and ROUTING_ALPHADIFF_SUB are handled
// by the visible code; the other ROUTING_* values are defined but not tested.
// computeBumpMap() currently returns immediately (the bumpId0 test is commented out
// as a TODO), so the code after the return is not executed.
const char * const DX11N2ColorShader = R"(
#define PI 3.1415926f
#define LMODE_SINGLE_SIDED 0
#define LMODE_DOUBLE_SIDED 1
#define LMODE_DOUBLE_SIDED_WITH_TOLERANCE 2
#define LMODE_SPECIAL_EFFECT 3
#define LMODE_THIN_SURFACE 4
#define LMODE_BUMP_MAP 5
#define ROUTING_BASEDIFF_BASESPEC_ADD 0
#define ROUTING_BASEDIFF_OFFSSPEC_ADD 1
#define ROUTING_OFFSDIFF_BASESPEC_ADD 2
#define ROUTING_OFFSDIFF_OFFSSPEC_ADD 3
#define ROUTING_ALPHADIFF_ADD 4
#define ROUTING_ALPHAATTEN_ADD 5
#define ROUTING_FOGDIFF_ADD 6
#define ROUTING_FOGATTENUATION_ADD 7
#define ROUTING_BASEDIFF_BASESPEC_SUB 8
#define ROUTING_BASEDIFF_OFFSSPEC_SUB 9
#define ROUTING_OFFSDIFF_BASESPEC_SUB 10
#define ROUTING_OFFSDIFF_OFFSSPEC_SUB 11
#define ROUTING_ALPHADIFF_SUB 12
#define ROUTING_ALPHAATTEN_SUB 13
struct N2Light
{
float4 color;
float4 direction;
float4 position;
int parallel;
int routing;
int dmode;
int smode;
int4 diffuse_specular; // diffuse0, diffuse1, specular0, specular1
float attnDistA;
float attnDistB;
float attnAngleA;
float attnAngleB;
int distAttnMode;
int3 _pad;
};
cbuffer lightConstants : register(b2)
{
N2Light lights[16];
int lightCount;
float4 ambientBase[2];
float4 ambientOffset[2];
int4 ambientMaterial; // base0, base1, offset0, offset1
int useBaseOver;
int bumpId0;
int bumpId1;
}
void computeColors(inout float4 baseCol, inout float4 offsetCol, in int volIdx, in float3 position, in float3 normal)
{
if (constantColor[volIdx] == 1)
return;
float3 diffuse = float3(0.f, 0.f, 0.f);
float3 specular = float3(0.f, 0.f, 0.f);
float diffuseAlpha = 0.f;
float specularAlpha = 0.f;
for (int i = 0; i < lightCount; i++)
{
N2Light light = lights[i];
float3 lightDir; // direction to the light
float3 lightColor = light.color.rgb;
if (light.parallel == 1)
{
lightDir = normalize(light.direction.xyz);
}
else
{
lightDir = normalize(light.position.xyz - position);
if (light.attnDistA != 1.f || light.attnDistB != 0.f)
{
float distance = length(light.position.xyz - position);
if (light.distAttnMode == 0)
distance = 1.f / distance;
lightColor *= clamp(light.attnDistB * distance + light.attnDistA, 0.f, 1.f);
}
if (light.attnAngleA != 1.f || light.attnAngleB != 0.f)
{
float3 spotDir = light.direction.xyz;
float cosAngle = 1.f - max(0.f, dot(lightDir, spotDir));
lightColor *= clamp(cosAngle * light.attnAngleB + light.attnAngleA, 0.f, 1.f);
}
}
int routing = light.routing;
if (light.diffuse_specular[volIdx] == 1) // If light contributes to diffuse
{
float factor;
switch (light.dmode)
{
case LMODE_SINGLE_SIDED:
factor = max(dot(normal, lightDir), 0.f);
break;
case LMODE_DOUBLE_SIDED:
factor = abs(dot(normal, lightDir));
break;
case LMODE_SPECIAL_EFFECT:
default:
factor = 1.f;
break;
}
if (routing == ROUTING_ALPHADIFF_SUB)
diffuseAlpha -= lightColor.r * factor;
else if (routing == ROUTING_BASEDIFF_BASESPEC_ADD || routing == ROUTING_BASEDIFF_OFFSSPEC_ADD)
diffuse += lightColor * factor;
if (routing == ROUTING_OFFSDIFF_BASESPEC_ADD || routing == ROUTING_OFFSDIFF_OFFSSPEC_ADD)
specular += lightColor * factor;
}
if (light.diffuse_specular[2 + volIdx] == 1) // If light contributes to specular
{
float3 reflectDir = reflect(-lightDir, normal);
float factor;
switch (light.smode)
{
case LMODE_SINGLE_SIDED:
factor = clamp(pow(max(dot(normalize(-position), reflectDir), 0.f), glossCoef[volIdx]), 0.f, 1.f);
break;
case LMODE_DOUBLE_SIDED:
factor = clamp(pow(abs(dot(normalize(-position), reflectDir)), glossCoef[volIdx]), 0.f, 1.f);
break;
case LMODE_SPECIAL_EFFECT:
default:
factor = 1.f;
break;
}
if (routing == ROUTING_ALPHADIFF_SUB)
specularAlpha -= lightColor.r * factor;
else if (routing == ROUTING_OFFSDIFF_OFFSSPEC_ADD || routing == ROUTING_BASEDIFF_OFFSSPEC_ADD)
specular += lightColor * factor;
if (routing == ROUTING_BASEDIFF_BASESPEC_ADD || routing == ROUTING_OFFSDIFF_BASESPEC_ADD)
diffuse += lightColor * factor;
}
}
// ambient with material
if (ambientMaterial[volIdx] == 1)
diffuse += ambientBase[volIdx].rgb;
if (ambientMaterial[volIdx + 2] == 1)
specular += ambientOffset[volIdx].rgb;
if (model_diff_spec[volIdx] == 1)
baseCol.rgb *= diffuse;
if (model_diff_spec[volIdx + 2] == 1)
offsetCol.rgb *= specular;
// ambient w/o material
if (ambientMaterial[volIdx] == 0 && model_diff_spec[volIdx] == 1)
baseCol.rgb += ambientBase[volIdx].rgb;
if (ambientMaterial[volIdx + 2] == 0 && model_diff_spec[volIdx + 2] == 1)
offsetCol.rgb += ambientOffset[volIdx].rgb;
baseCol.a = max(0.f, baseCol.a + diffuseAlpha);
offsetCol.a = max(0.f, offsetCol.a + specularAlpha);
if (useBaseOver == 1)
{
float4 overflow = max(float4(0.f, 0.f, 0.f, 0.f), baseCol - float4(1.f, 1.f, 1.f, 1.f));
offsetCol += overflow;
}
}
void computeEnvMap(inout float2 uv, in float3 normal)
{
// Cheap env mapping
uv += normal.xy / 2.f + 0.5f;
uv = clamp(uv, 0.f, 1.f);
}
void computeBumpMap(inout float4 color0, in float4 color1, in float3 position, in float3 normal, in float4x4 normalMat)
{
// TODO
//if (bumpId0 == -1)
return;
float3 tangent = color0.xyz;
if (tangent.x > 0.5f)
tangent.x -= 1.f;
if (tangent.y > 0.5f)
tangent.y -= 1.f;
if (tangent.z > 0.5f)
tangent.z -= 1.f;
tangent = normalize(mul(normalMat, float4(tangent, 0.f))).xyz;
float3 bitangent = color1.xyz;
if (bitangent.x > 0.5f)
bitangent.x -= 1.f;
if (bitangent.y > 0.5f)
bitangent.y -= 1.f;
if (bitangent.z > 0.5f)
bitangent.z -= 1.f;
bitangent = normalize(mul(normalMat, float4(bitangent, 0.f))).xyz;
float scaleDegree = color0.w;
float scaleOffset = color1.w;
N2Light light = lights[bumpId0];
float3 lightDir; // direction to the light
if (light.parallel == 1)
lightDir = normalize(light.direction.xyz);
else
lightDir = normalize(light.position.xyz - position);
float n = dot(lightDir, normal);
float cosQ = dot(lightDir, tangent);
float sinQ = dot(lightDir, bitangent);
float sinT = clamp(n, 0.f, 1.f);
float k1 = 1.f - scaleDegree;
float k2 = scaleDegree * sinT;
float k3 = scaleDegree * sqrt(1.f - sinT * sinT); // cos T
float q = acos(cosQ);
if (sinQ < 0.f)
q = 2.f * PI - q;
color0.r = k2;
color0.g = k3;
color0.b = q / PI / 2.f;
color0.a = k1;
}
)";

View File

@ -0,0 +1,220 @@
/*
Copyright 2022 flyinghead
This file is part of Flycast.
Flycast is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
Flycast is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Flycast. If not, see <https://www.gnu.org/licenses/>.
*/
#pragma once
#include <windows.h>
#include <d3d11.h>
#include "windows/comptr.h"
#include "hw/pvr/ta_ctx.h"
extern const char * const DX11N2VertexShader;
extern const char * const DX11N2ColorShader;
// CPU-side image of the "polyConstants" constant buffer (register b1) declared in
// DX11N2VertexShader. Member order and sizes must match the HLSL declaration; the
// trailing number on each line is the member's byte offset and the static_assert
// below pins the total size.
struct N2PolyConstants
{
float mvMat[4][4]; // 0
float normalMat[4][4]; // 64
float projMat[4][4]; // 128
int envMapping[2]; // 192 (envMapping0, envMapping1 in the shader)
int bumpMapping; // 200
int polyNumber; // 204
float glossCoef[4]; // 208 (float4 in the shader; indexed by volume there)
int constantColor[4]; // 224 (int4 in the shader; indexed by volume there)
// int4 model_diff_spec
int modelDiffuse[2]; // 240
int modelSpecular[2]; // 248
// 256
};
static_assert(sizeof(N2PolyConstants) == 256, "sizeof(N2PolyConstants) should be 256");
// CPU-side image of one HLSL "N2Light" element of the lightConstants buffer declared
// in DX11N2ColorShader. Member order and sizes must match the HLSL struct; the
// trailing number on each line is the member's byte offset.
struct DX11N2Light
{
float color[4]; // 0
float direction[4]; // 16
float position[4]; // 32
int parallel; // 48
int routing; // 52
int dmode; // 56
int smode; // 60
// int4 diffuse_specular
int diffuse[2]; // 64
int specular[2]; // 72
float attnDistA; // 80
float attnDistB; // 84
float attnAngleA; // 88
float attnAngleB; // 92
int distAttnMode; // 96
int _pad[3]; // 100 (matches the int3 _pad of the HLSL struct)
// 112
};
static_assert(sizeof(DX11N2Light) == 112, "sizeof(DX11N2Light) should be 112");
// CPU-side image of the "lightConstants" constant buffer (register b2) declared in
// DX11N2ColorShader. Explicit _pad members reproduce the gaps the HLSL layout has
// before the float4 arrays; the trailing number on each line is the byte offset.
// Note: the members named bumpId1/bumpId2 here sit at the offsets the shader
// declares as bumpId0/bumpId1 (the two ints following useBaseOver).
struct N2LightConstants
{
DX11N2Light lights[16]; // 0
int lightCount; // 1792
int _pad0[3]; // 1796 (pads up to the next 16-byte boundary)
float ambientBase[2][4]; // 1808
float ambientOffset[2][4]; // 1840
// int4 ambientMaterial
int ambientMaterialBase[2]; // 1872
int ambientMaterialOffset[2]; // 1880
int useBaseOver; // 1888
int bumpId1; // 1892
int bumpId2; // 1896
int _pad3; // 1900
// 1904
};
static_assert(sizeof(N2LightConstants) == 1904, "sizeof(N2LightConstants) should be 1904");
// Owns the two dynamic constant buffers consumed by the Naomi 2 vertex shader and
// uploads their content:
//  - slot 1: per-polygon constants (N2PolyConstants)
//  - slot 2: lighting constants (N2LightConstants), re-uploaded only when the
//    polygon's light model pointer differs from the last one uploaded.
// init() must be called before any setConstants() call; term() releases everything.
class Naomi2Helper
{
public:
	// Creates both constant buffers on `device` and keeps `deviceContext` for later
	// uploads. A creation failure is logged; later uploads to a missing buffer are
	// skipped instead of dereferencing it.
	void init(ComPtr<ID3D11Device>& device, ComPtr<ID3D11DeviceContext> deviceContext)
	{
		this->deviceContext = deviceContext;
		D3D11_BUFFER_DESC desc{};
		desc.ByteWidth = sizeof(N2PolyConstants);
		// Round the size up to a multiple of 16 bytes
		desc.ByteWidth = (((desc.ByteWidth - 1) >> 4) + 1) << 4;
		desc.Usage = D3D11_USAGE_DYNAMIC;
		desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
		desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
		if (FAILED(device->CreateBuffer(&desc, nullptr, &polyConstantsBuffer.get())))
			WARN_LOG(RENDERER, "Per-polygon constants buffer creation failed");

		desc.ByteWidth = sizeof(N2LightConstants);
		desc.ByteWidth = (((desc.ByteWidth - 1) >> 4) + 1) << 4;
		if (FAILED(device->CreateBuffer(&desc, nullptr, &lightConstantsBuffer.get())))
			WARN_LOG(RENDERER, "Light constants buffer creation failed");
		resetCache();
	}

	// Releases the buffers and the device context reference.
	void term()
	{
		polyConstantsBuffer.reset();
		lightConstantsBuffer.reset();
		deviceContext.reset();
		resetCache();
	}

	// Uploads the per-polygon constants of `pp` (slot 1) and, if the light model
	// changed since the last upload, the lighting constants (slot 2).
	// `polyNumber` is copied verbatim into the polyNumber shader constant.
	void setConstants(const PolyParam& pp, u32 polyNumber)
	{
		// Value-initialized so the slots that are never assigned below
		// (glossCoef[2..3], constantColor[2..3]) are uploaded as zero, not garbage.
		N2PolyConstants polyConstants{};
		memcpy(polyConstants.mvMat, pp.mvMatrix, sizeof(polyConstants.mvMat));
		memcpy(polyConstants.normalMat, pp.normalMatrix, sizeof(polyConstants.normalMat));
		memcpy(polyConstants.projMat, pp.projMatrix, sizeof(polyConstants.projMat));
		polyConstants.envMapping[0] = pp.envMapping[0];
		polyConstants.envMapping[1] = pp.envMapping[1];
		polyConstants.bumpMapping = pp.pcw.Texture == 1 && pp.tcw.PixelFmt == PixelBumpMap;
		polyConstants.polyNumber = polyNumber;
		for (size_t i = 0; i < 2; i++)
		{
			polyConstants.glossCoef[i] = pp.glossCoef[i];
			polyConstants.constantColor[i] = pp.constantColor[i];
			polyConstants.modelDiffuse[i] = pp.diffuseColor[i];
			polyConstants.modelSpecular[i] = pp.specularColor[i];
		}
		setConstBuffer(polyConstantsBuffer, polyConstants);
		deviceContext->VSSetConstantBuffers(1, 1, &polyConstantsBuffer.get());

		if (!lightCacheValid || pp.lightModel != lastModel)
		{
			lastModel = pp.lightModel;
			lightCacheValid = true;
			N2LightConstants lightConstants{};
			if (pp.lightModel != nullptr)
			{
				const N2LightModel& lights = *pp.lightModel;
				// Never write past the fixed-size array mirrored by the shader
				const int maxLights = static_cast<int>(sizeof(lightConstants.lights) / sizeof(lightConstants.lights[0]));
				const int lightCount = lights.lightCount < maxLights ? lights.lightCount : maxLights;
				lightConstants.lightCount = lightCount;
				for (int i = 0; i < lightCount; i++)
				{
					DX11N2Light& light = lightConstants.lights[i];
					memcpy(light.color, lights.lights[i].color, sizeof(light.color));
					memcpy(light.direction, lights.lights[i].direction, sizeof(light.direction));
					memcpy(light.position, lights.lights[i].position, sizeof(light.position));
					light.parallel = lights.lights[i].parallel;
					light.routing = lights.lights[i].routing;
					light.dmode = lights.lights[i].dmode;
					light.smode = lights.lights[i].smode;
					memcpy(light.diffuse, lights.lights[i].diffuse, sizeof(light.diffuse));
					memcpy(light.specular, lights.lights[i].specular, sizeof(light.specular));
					light.attnDistA = lights.lights[i].attnDistA;
					light.attnDistB = lights.lights[i].attnDistB;
					light.attnAngleA = lights.lights[i].attnAngleA;
					light.attnAngleB = lights.lights[i].attnAngleB;
					light.distAttnMode = lights.lights[i].distAttnMode;
				}
				memcpy(lightConstants.ambientBase, lights.ambientBase, sizeof(lightConstants.ambientBase));
				memcpy(lightConstants.ambientOffset, lights.ambientOffset, sizeof(lightConstants.ambientOffset));
				for (int i = 0; i < 2; i++)
				{
					lightConstants.ambientMaterialBase[i] = lights.ambientMaterialBase[i];
					lightConstants.ambientMaterialOffset[i] = lights.ambientMaterialOffset[i];
				}
				lightConstants.useBaseOver = lights.useBaseOver;
				lightConstants.bumpId1 = lights.bumpId1;
				lightConstants.bumpId2 = lights.bumpId2;
			}
			else
			{
				// No light model: no lights, white base ambient, black offset ambient
				lightConstants.lightCount = 0;
				float white[] { 1.f, 1.f, 1.f, 1.f };
				float black[4]{};
				for (int vol = 0; vol < 2; vol++)
				{
					lightConstants.ambientMaterialBase[vol] = 0;
					lightConstants.ambientMaterialOffset[vol] = 0;
					memcpy(lightConstants.ambientBase[vol], white, sizeof(white));
					memcpy(lightConstants.ambientOffset[vol], black, sizeof(black));
				}
				lightConstants.useBaseOver = 0;
				lightConstants.bumpId1 = -1;
				lightConstants.bumpId2 = -1;
			}
			setConstBuffer(lightConstantsBuffer, lightConstants);
			deviceContext->VSSetConstantBuffers(2, 1, &lightConstantsBuffer.get());
		}
	}

	// Uploads only the model-view and projection matrices (slot 1); every other
	// per-polygon constant is uploaded as zero.
	void setConstants(const float *mvMatrix, const float *projMatrix)
	{
		N2PolyConstants polyConstants{};
		memcpy(polyConstants.mvMat, mvMatrix, sizeof(polyConstants.mvMat));
		memcpy(polyConstants.projMat, projMatrix, sizeof(polyConstants.projMat));
		setConstBuffer(polyConstantsBuffer, polyConstants);
		deviceContext->VSSetConstantBuffers(1, 1, &polyConstantsBuffer.get());
	}

	// Forces the next setConstants(pp, ...) call to re-upload the lighting constants,
	// whatever light model it carries (including none).
	void resetCache() {
		lastModel = nullptr;
		lightCacheValid = false;
	}

private:
	// Copies `data` into `buffer` with a write-discard mapping.
	// Does nothing (besides logging a mapping failure) when the buffer was never
	// created or cannot be mapped, so no write goes through an invalid pointer.
	template<typename T>
	void setConstBuffer(const ComPtr<ID3D11Buffer>& buffer, const T& data)
	{
		ID3D11Buffer *rawBuffer = buffer;
		if (rawBuffer == nullptr)
			return;
		D3D11_MAPPED_SUBRESOURCE mappedSubres{};
		const HRESULT hr = deviceContext->Map(rawBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedSubres);
		if (FAILED(hr) || mappedSubres.pData == nullptr)
		{
			WARN_LOG(RENDERER, "Naomi2 constant buffer mapping failed: %lx", static_cast<unsigned long>(hr));
			return;
		}
		memcpy(mappedSubres.pData, &data, sizeof(T));
		deviceContext->Unmap(rawBuffer, 0);
	}

	ComPtr<ID3D11DeviceContext> deviceContext;
	ComPtr<ID3D11Buffer> polyConstantsBuffer;
	ComPtr<ID3D11Buffer> lightConstantsBuffer;
	// Light model whose constants were uploaded last; only meaningful while
	// lightCacheValid is true.
	const N2LightModel *lastModel = nullptr;
	bool lightCacheValid = false;
};

View File

@ -29,6 +29,7 @@ const D3D11_INPUT_ELEMENT_DESC MainLayout[]
{ "COLOR", 0, DXGI_FORMAT_B8G8R8A8_UNORM, 0, (UINT)offsetof(Vertex, col), D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "COLOR", 1, DXGI_FORMAT_B8G8R8A8_UNORM, 0, (UINT)offsetof(Vertex, spc), D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, (UINT)offsetof(Vertex, u), D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "NORMAL", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, (UINT)offsetof(Vertex, nx), D3D11_INPUT_PER_VERTEX_DATA, 0 },
};
const D3D11_INPUT_ELEMENT_DESC ModVolLayout[]
{
@ -43,7 +44,7 @@ bool DX11Renderer::Init()
shaders = &theDX11Context.getShaders();
samplers = &theDX11Context.getSamplers();
bool success = (bool)shaders->getVertexShader(true);
bool success = (bool)shaders->getVertexShader(true, true);
ComPtr<ID3DBlob> blob = shaders->getVertexShaderBlob();
success = success && SUCCEEDED(device->CreateInputLayout(MainLayout, ARRAY_SIZE(MainLayout), blob->GetBufferPointer(), blob->GetBufferSize(), &mainInputLayout.get()));
blob = shaders->getMVVertexShaderBlob();
@ -146,6 +147,7 @@ bool DX11Renderer::Init()
quad = std::unique_ptr<Quad>(new Quad());
quad->init(device, deviceContext, shaders);
n2Helper.init(device, deviceContext);
fog_needs_update = true;
forcePaletteUpdate();
@ -163,6 +165,7 @@ bool DX11Renderer::Init()
void DX11Renderer::Term()
{
NOTICE_LOG(RENDERER, "DX11 renderer terminating");
n2Helper.term();
vtxConstants.reset();
pxlConstants.reset();
fbTex.reset();
@ -310,7 +313,12 @@ bool DX11Renderer::Process(TA_context* ctx)
}
else
{
if (!ta_parse_vdrc(ctx))
bool success;
if (settings.platform.isNaomi2())
success = ta_parse_naomi2(ctx);
else
success = ta_parse_vdrc(ctx);
if (!success)
return false;
}
@ -319,7 +327,7 @@ bool DX11Renderer::Process(TA_context* ctx)
//
// Efficient Triangle and Quadrilateral Clipping within Shaders. M. McGuire
// Journal of Graphics GPU and Game Tools <EFBFBD> November 2011
// Journal of Graphics GPU and Game Tools - November 2011
//
static glm::vec3 intersect(const glm::vec3& A, float Adist , const glm::vec3& B, float Bdist)
{
@ -523,21 +531,24 @@ void DX11Renderer::uploadGeometryBuffers()
{
const ModTriangle *data = nullptr;
u32 size = 0;
#if 1
// clip triangles
std::vector<ModTriangle> modVolTriangles;
modVolTriangles.reserve(pvrrc.modtrig.used());
clipModVols(pvrrc.global_param_mvo, modVolTriangles);
clipModVols(pvrrc.global_param_mvo_tr, modVolTriangles);
if (!modVolTriangles.empty())
if (!settings.platform.isNaomi2()) // TODO for naomi2 as well?
{
size = (u32)(modVolTriangles.size() * sizeof(ModTriangle));
data = modVolTriangles.data();
// clip triangles
std::vector<ModTriangle> modVolTriangles;
modVolTriangles.reserve(pvrrc.modtrig.used());
clipModVols(pvrrc.global_param_mvo, modVolTriangles);
clipModVols(pvrrc.global_param_mvo_tr, modVolTriangles);
if (!modVolTriangles.empty())
{
size = (u32)(modVolTriangles.size() * sizeof(ModTriangle));
data = modVolTriangles.data();
}
}
else
{
size = pvrrc.modtrig.bytes();
data = pvrrc.modtrig.head();
}
#else
size = pvrrc.modtrig.bytes();
data = pvrrc.modtrig.head();
#endif
if (size > 0)
{
verify(ensureBufferSize(modvolBuffer, D3D11_BIND_VERTEX_BUFFER, modvolBufferSize, size));
@ -598,6 +609,7 @@ bool DX11Renderer::Render()
if (!pvrrc.isRenderFramebuffer)
{
n2Helper.resetCache();
uploadGeometryBuffers();
updateFogTexture();
@ -745,7 +757,7 @@ void DX11Renderer::setRenderState(const PolyParam *gp)
DX11Texture *texture = (DX11Texture *)gp->texture;
bool gpuPalette = texture != nullptr ? texture->gpuPalette : false;
ComPtr<ID3D11VertexShader> vertexShader = shaders->getVertexShader(gp->pcw.Gouraud);
ComPtr<ID3D11VertexShader> vertexShader = shaders->getVertexShader(gp->pcw.Gouraud, gp->isNaomi2());
deviceContext->VSSetShader(vertexShader, nullptr, 0);
ComPtr<ID3D11PixelShader> pixelShader = shaders->getShader(
gp->pcw.Texture,
@ -819,7 +831,7 @@ void DX11Renderer::setRenderState(const PolyParam *gp)
zfunc = gp->isp.DepthMode;
bool zwriteEnable;
if (SortingEnabled && !config::PerStripSorting)
if (SortingEnabled /* && !config::PerStripSorting */)
zwriteEnable = false;
else
{
@ -832,6 +844,9 @@ void DX11Renderer::setRenderState(const PolyParam *gp)
}
const u32 stencil = (gp->pcw.Shadow != 0) ? 0x80 : 0;
deviceContext->OMSetDepthStencilState(depthStencilStates.getState(true, zwriteEnable, zfunc, config::ModifierVolumes), stencil);
if (gp->isNaomi2())
n2Helper.setConstants(*gp, 0); // poly number only used in OIT
}
template <u32 Type, bool SortingEnabled>
@ -900,7 +915,7 @@ void DX11Renderer::drawSorted(bool multipass)
// Write to the depth buffer now. The next render pass might need it. (Cosmic Smash)
deviceContext->OMSetBlendState(blendStates.getState(false, 0, 0, true), nullptr, 0xffffffff);
ComPtr<ID3D11VertexShader> vertexShader = shaders->getVertexShader(true);
ComPtr<ID3D11VertexShader> vertexShader = shaders->getVertexShader(true, settings.platform.isNaomi2());
deviceContext->VSSetShader(vertexShader, nullptr, 0);
ComPtr<ID3D11PixelShader> pixelShader = shaders->getShader(
false,
@ -949,7 +964,6 @@ void DX11Renderer::drawModVols(int first, int count)
deviceContext->OMSetBlendState(blendStates.getState(false, 0, 0, true), nullptr, 0xffffffff);
deviceContext->VSSetShader(shaders->getMVVertexShader(), nullptr, 0);
deviceContext->PSSetShader(shaders->getModVolShader(), nullptr, 0);
deviceContext->RSSetScissorRects(1, &scissorRect);
@ -958,6 +972,8 @@ void DX11Renderer::drawModVols(int first, int count)
ModifierVolumeParam* params = &pvrrc.global_param_mvo.head()[first];
int mod_base = -1;
const float *curMVMat = nullptr;
const float *curProjMat = nullptr;
for (int cmv = 0; cmv < count; cmv++)
{
@ -968,6 +984,13 @@ void DX11Renderer::drawModVols(int first, int count)
if (mod_base == -1)
mod_base = param.first;
if (param.isNaomi2() && (param.mvMatrix != curMVMat || param.projMatrix != curProjMat))
{
curMVMat = param.mvMatrix;
curProjMat = param.projMatrix;
n2Helper.setConstants(param.mvMatrix, param.projMatrix);
}
deviceContext->VSSetShader(shaders->getMVVertexShader(param.isNaomi2()), nullptr, 0);
if (!param.isp.VolumeLast && mv_mode > 0)
// OR'ing (open volume or quad)
deviceContext->OMSetDepthStencilState(depthStencilStates.getMVState(DepthStencilStates::Or), 2);
@ -1005,6 +1028,8 @@ void DX11Renderer::drawModVols(int first, int count)
deviceContext->IASetIndexBuffer(indexBuffer, DXGI_FORMAT_R32_UINT, 0);
deviceContext->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
// Use the background poly as a quad
deviceContext->VSSetShader(shaders->getMVVertexShader(false), nullptr, 0);
deviceContext->DrawIndexed(4, 0, 0);
}

View File

@ -27,6 +27,7 @@
#include "dx11_shaders.h"
#include "rend/sorter.h"
#include "dx11_renderstate.h"
#include "dx11_naomi2.h"
struct DX11Renderer : public Renderer
{
@ -112,6 +113,7 @@ protected:
u32 height = 0;
bool frameRendered = false;
bool frameRenderedOnce = false;
Naomi2Helper n2Helper;
private:
void readDCFramebuffer();

View File

@ -19,6 +19,7 @@
#include "dx11_shaders.h"
#include "dx11context.h"
#include "stdclass.h"
#include "dx11_naomi2.h"
#include <xxhash.h>
const char * const VertexShader = R"(
@ -356,6 +357,9 @@ const char * const MacroValues[] { "0", "1", "2", "3" };
static D3D_SHADER_MACRO VertexMacros[]
{
{ "pp_Gouraud", "1" },
{ "POSITION_ONLY", "0" },
{ "pp_TwoVolumes", "0" },
{ "LIGHT_ON", "1" },
{ nullptr, nullptr }
};
@ -439,24 +443,48 @@ const ComPtr<ID3D11PixelShader>& DX11Shaders::getShader(bool pp_Texture, bool pp
return shader;
}
const ComPtr<ID3D11VertexShader>& DX11Shaders::getVertexShader(bool gouraud)
const ComPtr<ID3D11VertexShader>& DX11Shaders::getVertexShader(bool gouraud, bool naomi2)
{
ComPtr<ID3D11VertexShader>& vertexShader = gouraud ? gouraudVertexShader : flatVertexShader;
int index = (int)gouraud | ((int)naomi2 << 1);
ComPtr<ID3D11VertexShader>& vertexShader = vertexShaders[index];
if (!vertexShader)
{
VertexMacros[0].Definition = MacroValues[gouraud];
vertexShader = compileVS(VertexShader, "main", VertexMacros);
if (!naomi2)
{
vertexShader = compileVS(VertexShader, "main", VertexMacros);
}
else
{
VertexMacros[1].Definition = MacroValues[false];
VertexMacros[2].Definition = MacroValues[false];
VertexMacros[3].Definition = MacroValues[true];
std::string source(DX11N2VertexShader);
source += std::string("\n") + DX11N2ColorShader;
vertexShader = compileVS(source.c_str(), "main", VertexMacros);
}
}
return vertexShader;
}
const ComPtr<ID3D11VertexShader>& DX11Shaders::getMVVertexShader()
const ComPtr<ID3D11VertexShader>& DX11Shaders::getMVVertexShader(bool naomi2)
{
if (!modVolVertexShader)
modVolVertexShader = compileVS(ModVolVertexShader, "main", nullptr);
if (!modVolVertexShaders[naomi2])
{
if (!naomi2)
modVolVertexShaders[0] = compileVS(ModVolVertexShader, "main", nullptr);
else
{
VertexMacros[0].Definition = MacroValues[false];
VertexMacros[1].Definition = MacroValues[true];
VertexMacros[2].Definition = MacroValues[false];
VertexMacros[3].Definition = MacroValues[false];
modVolVertexShaders[1] = compileVS(DX11N2VertexShader, "main", VertexMacros);
}
}
return modVolVertexShader;
return modVolVertexShaders[naomi2];
}
const ComPtr<ID3D11PixelShader>& DX11Shaders::getModVolShader()
@ -536,13 +564,22 @@ ComPtr<ID3D11PixelShader> DX11Shaders::compilePS(const char* source, const char*
ComPtr<ID3DBlob> DX11Shaders::getVertexShaderBlob()
{
VertexMacros[0].Definition = MacroValues[0];
return compileShader(VertexShader, "main", "vs_4_0", VertexMacros);
VertexMacros[0].Definition = MacroValues[true];
// FIXME code dup
VertexMacros[1].Definition = MacroValues[false];
VertexMacros[2].Definition = MacroValues[false];
std::string source(DX11N2VertexShader);
source += std::string("\n") + DX11N2ColorShader;
return compileShader(source.c_str(), "main", "vs_4_0", VertexMacros);
}
ComPtr<ID3DBlob> DX11Shaders::getMVVertexShaderBlob()
{
return compileShader(ModVolVertexShader, "main", "vs_4_0", nullptr);
// FIXME code dup
VertexMacros[0].Definition = MacroValues[false];
VertexMacros[1].Definition = MacroValues[true];
VertexMacros[2].Definition = MacroValues[false];
return compileShader(DX11N2VertexShader, "main", "vs_4_0", VertexMacros);
}
ComPtr<ID3DBlob> DX11Shaders::getQuadVertexShaderBlob()
@ -562,10 +599,11 @@ void DX11Shaders::term()
{
saveCache(CacheFile);
shaders.clear();
gouraudVertexShader.reset();
flatVertexShader.reset();
for (auto& shader : vertexShaders)
shader.reset();
modVolShader.reset();
modVolVertexShader.reset();
for (auto& shader : modVolVertexShaders)
shader.reset();
quadVertexShader.reset();
quadRotateVertexShader.reset();
quadPixelShader.reset();

View File

@ -53,9 +53,9 @@ public:
const ComPtr<ID3D11PixelShader>& getShader(bool pp_Texture, bool pp_UseAlpha, bool pp_IgnoreTexA, u32 pp_ShadInstr,
bool pp_Offset, u32 pp_FogCtrl, bool pp_BumpMap, bool fog_clamping, bool trilinear, bool palette, bool gouraud,
bool alphaTest, bool clipInside, bool nearestWrapFix);
const ComPtr<ID3D11VertexShader>& getVertexShader(bool gouraud);
const ComPtr<ID3D11VertexShader>& getVertexShader(bool gouraud, bool naomi2);
const ComPtr<ID3D11PixelShader>& getModVolShader();
const ComPtr<ID3D11VertexShader>& getMVVertexShader();
const ComPtr<ID3D11VertexShader>& getMVVertexShader(bool naomi2);
const ComPtr<ID3D11PixelShader>& getQuadPixelShader();
const ComPtr<ID3D11VertexShader>& getQuadVertexShader(bool rotate);
@ -70,10 +70,9 @@ private:
ComPtr<ID3D11Device> device;
std::unordered_map<u32, ComPtr<ID3D11PixelShader>> shaders;
ComPtr<ID3D11VertexShader> gouraudVertexShader;
ComPtr<ID3D11VertexShader> flatVertexShader;
ComPtr<ID3D11VertexShader> vertexShaders[4];
ComPtr<ID3D11PixelShader> modVolShader;
ComPtr<ID3D11VertexShader> modVolVertexShader;
ComPtr<ID3D11VertexShader> modVolVertexShaders[2];
ComPtr<ID3D11PixelShader> quadPixelShader;
ComPtr<ID3D11VertexShader> quadVertexShader;
ComPtr<ID3D11VertexShader> quadRotateVertexShader;

View File

@ -40,6 +40,8 @@ const D3D11_INPUT_ELEMENT_DESC MainLayout[]
{ "COLOR", 2, DXGI_FORMAT_B8G8R8A8_UNORM, 0, (UINT)offsetof(Vertex, col1), D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "COLOR", 3, DXGI_FORMAT_B8G8R8A8_UNORM, 0, (UINT)offsetof(Vertex, spc1), D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "TEXCOORD", 1, DXGI_FORMAT_R32G32_FLOAT, 0, (UINT)offsetof(Vertex, u1), D3D11_INPUT_PER_VERTEX_DATA, 0 },
// Naomi 2
{ "NORMAL", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, (UINT)offsetof(Vertex, nx), D3D11_INPUT_PER_VERTEX_DATA, 0 },
};
struct DX11OITRenderer : public DX11Renderer
@ -51,7 +53,6 @@ struct DX11OITRenderer : public DX11Renderer
int blend_mode1[2];
float paletteIndex;
float trilinearAlpha;
int pp_Number;
// two volume mode
int shading_instr0;
@ -81,7 +82,19 @@ struct DX11OITRenderer : public DX11Renderer
buffers.init(device, deviceContext);
ComPtr<ID3DBlob> blob = shaders.getVertexShaderBlob();
mainInputLayout.reset();
return success && SUCCEEDED(device->CreateInputLayout(MainLayout, ARRAY_SIZE(MainLayout), blob->GetBufferPointer(), blob->GetBufferSize(), &mainInputLayout.get()));
success = SUCCEEDED(device->CreateInputLayout(MainLayout, ARRAY_SIZE(MainLayout), blob->GetBufferPointer(), blob->GetBufferSize(), &mainInputLayout.get())) && success;
blob = shaders.getFinalVertexShaderBlob();
success = SUCCEEDED(device->CreateInputLayout(MainLayout, 0, blob->GetBufferPointer(), blob->GetBufferSize(), &finalInputLayout.get())) && success;
desc.ByteWidth = sizeof(int);
desc.ByteWidth = (((desc.ByteWidth - 1) >> 4) + 1) << 4;
desc.Usage = D3D11_USAGE_DYNAMIC;
desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
success = SUCCEEDED(device->CreateBuffer(&desc, nullptr, &vtxPolyConstants.get())) && success;
return success;
}
void Resize(int w, int h) override {
@ -110,6 +123,9 @@ struct DX11OITRenderer : public DX11Renderer
void Term() override
{
vtxPolyConstants.reset();
finalInputLayout.reset();
mainInputLayout.reset();
opaqueTextureView.reset();
opaqueRenderTarget.reset();
opaqueTex.reset();
@ -121,7 +137,7 @@ struct DX11OITRenderer : public DX11Renderer
template <u32 Type, bool SortingEnabled, DX11OITShaders::Pass pass>
void setRenderState(const PolyParam *gp, int polyNumber)
{
ComPtr<ID3D11VertexShader> vertexShader = shaders.getVertexShader(gp->pcw.Gouraud);
ComPtr<ID3D11VertexShader> vertexShader = shaders.getVertexShader(gp->pcw.Gouraud, gp->isNaomi2(), false, pass != DX11OITShaders::Depth);
deviceContext->VSSetShader(vertexShader, nullptr, 0);
PixelPolyConstants constants;
@ -214,7 +230,6 @@ struct DX11OITRenderer : public DX11Renderer
constants.clipTest[3] = (float)(clip_rect[1] + clip_rect[3]);
}
}
constants.pp_Number = polyNumber;
constants.blend_mode0[0] = gp->tsp.SrcInstr;
constants.blend_mode0[1] = gp->tsp.DstInstr;
if (two_volumes_mode)
@ -235,6 +250,14 @@ struct DX11OITRenderer : public DX11Renderer
memcpy(mappedSubres.pData, &constants, sizeof(constants));
deviceContext->Unmap(pxlPolyConstants, 0);
if (!gp->isNaomi2())
{
deviceContext->Map(vtxPolyConstants, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedSubres);
memcpy(mappedSubres.pData, &polyNumber, sizeof(polyNumber));
deviceContext->Unmap(vtxPolyConstants, 0);
deviceContext->VSSetConstantBuffers(1, 1, &vtxPolyConstants.get());
}
if (pass == DX11OITShaders::Color)
{
// Apparently punch-through polys support blending, or at least some combinations
@ -280,6 +303,9 @@ struct DX11OITRenderer : public DX11Renderer
bool needStencil = config::ModifierVolumes && pass == DX11OITShaders::Depth && Type != ListType_Translucent;
const u32 stencil = (gp->pcw.Shadow != 0) ? 0x80 : 0;
deviceContext->OMSetDepthStencilState(depthStencilStates.getState(true, zwriteEnable, zfunc, needStencil), stencil);
if (gp->isNaomi2())
n2Helper.setConstants(*gp, polyNumber);
}
template <u32 Type, bool SortingEnabled, DX11OITShaders::Pass pass>
@ -318,13 +344,14 @@ struct DX11OITRenderer : public DX11Renderer
unsigned int offset = 0;
deviceContext->IASetVertexBuffers(0, 1, &modvolBuffer.get(), &stride, &offset);
deviceContext->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
deviceContext->VSSetShader(shaders.getMVVertexShader(), nullptr, 0);
if (!Transparent)
deviceContext->PSSetShader(shaders.getModVolShader(), nullptr, 0);
deviceContext->RSSetScissorRects(1, &scissorRect);
ModifierVolumeParam* params = Transparent ? &pvrrc.global_param_mvo_tr.head()[first] : &pvrrc.global_param_mvo.head()[first];
int mod_base = -1;
const float *curMVMat = nullptr;
const float *curProjMat = nullptr;
for (int cmv = 0; cmv < count; cmv++)
{
@ -337,6 +364,13 @@ struct DX11OITRenderer : public DX11Renderer
if (param.count > 0)
{
if (param.isNaomi2() && (param.mvMatrix != curMVMat || param.projMatrix != curProjMat))
{
curMVMat = param.mvMatrix;
curProjMat = param.projMatrix;
n2Helper.setConstants(param.mvMatrix, param.projMatrix);
}
deviceContext->VSSetShader(shaders.getMVVertexShader(param.isNaomi2()), nullptr, 0);
if (Transparent)
{
if (!param.isp.VolumeLast && mv_mode > 0)
@ -393,6 +427,7 @@ struct DX11OITRenderer : public DX11Renderer
deviceContext->OMSetDepthStencilState(depthStencilStates.getState(false, false, 0, false), 0);
setCullMode(0);
deviceContext->IASetInputLayout(finalInputLayout);
deviceContext->VSSetShader(shaders.getFinalVertexShader(), nullptr, 0);
deviceContext->PSSetShader(shaders.getFinalShader(), nullptr, 0);
@ -529,6 +564,7 @@ struct DX11OITRenderer : public DX11Renderer
//
renderABuffer();
deviceContext->PSSetShaderResources(0, 1, &p);
deviceContext->IASetInputLayout(mainInputLayout);
// Clear the stencil from this pass
deviceContext->ClearDepthStencilView(depthStencilView2, D3D11_CLEAR_STENCIL, 0.f, 0);
@ -559,6 +595,7 @@ struct DX11OITRenderer : public DX11Renderer
if (!pvrrc.isRenderFramebuffer)
{
n2Helper.resetCache();
uploadGeometryBuffers();
updateFogTexture();
@ -609,6 +646,9 @@ private:
ComPtr<ID3D11Buffer> trPolyParamsBuffer;
u32 trPolyParamsBufferSize = 0;
ComPtr<ID3D11ShaderResourceView> trPolyParamsBufferView;
ComPtr<ID3D11InputLayout> mainInputLayout; // FIXME
ComPtr<ID3D11InputLayout> finalInputLayout;
ComPtr<ID3D11Buffer> vtxPolyConstants;
};
Renderer *rend_OITDirectX11()

View File

@ -18,6 +18,7 @@
*/
#include "dx11_oitshaders.h"
#include "../dx11context.h"
#include "../dx11_naomi2.h"
const char * const VertexShader = R"(
#if pp_Gouraud == 1
@ -35,6 +36,8 @@ struct VertexIn
float4 col1 : COLOR2;
float4 spec1 : COLOR3;
float2 uv1 : TEXCOORD1;
float3 normal: NORMAL; // unused
uint vertexId : SV_VertexID;
};
struct VertexOut
@ -43,12 +46,13 @@ struct VertexOut
float4 uv : TEXCOORD0;
INTERPOLATION float4 col : COLOR0;
INTERPOLATION float4 spec : COLOR1;
float4 uv1 : TEXCOORD1;
float2 uv1 : TEXCOORD1;
INTERPOLATION float4 col1 : COLOR2;
INTERPOLATION float4 spec1 : COLOR3;
nointerpolation uint index : BLENDINDICES0;
};
cbuffer constantBuffer : register(b0)
cbuffer shaderConstants : register(b0)
{
float4x4 transMatrix;
float4 leftPlane;
@ -57,6 +61,11 @@ cbuffer constantBuffer : register(b0)
float4 bottomPlane;
};
cbuffer polyConstants : register(b1)
{
int polyNumber;
};
[clipplanes(leftPlane, topPlane, rightPlane, bottomPlane)]
VertexOut main(in VertexIn vin)
{
@ -75,7 +84,8 @@ VertexOut main(in VertexIn vin)
vo.spec1 = vin.spec1;
#endif
vo.uv = float4(vin.uv * vo.pos.z, 0.f, vo.pos.z);
vo.uv1 = float4(vin.uv1 * vo.pos.z, 0.f, 0.f);
vo.uv1 = vin.uv1 * vo.pos.z;
vo.index = (uint(polyNumber) << 18) + vin.vertexId;
vo.pos.w = 1.f;
vo.pos.z = 0.f;
@ -195,11 +205,16 @@ bool getShadowEnable(in PolyParam pp)
return (pp.tsp_isp_pcw & 1) != 0;
}
uint getPolyNumber(in Pixel pixel)
uint getPolyIndex(in Pixel pixel)
{
return pixel.seq_num & 0x3FFFFFFFu;
}
uint getPolyNumber(in Pixel pixel)
{
return (pixel.seq_num & 0x3FFFFFFFu) >> 18;
}
#define SHADOW_STENCIL 0x40000000u
#define SHADOW_ACC 0x80000000u
@ -269,9 +284,10 @@ struct VertexIn
float4 uv : TEXCOORD0;
INTERPOLATION float4 col : COLOR0;
INTERPOLATION float4 spec : COLOR1;
float4 uv1 : TEXCOORD1;
float2 uv1 : TEXCOORD1;
INTERPOLATION float4 col1 : COLOR2;
INTERPOLATION float4 spec1 : COLOR3;
nointerpolation uint index : BLENDINDICES0;
};
Texture2D texture0 : register(t0);
@ -299,7 +315,6 @@ cbuffer polyConstantBuffer : register(b1)
int2 blend_mode1;
float paletteIndex;
float trilinearAlpha;
int pp_Number;
// two volume mode
int shading_instr0;
@ -406,7 +421,7 @@ PSO main(in VertexIn inpix)
float2 uv;
#if pp_TwoVolumes == 1
if (area1)
uv = inpix.uv1.xy / inpix.uv.w;
uv = inpix.uv1 / inpix.uv.w;
else
#endif
uv = inpix.uv.xy / inpix.uv.w;
@ -550,7 +565,7 @@ PSO main(in VertexIn inpix)
Pixel pixel;
pixel.color = packColors(clamp(color, 0.f, 1.f));
pixel.depth = inpix.uv.w;
pixel.seq_num = uint(pp_Number);
pixel.seq_num = inpix.index;
InterlockedExchange(abufferPointers[coords], idx, pixel.next);
Pixels[idx] = pixel;
@ -594,7 +609,7 @@ int fillAndSortFragmentArray(in uint2 coords, out uint pixel_list[MAX_PIXELS_PER
uint jIdx = pixel_list[j];
while (j >= 0
&& (Pixels[jIdx].depth > Pixels[idx].depth
|| (Pixels[jIdx].depth == Pixels[idx].depth && getPolyNumber(Pixels[jIdx]) > getPolyNumber(Pixels[idx]))))
|| (Pixels[jIdx].depth == Pixels[idx].depth && getPolyIndex(Pixels[jIdx]) > getPolyIndex(Pixels[idx]))))
{
pixel_list[j + 1] = pixel_list[j];
j--;
@ -819,6 +834,9 @@ const char * const MacroValues[] { "0", "1", "2", "3" };
static D3D_SHADER_MACRO VertexMacros[]
{
{ "pp_Gouraud", "1" },
{ "POSITION_ONLY", "0" },
{ "pp_TwoVolumes", "0" },
{ "LIGHT_ON", "1" },
{ nullptr, nullptr }
};
@ -907,24 +925,53 @@ const ComPtr<ID3D11PixelShader>& DX11OITShaders::getShader(bool pp_Texture, bool
return shader;
}
// Returns the cached vertex shader for the requested variant, compiling it on first use.
//
// gouraud      value given to the pp_Gouraud macro
// naomi2       compile DX11N2VertexShader instead of the regular VertexShader
// positionOnly value given to POSITION_ONLY (only set for Naomi 2)
// lightOn      value given to LIGHT_ON (only set for Naomi 2); when set and positionOnly
//              is false, DX11N2ColorShader is appended to the source
// twoVolumes   value given to pp_TwoVolumes (only set for Naomi 2)
const ComPtr<ID3D11VertexShader>& DX11OITShaders::getVertexShader(bool gouraud, bool naomi2, bool positionOnly, bool lightOn, bool twoVolumes)
{
	// The regular (non Naomi 2) branch only configures pp_Gouraud, so the remaining flags
	// must not be part of its cache key: otherwise the same source is compiled and stored
	// once per flag combination. Naomi 2 keys always have bit 1 set and so never collide
	// with the regular keys (0 and 1).
	u32 hash = (u32)gouraud;
	if (naomi2)
		hash |= (1u << 1)
			| ((u32)positionOnly << 2)
			| ((u32)lightOn << 3)
			| ((u32)twoVolumes << 4);
	auto& shader = vertexShaders[hash];
	if (!shader)
	{
		VertexMacros[0].Definition = MacroValues[gouraud];
		if (!naomi2)
		{
			shader = compileVS(VertexShader, "main", VertexMacros);
		}
		else
		{
			VertexMacros[1].Definition = MacroValues[positionOnly];
			VertexMacros[2].Definition = MacroValues[twoVolumes];
			VertexMacros[3].Definition = MacroValues[lightOn];
			std::string source(DX11N2VertexShader);
			// The lighting code is only needed when colors are actually computed
			if (!positionOnly && lightOn)
				source += std::string("\n") + DX11N2ColorShader;
			shader = compileVS(source.c_str(), "main", VertexMacros);
		}
	}

	return shader;
}
// Returns the cached modifier volume vertex shader, compiling it on first use.
// naomi2 selects the position-only variant of DX11N2VertexShader instead of ModVolVertexShader.
const ComPtr<ID3D11VertexShader>& DX11OITShaders::getMVVertexShader(bool naomi2)
{
	ComPtr<ID3D11VertexShader>& shader = modVolVertexShaders[naomi2 ? 1 : 0];
	if (!shader)
	{
		if (naomi2)
		{
			// VertexMacros is shared with the other vertex shader getters: set every entry
			VertexMacros[0].Definition = MacroValues[false];	// pp_Gouraud
			VertexMacros[1].Definition = MacroValues[true];		// POSITION_ONLY
			VertexMacros[2].Definition = MacroValues[false];	// pp_TwoVolumes
			VertexMacros[3].Definition = MacroValues[false];	// LIGHT_ON
			shader = compileVS(DX11N2VertexShader, "main", VertexMacros);
		}
		else
		{
			shader = compileVS(ModVolVertexShader, "main", nullptr);
		}
	}

	return shader;
}
const ComPtr<ID3D11PixelShader>& DX11OITShaders::getModVolShader()
@ -1021,13 +1068,27 @@ ComPtr<ID3D11PixelShader> DX11OITShaders::compilePS(const char* source, const ch
// Compiles the Naomi 2 vertex shader (with DX11N2ColorShader appended) and returns the
// compiled blob instead of a shader object.
// FIXME duplicates the Naomi 2 branch of getVertexShader()
ComPtr<ID3DBlob> DX11OITShaders::getVertexShaderBlob()
{
	// VertexMacros is a shared static array: set every entry so that no value left over
	// from a previous compile (e.g. LIGHT_ON = 0 from getMVVertexShader) leaks into this one.
	VertexMacros[0].Definition = MacroValues[true];		// pp_Gouraud
	VertexMacros[1].Definition = MacroValues[false];	// POSITION_ONLY
	VertexMacros[2].Definition = MacroValues[true];		// pp_TwoVolumes
	VertexMacros[3].Definition = MacroValues[true];		// LIGHT_ON, matches the appended color shader
	std::string source(DX11N2VertexShader);
	source += std::string("\n") + DX11N2ColorShader;

	return compileShader(source.c_str(), "main", "vs_5_0", VertexMacros);
}
// Compiles the position-only variant of the Naomi 2 vertex shader and returns the
// compiled blob instead of a shader object.
// FIXME duplicates the Naomi 2 branch of getMVVertexShader()
ComPtr<ID3DBlob> DX11OITShaders::getMVVertexShaderBlob()
{
	// Same macro values as getMVVertexShader(true). Every entry of the shared static
	// array is set so the result doesn't depend on whichever shader was compiled before.
	VertexMacros[0].Definition = MacroValues[false];	// pp_Gouraud
	VertexMacros[1].Definition = MacroValues[true];		// POSITION_ONLY
	VertexMacros[2].Definition = MacroValues[false];	// pp_TwoVolumes
	VertexMacros[3].Definition = MacroValues[false];	// LIGHT_ON

	return compileShader(DX11N2VertexShader, "main", "vs_5_0", VertexMacros);
}
// Compiles OITFinalVertexShaderSource (entry point "main", vs_5_0 profile) without any
// macro definition and returns the compiled blob.
ComPtr<ID3DBlob> DX11OITShaders::getFinalVertexShaderBlob()
{
	const D3D_SHADER_MACRO *noMacros = nullptr;
	return compileShader(OITFinalVertexShaderSource, "main", "vs_5_0", noMacros);
}
void DX11OITShaders::init(const ComPtr<ID3D11Device>& device, pD3DCompile D3DCompile)

View File

@ -35,9 +35,9 @@ public:
const ComPtr<ID3D11PixelShader>& getShader(bool pp_Texture, bool pp_UseAlpha, bool pp_IgnoreTexA, u32 pp_ShadInstr,
bool pp_Offset, u32 pp_FogCtrl, bool pp_BumpMap, bool fog_clamping,
bool palette, bool gouraud, bool alphaTest, bool clipInside, bool nearestWrapFix, bool twoVolumes, Pass pass);
const ComPtr<ID3D11VertexShader>& getVertexShader(bool gouraud);
const ComPtr<ID3D11VertexShader>& getVertexShader(bool gouraud, bool naomi2, bool positionOnly, bool lightOn, bool twoVolumes = true);
const ComPtr<ID3D11PixelShader>& getModVolShader();
const ComPtr<ID3D11VertexShader>& getMVVertexShader();
const ComPtr<ID3D11VertexShader>& getMVVertexShader(bool naomi2);
const ComPtr<ID3D11PixelShader>& getFinalShader();
const ComPtr<ID3D11PixelShader>& getTrModVolShader(int type);
const ComPtr<ID3D11VertexShader>& getFinalVertexShader();
@ -46,10 +46,10 @@ public:
{
saveCache(CacheFile);
shaders.clear();
gouraudVertexShader.reset();
flatVertexShader.reset();
vertexShaders.clear();
modVolShader.reset();
modVolVertexShader.reset();
for (auto& shader : modVolVertexShaders)
shader.reset();
for (auto& shader : trModVolShaders)
shader.reset();
finalShader.reset();
@ -59,6 +59,7 @@ public:
}
ComPtr<ID3DBlob> getVertexShaderBlob();
ComPtr<ID3DBlob> getMVVertexShaderBlob();
ComPtr<ID3DBlob> getFinalVertexShaderBlob();
private:
ComPtr<ID3DBlob> compileShader(const char *source, const char* function, const char* profile, const D3D_SHADER_MACRO *pDefines);
@ -67,10 +68,9 @@ private:
ComPtr<ID3D11Device> device;
std::unordered_map<u32, ComPtr<ID3D11PixelShader>> shaders;
ComPtr<ID3D11VertexShader> gouraudVertexShader;
ComPtr<ID3D11VertexShader> flatVertexShader;
std::unordered_map<u32, ComPtr<ID3D11VertexShader>> vertexShaders;
ComPtr<ID3D11PixelShader> modVolShader;
ComPtr<ID3D11VertexShader> modVolVertexShader;
ComPtr<ID3D11VertexShader> modVolVertexShaders[2];
ComPtr<ID3D11PixelShader> trModVolShaders[4];
ComPtr<ID3D11PixelShader> finalShader;

View File

@ -34,14 +34,14 @@ N2Vertex4Source::N2Vertex4Source(const gl4PipelineShader* shader) : OpenGl4Sourc
{
if (shader == nullptr)
{
addConstant("GEOM_ONLY", 1);
addConstant("POSITION_ONLY", 1);
addConstant("pp_TwoVolumes", 0);
addConstant("pp_Gouraud", 0);
addConstant("pp_Texture", 0);
}
else
{
addConstant("GEOM_ONLY", shader->pass == Pass::Depth); // geometry only for depth pass
addConstant("POSITION_ONLY", shader->pass == Pass::Depth); // geometry only for depth pass
addConstant("pp_TwoVolumes", shader->pp_TwoVolumes || shader->pp_BumpMap);
addConstant("pp_Gouraud", shader->pp_Gouraud);
addConstant("pp_Texture", shader->pp_Texture);

View File

@ -233,7 +233,7 @@ __forceinline
glcache.DepthFunc(Zfunction[gp->isp.DepthMode]);
}
if (SortingEnabled && !config::PerStripSorting)
if (SortingEnabled /* && !config::PerStripSorting */) // Looks glitchy too but less missing graphics (but wrong depth order...)
glcache.DepthMask(GL_FALSE);
else
{

View File

@ -30,7 +30,7 @@ uniform int pp_Number;
// Vertex input
in vec3 in_pos;
#if GEOM_ONLY == 0
#if POSITION_ONLY == 0
in vec4 in_base;
in vec4 in_offs;
in vec2 in_uv;
@ -56,7 +56,7 @@ void wDivide(inout vec4 vpos)
{
vpos = vec4(vpos.xy / vpos.w, 1.0 / vpos.w, 1.0);
vpos = ndcMat * vpos;
#if GEOM_ONLY == 1
#if POSITION_ONLY == 1
vtx_uv = vec3(0.0, 0.0, vpos.z);
#else
#if pp_Gouraud == 1
@ -79,7 +79,7 @@ void wDivide(inout vec4 vpos)
void main()
{
vec4 vpos = mvMat * vec4(in_pos, 1.0);
#if GEOM_ONLY == 0
#if POSITION_ONLY == 0
vtx_base = in_base;
vtx_offs = in_offs;
vec4 vnorm = normalize(normalMat * vec4(in_normal, 0.0));
@ -203,7 +203,7 @@ void computeColors(inout vec4 baseCol, inout vec4 offsetCol, in int volIdx, in v
else
{
lightDir = normalize(light.position.xyz - position);
if (light.attnDistA != 1.0 && light.attnDistB != 0.0)
if (light.attnDistA != 1.0 || light.attnDistB != 0.0)
{
float distance = length(light.position.xyz - position);
if (light.distAttnMode == 0)
@ -359,7 +359,7 @@ void computeBumpMap(inout vec4 color0, in vec4 color1, in vec3 position, in vec3
N2VertexSource::N2VertexSource(bool gouraud, bool geometryOnly, bool texture) : OpenGlSource()
{
addConstant("pp_Gouraud", gouraud);
addConstant("GEOM_ONLY", geometryOnly);
addConstant("POSITION_ONLY", geometryOnly);
addConstant("pp_TwoVolumes", 0);
addConstant("pp_Texture", (int)texture);

View File

@ -192,6 +192,8 @@ void setN2Uniforms(const PolyParam *pp, ShaderType *shader)
}
glUniform1i(shader->useBaseOver, 0);
glUniform1i(shader->lightCount, 0);
glUniform1i(shader->bumpId0, -1);
glUniform1i(shader->bumpId1, -1);
}
}
glUniform1i(shader->bumpMapping, pp->pcw.Texture == 1 && pp->tcw.PixelFmt == PixelBumpMap);

View File

@ -42,10 +42,10 @@ static bool operator<(const PolyParam& left, const PolyParam& right)
return left.zvZ < right.zvZ;
}
static float getProjectedZ(const Vertex *v, const glm::mat4& mat)
static float getProjectedZ(const Vertex *v, const float *mat)
{
// 1 / w
return 1 / mat[0][3] * v->x + mat[1][3] * v->y + mat[2][3] * v->z + mat[3][3];
// -1 / z
return -1 / (mat[2] * v->x + mat[1 * 4 + 2] * v->y + mat[2 * 4 + 2] * v->z + mat[3 * 4 + 2]);
}
void SortPParams(int first, int count)
@ -178,16 +178,12 @@ void GenSorted(int first, int count, std::vector<SortTrigDrawParam>& pidx_sort,
{
const u32 *idx = idx_base + pp->first;
u32 flip = 0;
glm::mat4 mat;
float z0, z1;
float z0 = 0, z1 = 0;
if (pp->isNaomi2())
{
mat = glm::make_mat4(pp->projMatrix);
if (pp->mvMatrix != nullptr)
mat *= glm::make_mat4(pp->mvMatrix);
z0 = getProjectedZ(vtx_base + idx[0], mat);
z1 = getProjectedZ(vtx_base + idx[1], mat);
z0 = getProjectedZ(vtx_base + idx[0], pp->mvMatrix);
z1 = getProjectedZ(vtx_base + idx[1], pp->mvMatrix);
}
for (u32 i = 0; i < pp->count - 2; i++)
{
@ -207,7 +203,7 @@ void GenSorted(int first, int count, std::vector<SortTrigDrawParam>& pidx_sort,
lst[pfsti].pid = ppid;
if (pp->isNaomi2())
{
float z2 = getProjectedZ(v2, mat);
float z2 = getProjectedZ(v2, pp->mvMatrix);
lst[pfsti].z = std::min(z0, std::min(z1, z2));
z0 = z1;
z1 = z2;