Merge pull request #1514 from PCSX2/gsdx-nouveau-driver

Gsdx alpha test improvement
This commit is contained in:
Gregory Hainaut 2016-08-14 20:29:09 +02:00 committed by GitHub
commit 447e0d0336
6 changed files with 209 additions and 124 deletions

View File

@ -1029,7 +1029,6 @@ void GSDeviceOGL::SelfShaderTest()
for (int fmt = 0; fmt < 3; fmt++) {
for (int i = 0; i < 3; i++) {
PSSelector sel;
sel.atst = 1;
sel.tfx = 4;
int ib = (i + 1) % 3;
@ -1049,7 +1048,7 @@ void GSDeviceOGL::SelfShaderTest()
SelfShaderTestPrint(test, nb_shader);
test = "Alpha_Test";
for (int atst = 0; atst < 8; atst++) {
for (int atst = 0; atst < 5; atst++) {
PSSelector sel;
sel.tfx = 4;
@ -1063,7 +1062,6 @@ void GSDeviceOGL::SelfShaderTest()
for (int read_ba = 0; read_ba < 2; read_ba++) {
PSSelector sel;
sel.tfx = 4;
sel.atst = 1;
sel.fog = 1;
sel.fbmask = 1;
@ -1079,7 +1077,6 @@ void GSDeviceOGL::SelfShaderTest()
for (int date = 1; date < 7; date++) {
PSSelector sel;
sel.tfx = 4;
sel.atst = 1;
sel.date = date;
std::string file = format("Shader_Date_%d.glsl.asm", date);
@ -1091,7 +1088,6 @@ void GSDeviceOGL::SelfShaderTest()
for (int fmt = 0; fmt < 3; fmt++) {
PSSelector sel;
sel.tfx = 4;
sel.atst = 1;
sel.fba = 1;
sel.dfmt = fmt;
@ -1105,7 +1101,6 @@ void GSDeviceOGL::SelfShaderTest()
{
PSSelector sel;
sel.tfx = 1;
sel.atst = 1;
sel.fst = 0;
sel.iip = 1;
@ -1121,7 +1116,6 @@ void GSDeviceOGL::SelfShaderTest()
for (int tfx = 0; tfx < 5; tfx++) {
for (int tcc = 0; tcc < 2; tcc++) {
PSSelector sel;
sel.atst = 1;
sel.fst = 1;
sel.channel = channel;
@ -1144,7 +1138,6 @@ void GSDeviceOGL::SelfShaderTest()
for (int wms = 1; wms < 4; wms++) {
for (int wmt = 1; wmt < 4; wmt++) {
PSSelector sel;
sel.atst = 1;
sel.tfx = 1;
sel.tcc = 1;
sel.fst = 1;

View File

@ -168,6 +168,99 @@ void GSRendererOGL::SetupIA()
dev->IASetPrimitiveTopology(t);
}
// Selects the pixel-shader alpha-test variant (m_ps_sel.atst) and the matching
// reference value (ps_cb.FogColor_AREF.a) for one alpha-test pass.
//
// pass: 2 applies the opposite comparison of TEST.ATST; any other value uses
//       TEST.ATST unchanged.
// tex:  may be null. When its m_spritehack_t flag is set, an ATST_LESS test is
//       not emulated in the shader.
//
// Nothing is written when TEST.ATE is off.
void GSRendererOGL::EmulateAtst(const int pass, const GSTextureCache::Source* tex)
{
	// Opposite comparison of every ATST value, indexed by TEST.ATST.
	static const uint32 inverted_atst[] = {ATST_ALWAYS, ATST_NEVER, ATST_GEQUAL, ATST_GREATER, ATST_NOTEQUAL, ATST_LESS, ATST_LEQUAL, ATST_EQUAL};

	if (!m_context->TEST.ATE)
		return;

	int func = m_context->TEST.ATST;
	if (pass == 2)
		func = inverted_atst[func];

	const float aref = (float)m_context->TEST.AREF;
	const bool sprite_hack = tex && tex->m_spritehack_t;

	// Shader variant: 0 = no test, 1 = discard above the reference,
	// 2 = discard below the reference, 3 = discard when different,
	// 4 = discard when equal.
	int variant = 0;
	// The reference constant is only rewritten for variants that read it.
	bool write_ref = false;
	float ref = 0.0f;

	switch (func) {
		case ATST_LESS:
			if (!sprite_hack) {
				// Keep alpha < AREF: discard what is above AREF - 0.1
				ref = aref - 0.1f;
				variant = 1;
				write_ref = true;
			}
			break;

		case ATST_LEQUAL:
			// Keep alpha <= AREF: same variant as LESS, threshold moved up by one
			ref = aref - 0.1f + 1.0f;
			variant = 1;
			write_ref = true;
			break;

		case ATST_GEQUAL:
			// Note: a -1 multiplication factor might allow sharing the LEQUAL variant
			ref = aref - 0.1f;
			variant = 2;
			write_ref = true;
			break;

		case ATST_GREATER:
			// Note: a -1 multiplication factor might allow sharing the LESS variant
			ref = aref - 0.1f + 1.0f;
			variant = 2;
			write_ref = true;
			break;

		case ATST_EQUAL:
			ref = aref;
			variant = 3;
			write_ref = true;
			break;

		case ATST_NOTEQUAL:
			ref = aref;
			variant = 4;
			write_ref = true;
			break;

		case ATST_NEVER: // The draw is not done, so the shader does not need to implement it
		case ATST_ALWAYS:
		default:
			break;
	}

	if (write_ref)
		ps_cb.FogColor_AREF.a = ref;

	m_ps_sel.atst = variant;
}
// Fills the depth part of the output-merger selector (m_om_dssel.ztst / zwe)
// and the vertex-shader depth mask (vs_cb.DepthMask) from the TEST and ZBUF
// registers of the current context.
void GSRendererOGL::EmulateZbuffer()
{
	if (!m_context->TEST.ZTE) {
		// Depth test disabled: everything passes. zwe is left untouched here.
		m_om_dssel.ztst = ZTST_ALWAYS;
	} else {
		m_om_dssel.ztst = m_context->TEST.ZTST;
		m_om_dssel.zwe = !m_context->ZBUF.ZMSK;
	}

	const bool is_z32 = (m_context->ZBUF.PSM == PSM_PSMZ32);
	const bool is_z24 = (m_context->ZBUF.PSM == PSM_PSMZ24);

	// Largest depth value of the buffer format (max_z) and the bound used by
	// the GEQUAL optimization below (max_z_flt). Any format other than
	// Z32/Z24 gets the 16 bits limits.
	uint32 max_z = 0xFFFF;
	uint32 max_z_flt = 0xFFFF;
	if (is_z32) {
		max_z = 0xFFFFFFFF;
		max_z_flt = 0xFFFFFFFF;
	} else if (is_z24) {
		// Float mantissa is only 23 bits so the max 24 bits value was rounded down
		max_z = 0xFFFFFF;
		max_z_flt = 0xFFFFFE;
	}

	// The real GS appears to do no masking based on the Z buffer format, and
	// writing larger Z values than the buffer supports seems to be an error
	// condition on the real GS, causing it to crash.
	// We are probably receiving bad coordinates from VU1 in these cases.
	vs_cb.DepthMask = GSVector2i(max_z, max_z);

	const bool check_range = !is_z32 && m_om_dssel.zwe && (m_om_dssel.ztst >= ZTST_ALWAYS);
	if (check_range && m_vt.m_max.p.z > max_z) {
		ASSERT(m_vt.m_min.p.z > max_z); // sfex capcom logo
		// FIXME: the following conditional fixes some dialog frame in Wild Arms 3,
		// but may not be what was intended.
		if (m_vt.m_min.p.z > max_z) {
			GL_INS("Bad Z size on %s buffers", psm_str(m_context->ZBUF.PSM));
			m_om_dssel.ztst = ZTST_ALWAYS;
		}
	}

	// Minor optimization of a corner case (it allows some alpha test effects
	// to be emulated better)
	const bool geq_always_passes = (m_om_dssel.ztst == ZTST_GEQUAL) && (m_vt.m_min.p.z >= max_z_flt);
	if (geq_always_passes) {
		GL_INS("Optimize Z test GEQUAL to ALWAYS (%s)", psm_str(m_context->ZBUF.PSM));
		m_om_dssel.ztst = ZTST_ALWAYS;
	}
}
void GSRendererOGL::EmulateTextureShuffleAndFbmask()
{
if (m_texture_shuffle) {
@ -757,10 +850,6 @@ void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex)
// Setup Texture ressources
dev->SetupSampler(m_ps_ssel);
dev->PSSetShaderResources(tex->m_texture, tex->m_palette);
if (tex->m_spritehack_t && (m_ps_sel.atst == 2)) {
m_ps_sel.atst = 1;
}
}
GSRendererOGL::PRIM_OVERLAP GSRendererOGL::PrimitiveOverlap()
@ -962,6 +1051,9 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
bool DATE_GL45 = false;
bool DATE_one = false;
bool ate_first_pass = m_context->TEST.DoFirstPass();
bool ate_second_pass = m_context->TEST.DoSecondPass();
ResetStates();
ASSERT(m_dev != NULL);
@ -1137,55 +1229,10 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
// om
if (m_context->TEST.ZTE)
{
m_om_dssel.ztst = m_context->TEST.ZTST;
m_om_dssel.zwe = !m_context->ZBUF.ZMSK;
}
else
{
m_om_dssel.ztst = ZTST_ALWAYS;
}
EmulateZbuffer(); // will update VS depth mask
// vs
// The real GS appears to do no masking based on the Z buffer format and writing larger Z values
// than the buffer supports seems to be an error condition on the real GS, causing it to crash.
// We are probably receiving bad coordinates from VU1 in these cases.
vs_cb.DepthMask = GSVector2i(0xFFFFFFFF, 0xFFFFFFFF);
if (m_om_dssel.ztst >= ZTST_ALWAYS && m_om_dssel.zwe)
{
if (m_context->ZBUF.PSM == PSM_PSMZ24)
{
if (m_vt.m_max.p.z > 0xffffff)
{
ASSERT(m_vt.m_min.p.z > 0xffffff);
// Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended.
if (m_vt.m_min.p.z > 0xffffff)
{
GL_INS("Bad Z size on 24 bits buffers")
vs_cb.DepthMask = GSVector2i(0x00FFFFFF, 0x00FFFFFF);
m_om_dssel.ztst = ZTST_ALWAYS;
}
}
}
else if (m_context->ZBUF.PSM == PSM_PSMZ16 || m_context->ZBUF.PSM == PSM_PSMZ16S)
{
if (m_vt.m_max.p.z > 0xffff)
{
ASSERT(m_vt.m_min.p.z > 0xffff); // sfex capcom logo
// Fixme : Same as above, I guess.
if (m_vt.m_min.p.z > 0xffff)
{
GL_INS("Bad Z size on 16 bits buffers")
vs_cb.DepthMask = GSVector2i(0x0000FFFF, 0x0000FFFF);
m_om_dssel.ztst = ZTST_ALWAYS;
}
}
}
}
// FIXME Opengl support half pixel center (as dx10). Code could be easier!!!
float sx = 2.0f * rtscale.x / (rtsize.x << 4);
float sy = 2.0f * rtscale.y / (rtsize.y << 4);
@ -1242,21 +1289,25 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
#endif
}
if (m_context->TEST.ATE)
m_ps_sel.atst = m_context->TEST.ATST;
else
m_ps_sel.atst = ATST_ALWAYS;
if (m_context->TEST.ATE && m_context->TEST.ATST > 1)
ps_cb.FogColor_AREF.a = (float)m_context->TEST.AREF;
// By default don't use texture
m_ps_sel.tfx = 4;
int atst = m_ps_sel.atst;
// Warning must be done after EmulateZbuffer
// Depth test is always true so it can be executed in 2 passes (no order required) unlike color.
// The idea is to compute first the color which is independent of the alpha test. And then do a 2nd
// pass to handle the depth based on the alpha test.
bool ate_all_color_then_depth = ate_first_pass & ate_second_pass & (m_context->TEST.AFAIL != AFAIL_ZB_ONLY) & (m_om_dssel.ztst == ZTST_ALWAYS);
if (ate_all_color_then_depth) {
// Render all color but don't update depth
// ATE is disabled here
m_om_dssel.zwe = false;
} else {
EmulateAtst(1, tex);
}
if (tex) {
EmulateTextureSampler(tex);
} else {
m_ps_sel.tfx = 4;
}
// Always bind the RT. This way special effect can use it.
dev->PSSetShaderResource(3, rt);
@ -1328,22 +1379,28 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
dev->OMSetRenderTargets(rt, ds, &scissor);
}
if (m_context->TEST.DoFirstPass())
if (ate_first_pass)
{
SendDraw();
}
if (m_context->TEST.DoSecondPass())
if (ate_second_pass)
{
ASSERT(!m_env.PABE.PABE);
static const uint32 iatst[] = {1, 0, 5, 6, 7, 2, 3, 4};
m_ps_sel.atst = iatst[atst];
if (tex && tex->m_spritehack_t && (m_ps_sel.atst == 2)) {
m_ps_sel.atst = 1;
if (ate_all_color_then_depth) {
// Enable ATE as first pass to update the depth
// of pixels that passed the alpha test
EmulateAtst(1, tex);
} else {
// second pass will process the pixels that failed
// the alpha test
EmulateAtst(2, tex);
}
// Potentially AREF was updated (hope perf impact will be limited)
dev->SetupCB(&vs_cb, &ps_cb);
dev->SetupPipeline(m_vs_sel, m_gs_sel, m_ps_sel);
bool z = m_om_dssel.zwe;
@ -1361,6 +1418,11 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
default: __assume(0);
}
if (ate_all_color_then_depth) {
z = true;
r = g = b = a = false;
}
if (z || r || g || b || a)
{
m_om_dssel.zwe = z;

View File

@ -79,6 +79,8 @@ class GSRendererOGL final : public GSRendererHW
inline void EmulateChannelShuffle(GSTexture** rt, const GSTextureCache::Source* tex);
inline void EmulateBlending(bool DATE_GL42);
inline void EmulateTextureSampler(const GSTextureCache::Source* tex);
inline void EmulateAtst(const int pass, const GSTextureCache::Source* tex);
inline void EmulateZbuffer();
public:
GSRendererOGL();

View File

@ -72,7 +72,8 @@ layout(std140, binding = 21) uniform cb21
vec4 WH;
vec2 TA;
float _pad0;
//float _pad0;
int Uber_ATST;
float Af;
uvec4 MskFix;

View File

@ -478,32 +478,45 @@ vec4 tfx(vec4 T, vec4 C)
void atst(vec4 C)
{
// FIXME use integer cmp
float a = C.a;
#if (PS_ATST == 0) // never
discard;
#elif (PS_ATST == 1) // always
// nothing to do
#elif (PS_ATST == 2) // l
if ((AREF - a - 0.5f) < 0.0f)
discard;
#elif (PS_ATST == 3 ) // le
if ((AREF - a + 0.5f) < 0.0f)
discard;
#elif (PS_ATST == 4) // e
if ((0.5f - abs(a - AREF)) < 0.0f)
discard;
#elif (PS_ATST == 5) // ge
if ((a-AREF + 0.5f) < 0.0f)
discard;
#elif (PS_ATST == 6) // g
if ((a-AREF - 0.5f) < 0.0f)
discard;
#elif (PS_ATST == 7) // ne
if ((abs(a - AREF) - 0.5f) < 0.0f)
discard;
#if 0
switch(Uber_ATST) {
case 0:
break;
case 1:
if (a > AREF) discard;
break;
case 2:
if (a < AREF) discard;
break;
case 3:
if (abs(a - AREF) > 0.5f) discard;
break;
case 4:
if (abs(a - AREF) < 0.5f) discard;
break;
}
#endif
#if 1
#if (PS_ATST == 0)
// nothing to do
#elif (PS_ATST == 1)
if (a > AREF) discard;
#elif (PS_ATST == 2)
if (a < AREF) discard;
#elif (PS_ATST == 3)
if (abs(a - AREF) > 0.5f) discard;
#elif (PS_ATST == 4)
if (abs(a - AREF) < 0.5f) discard;
#endif
#endif
}
void fog(inout vec4 C, float f)

View File

@ -97,7 +97,8 @@ static const char* const common_header_glsl =
" vec4 WH;\n"
"\n"
" vec2 TA;\n"
" float _pad0;\n"
" //float _pad0;\n"
" int Uber_ATST;\n"
" float Af;\n"
"\n"
" uvec4 MskFix;\n"
@ -1321,32 +1322,45 @@ static const char* const tfx_fs_all_glsl =
"\n"
"void atst(vec4 C)\n"
"{\n"
" // FIXME use integer cmp\n"
" float a = C.a;\n"
"\n"
"#if (PS_ATST == 0) // never\n"
" discard;\n"
"#elif (PS_ATST == 1) // always\n"
" // nothing to do\n"
"#elif (PS_ATST == 2) // l\n"
" if ((AREF - a - 0.5f) < 0.0f)\n"
" discard;\n"
"#elif (PS_ATST == 3 ) // le\n"
" if ((AREF - a + 0.5f) < 0.0f)\n"
" discard;\n"
"#elif (PS_ATST == 4) // e\n"
" if ((0.5f - abs(a - AREF)) < 0.0f)\n"
" discard;\n"
"#elif (PS_ATST == 5) // ge\n"
" if ((a-AREF + 0.5f) < 0.0f)\n"
" discard;\n"
"#elif (PS_ATST == 6) // g\n"
" if ((a-AREF - 0.5f) < 0.0f)\n"
" discard;\n"
"#elif (PS_ATST == 7) // ne\n"
" if ((abs(a - AREF) - 0.5f) < 0.0f)\n"
" discard;\n"
"#if 0\n"
" switch(Uber_ATST) {\n"
" case 0:\n"
" break;\n"
" case 1:\n"
" if (a > AREF) discard;\n"
" break;\n"
" case 2:\n"
" if (a < AREF) discard;\n"
" break;\n"
" case 3:\n"
" if (abs(a - AREF) > 0.5f) discard;\n"
" break;\n"
" case 4:\n"
" if (abs(a - AREF) < 0.5f) discard;\n"
" break;\n"
" }\n"
"\n"
"\n"
"#endif\n"
"\n"
"#if 1\n"
"\n"
"#if (PS_ATST == 0)\n"
" // nothing to do\n"
"#elif (PS_ATST == 1)\n"
" if (a > AREF) discard;\n"
"#elif (PS_ATST == 2)\n"
" if (a < AREF) discard;\n"
"#elif (PS_ATST == 3)\n"
" if (abs(a - AREF) > 0.5f) discard;\n"
"#elif (PS_ATST == 4)\n"
" if (abs(a - AREF) < 0.5f) discard;\n"
"#endif\n"
"\n"
"#endif\n"
"\n"
"}\n"
"\n"
"void fog(inout vec4 C, float f)\n"