OpenGL Renderer: Mitigate some of the performance penalty of using the NDS Style Depth Calculation option.

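- GPUs that support the GL_ARB_conservative_depth extension will benefit more from this commit, because the fragment shader can then declare gl_FragDepth with the depth_less layout qualifier when neither the depth-equals test nor W-depth is in use. (OpenGL 3.2 renderer only.)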
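- Also fix some miscellaneous bugs (for example, DrawShadowPolygon now resets the polyDepthOffsetMode uniform to 0 after the shadow volume pass).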
This commit is contained in:
rogerman 2018-12-29 22:37:37 -08:00
parent 0c0bd5144e
commit 4d6a132116
3 changed files with 213 additions and 142 deletions

View File

@ -403,14 +403,25 @@ void main()\n\
gl_FragData[2] = newFogAttributes;\n\ gl_FragData[2] = newFogAttributes;\n\
#endif\n\ #endif\n\
#if USE_NDS_DEPTH_CALCULATION || ENABLE_FOG\n\ #if USE_NDS_DEPTH_CALCULATION || ENABLE_FOG\n\
float depthOffset = (polyDepthOffsetMode == 0) ? 0.0 : ((polyDepthOffsetMode == 1) ? -DEPTH_EQUALS_TEST_TOLERANCE : DEPTH_EQUALS_TEST_TOLERANCE);\n\ // It is tempting to perform the NDS depth calculation in the vertex shader rather than in the fragment shader.\n\
// Resist this temptation! It is much more reliable to do the depth calculation in the fragment shader due to\n\
// subtle interpolation differences between various GPUs and/or drivers. If the depth calculation is not done\n\
// here, then it is very possible for the user to experience Z-fighting in certain rendering situations.\n\
\n\ \n\
#if ENABLE_W_DEPTH\n\ #if NEEDS_DEPTH_EQUALS_TEST\n\
float newFragDepthValue = clamp( ( (vtxPosition.w * 4096.0) + depthOffset ) / 16777215.0, 0.0, 1.0 );\n\ float depthOffset = (polyDepthOffsetMode == 0) ? 0.0 : ((polyDepthOffsetMode == 1) ? -DEPTH_EQUALS_TEST_TOLERANCE : DEPTH_EQUALS_TEST_TOLERANCE);\n\
#if ENABLE_W_DEPTH\n\
float newFragDepthValue = clamp( ( (vtxPosition.w * 4096.0) + depthOffset ) / 16777215.0, 0.0, 1.0 );\n\
#else\n\
float newFragDepthValue = clamp( ( (floor(gl_FragCoord.z * 4194303.0) * 4.0) + depthOffset ) / 16777215.0, 0.0, 1.0 );\n\
#endif\n\
#else\n\ #else\n\
float vertW = (vtxPosition.w == 0.0) ? 0.00000001 : vtxPosition.w;\n\ #if ENABLE_W_DEPTH\n\
// hack: when using z-depth, drop some LSBs so that the overworld map in Dragon Quest IV shows up correctly\n\ float newFragDepthValue = clamp( (vtxPosition.w * 4096.0) / 16777215.0, 0.0, 1.0 );\n\
float newFragDepthValue = clamp( ( (floor(((vtxPosition.z/vertW) * 0.5 + 0.5) * 4194303.0) * 4.0) + depthOffset ) / 16777215.0, 0.0, 1.0 );\n\ #else\n\
// hack: when using z-depth, drop some LSBs so that the overworld map in Dragon Quest IV shows up correctly\n\
float newFragDepthValue = clamp( (floor(gl_FragCoord.z * 4194303.0) * 4.0) / 16777215.0, 0.0, 1.0 );\n\
#endif\n\
#endif\n\ #endif\n\
\n\ \n\
gl_FragDepth = newFragDepthValue;\n\ gl_FragDepth = newFragDepthValue;\n\
@ -1223,6 +1234,7 @@ OpenGLRenderer::OpenGLRenderer()
isMultisampledFBOSupported = false; isMultisampledFBOSupported = false;
isShaderSupported = false; isShaderSupported = false;
isSampleShadingSupported = false; isSampleShadingSupported = false;
isConservativeDepthSupported = false;
isVAOSupported = false; isVAOSupported = false;
willFlipOnlyFramebufferOnGPU = false; willFlipOnlyFramebufferOnGPU = false;
willFlipAndConvertFramebufferOnGPU = false; willFlipAndConvertFramebufferOnGPU = false;
@ -1241,6 +1253,7 @@ OpenGLRenderer::OpenGLRenderer()
_workingTextureUnpackBuffer = (FragmentColor *)malloc_alignedCacheLine(1024 * 1024 * sizeof(FragmentColor)); _workingTextureUnpackBuffer = (FragmentColor *)malloc_alignedCacheLine(1024 * 1024 * sizeof(FragmentColor));
_pixelReadNeedsFinish = false; _pixelReadNeedsFinish = false;
_needsZeroDstAlphaPass = true; _needsZeroDstAlphaPass = true;
_renderNeedsDepthEqualsTest = false;
_currentPolyIndex = 0; _currentPolyIndex = 0;
_lastTextureDrawTarget = OGLTextureUnitID_GColor; _lastTextureDrawTarget = OGLTextureUnitID_GColor;
_geometryProgramFlags.value = 0; _geometryProgramFlags.value = 0;
@ -2110,76 +2123,73 @@ Render3DError OpenGLRenderer::DrawAlphaTexturePolygon(const GLenum polyPrimitive
glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_FALSE); glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_FALSE);
} }
} }
else else if (DRAWMODE != OGLPolyDrawMode_DrawOpaquePolys)
{ {
if (DRAWMODE != OGLPolyDrawMode_DrawOpaquePolys) // Draw the translucent fragments.
glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr);
// Draw the opaque fragments if they might exist.
if (canHaveOpaqueFragments)
{ {
// Draw the translucent fragments. if (DRAWMODE != OGLPolyDrawMode_ZeroAlphaPass)
glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr);
// Draw the opaque fragments if they might exist.
if (canHaveOpaqueFragments)
{ {
if (DRAWMODE != OGLPolyDrawMode_ZeroAlphaPass) glStencilFunc(GL_ALWAYS, opaquePolyID, 0x3F);
{ glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
glStencilFunc(GL_ALWAYS, opaquePolyID, 0x3F); glDepthMask(GL_TRUE);
glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); }
glDepthMask(GL_TRUE);
} glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_TRUE);
glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr);
glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_TRUE); glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_FALSE);
glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr);
glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_FALSE); if (DRAWMODE != OGLPolyDrawMode_ZeroAlphaPass)
{
if (DRAWMODE != OGLPolyDrawMode_ZeroAlphaPass) glStencilFunc(GL_NOTEQUAL, 0x40 | opaquePolyID, 0x7F);
{ glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
glStencilFunc(GL_NOTEQUAL, 0x40 | opaquePolyID, 0x7F); glDepthMask((enableAlphaDepthWrite) ? GL_TRUE : GL_FALSE);
glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
glDepthMask((enableAlphaDepthWrite) ? GL_TRUE : GL_FALSE);
}
} }
} }
else // Draw the polygon as completely opaque. }
else // Draw the polygon as completely opaque.
{
glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_TRUE);
if (this->_emulateDepthLEqualPolygonFacing)
{ {
glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_TRUE); if (isPolyFrontFacing)
if (this->_emulateDepthLEqualPolygonFacing)
{ {
if (isPolyFrontFacing) glDepthFunc(GL_EQUAL);
{ glStencilFunc(GL_EQUAL, 0x40 | opaquePolyID, 0x40);
glDepthFunc(GL_EQUAL); glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr);
glStencilFunc(GL_EQUAL, 0x40 | opaquePolyID, 0x40);
glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr); glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
glDepthMask(GL_FALSE);
glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); glStencilOp(GL_KEEP, GL_KEEP, GL_ZERO);
glDepthMask(GL_FALSE); glStencilMask(0x40);
glStencilOp(GL_KEEP, GL_KEEP, GL_ZERO); glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr);
glStencilMask(0x40);
glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr); glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glDepthMask(GL_TRUE);
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glDepthFunc(GL_LESS);
glDepthMask(GL_TRUE); glStencilFunc(GL_ALWAYS, opaquePolyID, 0x3F);
glDepthFunc(GL_LESS); glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
glStencilFunc(GL_ALWAYS, opaquePolyID, 0x3F); glStencilMask(0xFF);
glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr);
glStencilMask(0xFF);
glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr);
}
else
{
glStencilFunc(GL_ALWAYS, 0x40 | opaquePolyID, 0x40);
glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr);
glStencilFunc(GL_ALWAYS, opaquePolyID, 0x3F);
}
} }
else else
{ {
glStencilFunc(GL_ALWAYS, 0x40 | opaquePolyID, 0x40);
glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr); glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr);
glStencilFunc(GL_ALWAYS, opaquePolyID, 0x3F);
} }
glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_FALSE);
} }
else
{
glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr);
}
glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_FALSE);
} }
} }
else else
@ -3084,11 +3094,11 @@ Render3DError OpenGLRenderer_1_2::CreateGeometryPrograms()
OGLGeometryFlags programFlags; OGLGeometryFlags programFlags;
programFlags.value = 0; programFlags.value = 0;
std::stringstream shaderHeader; std::stringstream fragShaderHeader;
shaderHeader << "#define DEPTH_EQUALS_TEST_TOLERANCE " << DEPTH_EQUALS_TEST_TOLERANCE << ".0 \n"; fragShaderHeader << "#define DEPTH_EQUALS_TEST_TOLERANCE " << DEPTH_EQUALS_TEST_TOLERANCE << ".0 \n";
shaderHeader << "\n"; fragShaderHeader << "\n";
for (size_t flagsValue = 0; flagsValue < 64; flagsValue++, programFlags.value++) for (size_t flagsValue = 0; flagsValue < 128; flagsValue++, programFlags.value++)
{ {
std::stringstream shaderFlags; std::stringstream shaderFlags;
shaderFlags << "#define USE_TEXTURE_SMOOTHING " << ((this->_enableTextureSmoothing) ? 1 : 0) << "\n"; shaderFlags << "#define USE_TEXTURE_SMOOTHING " << ((this->_enableTextureSmoothing) ? 1 : 0) << "\n";
@ -3100,9 +3110,10 @@ Render3DError OpenGLRenderer_1_2::CreateGeometryPrograms()
shaderFlags << "#define ENABLE_FOG " << ((programFlags.EnableFog) ? 1 : 0) << "\n"; shaderFlags << "#define ENABLE_FOG " << ((programFlags.EnableFog) ? 1 : 0) << "\n";
shaderFlags << "#define ENABLE_EDGE_MARK " << ((programFlags.EnableEdgeMark) ? 1 : 0) << "\n"; shaderFlags << "#define ENABLE_EDGE_MARK " << ((programFlags.EnableEdgeMark) ? 1 : 0) << "\n";
shaderFlags << "#define TOON_SHADING_MODE " << ((programFlags.ToonShadingMode) ? 1 : 0) << "\n"; shaderFlags << "#define TOON_SHADING_MODE " << ((programFlags.ToonShadingMode) ? 1 : 0) << "\n";
shaderFlags << "#define NEEDS_DEPTH_EQUALS_TEST " << ((programFlags.NeedsDepthEqualsTest) ? 1 : 0) << "\n";
shaderFlags << "\n"; shaderFlags << "\n";
std::string fragShaderCode = shaderHeader.str() + shaderFlags.str() + std::string(GeometryFragShader_100); std::string fragShaderCode = fragShaderHeader.str() + shaderFlags.str() + std::string(GeometryFragShader_100);
error = this->ShaderProgramCreate(OGLRef.vertexGeometryShaderID, error = this->ShaderProgramCreate(OGLRef.vertexGeometryShaderID,
OGLRef.fragmentGeometryShaderID[flagsValue], OGLRef.fragmentGeometryShaderID[flagsValue],
@ -3170,7 +3181,7 @@ void OpenGLRenderer_1_2::DestroyGeometryPrograms()
OGLRenderRef &OGLRef = *this->ref; OGLRenderRef &OGLRef = *this->ref;
for (size_t flagsValue = 0; flagsValue < 64; flagsValue++) for (size_t flagsValue = 0; flagsValue < 128; flagsValue++)
{ {
if (OGLRef.programGeometryID[flagsValue] == 0) if (OGLRef.programGeometryID[flagsValue] == 0)
{ {
@ -4238,37 +4249,6 @@ Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D &engine)
return OGLERROR_BEGINGL_FAILED; return OGLERROR_BEGINGL_FAILED;
} }
if (this->isShaderSupported)
{
this->_geometryProgramFlags.EnableWDepth = (engine.renderState.wbuffer) ? 1 : 0;
this->_geometryProgramFlags.EnableAlphaTest = (engine.renderState.enableAlphaTest) ? 1 : 0;
this->_geometryProgramFlags.EnableTextureSampling = (this->_enableTextureSampling) ? 1 : 0;
this->_geometryProgramFlags.EnableFog = (this->_enableFog) ? 1 : 0;
this->_geometryProgramFlags.EnableEdgeMark = (this->_enableEdgeMark) ? 1 : 0;
this->_geometryProgramFlags.ToonShadingMode = (engine.renderState.shading) ? 1 : 0;
glUseProgram(OGLRef.programGeometryID[this->_geometryProgramFlags.value]);
glUniform1i(OGLRef.uniformStateClearPolyID, this->_clearAttributes.opaquePolyID);
glUniform1f(OGLRef.uniformStateClearDepth, (GLfloat)this->_clearAttributes.depth / (GLfloat)0x00FFFFFF);
glUniform1f(OGLRef.uniformStateAlphaTestRef[this->_geometryProgramFlags.value], divide5bitBy31_LUT[engine.renderState.alphaTestRef]);
glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_FALSE);
glUniform1i(OGLRef.uniformPolyDrawShadow[this->_geometryProgramFlags.value], GL_FALSE);
}
else
{
if(engine.renderState.enableAlphaTest && (engine.renderState.alphaTestRef > 0))
{
glAlphaFunc(GL_GEQUAL, divide5bitBy31_LUT[engine.renderState.alphaTestRef]);
}
else
{
glAlphaFunc(GL_GREATER, 0);
}
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
}
GLushort *indexPtr = NULL; GLushort *indexPtr = NULL;
if (this->isVBOSupported) if (this->isVBOSupported)
@ -4286,6 +4266,7 @@ Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D &engine)
indexPtr = OGLRef.vertIndexBuffer; indexPtr = OGLRef.vertIndexBuffer;
} }
this->_renderNeedsDepthEqualsTest = false;
size_t vertIndexCount = 0; size_t vertIndexCount = 0;
for (size_t i = 0; i < engine.polylist->count; i++) for (size_t i = 0; i < engine.polylist->count; i++)
@ -4361,15 +4342,16 @@ Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D &engine)
// Get this polygon's facing. // Get this polygon's facing.
const size_t n = polyType - 1; const size_t n = polyType - 1;
float facing = (vert[0].y + vert[n].y) * (vert[0].x - vert[n].x) float facing = (vert[0].y + vert[n].y) * (vert[0].x - vert[n].x) +
+ (vert[1].y + vert[0].y) * (vert[1].x - vert[0].x) (vert[1].y + vert[0].y) * (vert[1].x - vert[0].x) +
+ (vert[2].y + vert[1].y) * (vert[2].x - vert[1].x); (vert[2].y + vert[1].y) * (vert[2].x - vert[1].x);
for (size_t j = 2; j < n; j++) for (size_t j = 2; j < n; j++)
{ {
facing += (vert[j+1].y + vert[j].y) * (vert[j+1].x - vert[j].x); facing += (vert[j+1].y + vert[j].y) * (vert[j+1].x - vert[j].x);
} }
this->_renderNeedsDepthEqualsTest = this->_renderNeedsDepthEqualsTest || (thePoly.attribute.DepthEqualTest_Enable != 0);
this->_isPolyFrontFacing[i] = (facing < 0); this->_isPolyFrontFacing[i] = (facing < 0);
// Get the texture that is to be attached to this polygon. // Get the texture that is to be attached to this polygon.
@ -4382,6 +4364,38 @@ Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D &engine)
glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(VERT) * engine.vertListCount, engine.vertList); glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(VERT) * engine.vertListCount, engine.vertList);
} }
if (this->isShaderSupported)
{
this->_geometryProgramFlags.EnableWDepth = (engine.renderState.wbuffer) ? 1 : 0;
this->_geometryProgramFlags.EnableAlphaTest = (engine.renderState.enableAlphaTest) ? 1 : 0;
this->_geometryProgramFlags.EnableTextureSampling = (this->_enableTextureSampling) ? 1 : 0;
this->_geometryProgramFlags.EnableFog = (this->_enableFog) ? 1 : 0;
this->_geometryProgramFlags.EnableEdgeMark = (this->_enableEdgeMark) ? 1 : 0;
this->_geometryProgramFlags.ToonShadingMode = (engine.renderState.shading) ? 1 : 0;
this->_geometryProgramFlags.NeedsDepthEqualsTest = (this->_renderNeedsDepthEqualsTest) ? 1 : 0;
glUseProgram(OGLRef.programGeometryID[this->_geometryProgramFlags.value]);
glUniform1i(OGLRef.uniformStateClearPolyID, this->_clearAttributes.opaquePolyID);
glUniform1f(OGLRef.uniformStateClearDepth, (GLfloat)this->_clearAttributes.depth / (GLfloat)0x00FFFFFF);
glUniform1f(OGLRef.uniformStateAlphaTestRef[this->_geometryProgramFlags.value], divide5bitBy31_LUT[engine.renderState.alphaTestRef]);
glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_FALSE);
glUniform1i(OGLRef.uniformPolyDrawShadow[this->_geometryProgramFlags.value], GL_FALSE);
}
else
{
if(engine.renderState.enableAlphaTest && (engine.renderState.alphaTestRef > 0))
{
glAlphaFunc(GL_GEQUAL, divide5bitBy31_LUT[engine.renderState.alphaTestRef]);
}
else
{
glAlphaFunc(GL_GREATER, 0);
}
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
}
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glDepthMask(GL_TRUE); glDepthMask(GL_TRUE);
@ -5086,7 +5100,7 @@ Render3DError OpenGLRenderer_1_2::DrawShadowPolygon(const GLenum polyPrimitive,
// 1st pass: Create the shadow volume. // 1st pass: Create the shadow volume.
if (opaquePolyID == 0) if (opaquePolyID == 0)
{ {
if (performDepthEqualTest && this->isShaderSupported) if (performDepthEqualTest && this->_emulateNDSDepthCalculation && this->isShaderSupported)
{ {
// Use the stencil buffer to determine which fragments fail the depth test using the lower-side tolerance. // Use the stencil buffer to determine which fragments fail the depth test using the lower-side tolerance.
glUniform1i(OGLRef.uniformPolyDepthOffsetMode[this->_geometryProgramFlags.value], 1); glUniform1i(OGLRef.uniformPolyDepthOffsetMode[this->_geometryProgramFlags.value], 1);
@ -5103,6 +5117,8 @@ Render3DError OpenGLRenderer_1_2::DrawShadowPolygon(const GLenum polyPrimitive,
glStencilOp(GL_KEEP, GL_REPLACE, GL_KEEP); glStencilOp(GL_KEEP, GL_REPLACE, GL_KEEP);
glStencilMask(0x80); glStencilMask(0x80);
glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr); glDrawElements(polyPrimitive, vertIndexCount, GL_UNSIGNED_SHORT, indexBufferPtr);
glUniform1i(OGLRef.uniformPolyDepthOffsetMode[this->_geometryProgramFlags.value], 0);
} }
else else
{ {
@ -5113,7 +5129,7 @@ Render3DError OpenGLRenderer_1_2::DrawShadowPolygon(const GLenum polyPrimitive,
} }
// 2nd pass: Do the polygon ID check. // 2nd pass: Do the polygon ID check.
if (performDepthEqualTest && this->isShaderSupported) if (performDepthEqualTest && this->_emulateNDSDepthCalculation && this->isShaderSupported)
{ {
// Use the stencil buffer to determine which fragments pass the lower-side tolerance. // Use the stencil buffer to determine which fragments pass the lower-side tolerance.
glUniform1i(OGLRef.uniformPolyDepthOffsetMode[this->_geometryProgramFlags.value], 1); glUniform1i(OGLRef.uniformPolyDepthOffsetMode[this->_geometryProgramFlags.value], 1);
@ -5227,6 +5243,7 @@ Render3DError OpenGLRenderer_1_2::Reset()
memset(OGLRef.vertIndexBuffer, 0, OGLRENDER_VERT_INDEX_BUFFER_COUNT * sizeof(GLushort)); memset(OGLRef.vertIndexBuffer, 0, OGLRENDER_VERT_INDEX_BUFFER_COUNT * sizeof(GLushort));
} }
this->_renderNeedsDepthEqualsTest = false;
this->_currentPolyIndex = 0; this->_currentPolyIndex = 0;
OGLRef.vtxPtrPosition = (GLvoid *)offsetof(VERT, coord); OGLRef.vtxPtrPosition = (GLvoid *)offsetof(VERT, coord);
@ -5525,40 +5542,35 @@ Render3DError OpenGLRenderer_2_0::BeginRender(const GFX3D &engine)
return OGLERROR_BEGINGL_FAILED; return OGLERROR_BEGINGL_FAILED;
} }
// Setup render states
this->_geometryProgramFlags.EnableWDepth = (engine.renderState.wbuffer) ? 1 : 0;
this->_geometryProgramFlags.EnableAlphaTest = (engine.renderState.enableAlphaTest) ? 1 : 0;
this->_geometryProgramFlags.EnableTextureSampling = (this->_enableTextureSampling) ? 1 : 0;
this->_geometryProgramFlags.EnableFog = (this->_enableFog) ? 1 : 0;
this->_geometryProgramFlags.EnableEdgeMark = (this->_enableEdgeMark) ? 1 : 0;
this->_geometryProgramFlags.ToonShadingMode = (engine.renderState.shading) ? 1 : 0;
glUseProgram(OGLRef.programGeometryID[this->_geometryProgramFlags.value]);
glUniform1f(OGLRef.uniformStateAlphaTestRef[this->_geometryProgramFlags.value], divide5bitBy31_LUT[engine.renderState.alphaTestRef]);
glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_FALSE);
glUniform1i(OGLRef.uniformPolyDrawShadow[this->_geometryProgramFlags.value], GL_FALSE);
glBindBuffer(GL_ARRAY_BUFFER, OGLRef.vboGeometryVtxID); glBindBuffer(GL_ARRAY_BUFFER, OGLRef.vboGeometryVtxID);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, OGLRef.iboGeometryIndexID); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, OGLRef.iboGeometryIndexID);
this->_renderNeedsDepthEqualsTest = false;
size_t vertIndexCount = 0; size_t vertIndexCount = 0;
GLushort *indexPtr = (GLushort *)glMapBuffer(GL_ELEMENT_ARRAY_BUFFER, GL_WRITE_ONLY); GLushort *indexPtr = (GLushort *)glMapBuffer(GL_ELEMENT_ARRAY_BUFFER, GL_WRITE_ONLY);
for (size_t i = 0; i < engine.polylist->count; i++) for (size_t i = 0; i < engine.polylist->count; i++)
{ {
const POLY *thePoly = &engine.polylist->list[engine.indexlist.list[i]]; const POLY &thePoly = engine.polylist->list[engine.indexlist.list[i]];
const size_t polyType = thePoly->type; const size_t polyType = thePoly.type;
const VERT vert[4] = {
engine.vertList[thePoly.vertIndexes[0]],
engine.vertList[thePoly.vertIndexes[1]],
engine.vertList[thePoly.vertIndexes[2]],
engine.vertList[thePoly.vertIndexes[3]]
};
for (size_t j = 0; j < polyType; j++) for (size_t j = 0; j < polyType; j++)
{ {
const GLushort vertIndex = thePoly->vertIndexes[j]; const GLushort vertIndex = thePoly.vertIndexes[j];
// While we're looping through our vertices, add each vertex index to // While we're looping through our vertices, add each vertex index to
// a buffer. For GFX3D_QUADS and GFX3D_QUAD_STRIP, we also add additional // a buffer. For GFX3D_QUADS and GFX3D_QUAD_STRIP, we also add additional
// vertices here to convert them to GL_TRIANGLES, which are much easier // vertices here to convert them to GL_TRIANGLES, which are much easier
// to work with and won't be deprecated in future OpenGL versions. // to work with and won't be deprecated in future OpenGL versions.
indexPtr[vertIndexCount++] = vertIndex; indexPtr[vertIndexCount++] = vertIndex;
if (thePoly->vtxFormat == GFX3D_QUADS || thePoly->vtxFormat == GFX3D_QUAD_STRIP) if (thePoly.vtxFormat == GFX3D_QUADS || thePoly.vtxFormat == GFX3D_QUAD_STRIP)
{ {
if (j == 2) if (j == 2)
{ {
@ -5566,17 +5578,46 @@ Render3DError OpenGLRenderer_2_0::BeginRender(const GFX3D &engine)
} }
else if (j == 3) else if (j == 3)
{ {
indexPtr[vertIndexCount++] = thePoly->vertIndexes[0]; indexPtr[vertIndexCount++] = thePoly.vertIndexes[0];
} }
} }
} }
this->_textureList[i] = this->GetLoadedTextureFromPolygon(*thePoly, this->_enableTextureSampling); // Get this polygon's facing.
const size_t n = polyType - 1;
float facing = (vert[0].y + vert[n].y) * (vert[0].x - vert[n].x) +
(vert[1].y + vert[0].y) * (vert[1].x - vert[0].x) +
(vert[2].y + vert[1].y) * (vert[2].x - vert[1].x);
for (size_t j = 2; j < n; j++)
{
facing += (vert[j+1].y + vert[j].y) * (vert[j+1].x - vert[j].x);
}
this->_renderNeedsDepthEqualsTest = this->_renderNeedsDepthEqualsTest || (thePoly.attribute.DepthEqualTest_Enable != 0);
this->_isPolyFrontFacing[i] = (facing < 0);
// Get the texture that is to be attached to this polygon.
this->_textureList[i] = this->GetLoadedTextureFromPolygon(thePoly, this->_enableTextureSampling);
} }
glUnmapBuffer(GL_ELEMENT_ARRAY_BUFFER); glUnmapBuffer(GL_ELEMENT_ARRAY_BUFFER);
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(VERT) * engine.vertListCount, engine.vertList); glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(VERT) * engine.vertListCount, engine.vertList);
// Setup render states
this->_geometryProgramFlags.EnableWDepth = (engine.renderState.wbuffer) ? 1 : 0;
this->_geometryProgramFlags.EnableAlphaTest = (engine.renderState.enableAlphaTest) ? 1 : 0;
this->_geometryProgramFlags.EnableTextureSampling = (this->_enableTextureSampling) ? 1 : 0;
this->_geometryProgramFlags.EnableFog = (this->_enableFog) ? 1 : 0;
this->_geometryProgramFlags.EnableEdgeMark = (this->_enableEdgeMark) ? 1 : 0;
this->_geometryProgramFlags.ToonShadingMode = (engine.renderState.shading) ? 1 : 0;
this->_geometryProgramFlags.NeedsDepthEqualsTest = (this->_renderNeedsDepthEqualsTest) ? 1 : 0;
glUseProgram(OGLRef.programGeometryID[this->_geometryProgramFlags.value]);
glUniform1f(OGLRef.uniformStateAlphaTestRef[this->_geometryProgramFlags.value], divide5bitBy31_LUT[engine.renderState.alphaTestRef]);
glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_FALSE);
glUniform1i(OGLRef.uniformPolyDrawShadow[this->_geometryProgramFlags.value], GL_FALSE);
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glDepthMask(GL_TRUE); glDepthMask(GL_TRUE);

View File

@ -414,7 +414,8 @@ union OGLGeometryFlags
u8 EnableFog:1; u8 EnableFog:1;
u8 EnableEdgeMark:1; u8 EnableEdgeMark:1;
u8 ToonShadingMode:1; u8 ToonShadingMode:1;
u8 :3; u8 NeedsDepthEqualsTest:1;
u8 :1;
}; };
}; };
typedef OGLGeometryFlags OGLGeometryFlags; typedef OGLGeometryFlags OGLGeometryFlags;
@ -657,6 +658,7 @@ protected:
bool isShaderSupported; bool isShaderSupported;
bool isVAOSupported; bool isVAOSupported;
bool isSampleShadingSupported; bool isSampleShadingSupported;
bool isConservativeDepthSupported;
bool willFlipOnlyFramebufferOnGPU; bool willFlipOnlyFramebufferOnGPU;
bool willFlipAndConvertFramebufferOnGPU; bool willFlipAndConvertFramebufferOnGPU;
bool willUsePerSampleZeroDstPass; bool willUsePerSampleZeroDstPass;
@ -670,6 +672,7 @@ protected:
FragmentColor *_workingTextureUnpackBuffer; FragmentColor *_workingTextureUnpackBuffer;
bool _pixelReadNeedsFinish; bool _pixelReadNeedsFinish;
bool _needsZeroDstAlphaPass; bool _needsZeroDstAlphaPass;
bool _renderNeedsDepthEqualsTest;
size_t _currentPolyIndex; size_t _currentPolyIndex;
OGLTextureUnitID _lastTextureDrawTarget; OGLTextureUnitID _lastTextureDrawTarget;
OGLGeometryFlags _geometryProgramFlags; OGLGeometryFlags _geometryProgramFlags;

View File

@ -197,6 +197,9 @@ out vec4 outPolyID;\n\
#if ENABLE_FOG\n\ #if ENABLE_FOG\n\
out vec4 outFogAttributes;\n\ out vec4 outFogAttributes;\n\
#endif\n\ #endif\n\
#if IS_CONSERVATIVE_DEPTH_SUPPORTED && (USE_NDS_DEPTH_CALCULATION || ENABLE_FOG) && !NEEDS_DEPTH_EQUALS_TEST && !ENABLE_W_DEPTH\n\
layout (depth_less) out float gl_FragDepth;\n\
#endif\n\
\n\ \n\
void main()\n\ void main()\n\
{\n\ {\n\
@ -287,14 +290,25 @@ void main()\n\
outFogAttributes = newFogAttributes;\n\ outFogAttributes = newFogAttributes;\n\
#endif\n\ #endif\n\
#if USE_NDS_DEPTH_CALCULATION || ENABLE_FOG\n\ #if USE_NDS_DEPTH_CALCULATION || ENABLE_FOG\n\
float depthOffset = (polyDepthOffsetMode == 0) ? 0.0 : ((polyDepthOffsetMode == 1) ? -DEPTH_EQUALS_TEST_TOLERANCE : DEPTH_EQUALS_TEST_TOLERANCE);\n\ // It is tempting to perform the NDS depth calculation in the vertex shader rather than in the fragment shader.\n\
// Resist this temptation! It is much more reliable to do the depth calculation in the fragment shader due to\n\
// subtle interpolation differences between various GPUs and/or drivers. If the depth calculation is not done\n\
// here, then it is very possible for the user to experience Z-fighting in certain rendering situations.\n\
\n\ \n\
#if ENABLE_W_DEPTH\n\ #if NEEDS_DEPTH_EQUALS_TEST\n\
float newFragDepthValue = clamp( ( (vtxPosition.w * 4096.0) + depthOffset ) / 16777215.0, 0.0, 1.0 );\n\ float depthOffset = (polyDepthOffsetMode == 0) ? 0.0 : ((polyDepthOffsetMode == 1) ? -DEPTH_EQUALS_TEST_TOLERANCE : DEPTH_EQUALS_TEST_TOLERANCE);\n\
#if ENABLE_W_DEPTH\n\
float newFragDepthValue = clamp( ( (vtxPosition.w * 4096.0) + depthOffset ) / 16777215.0, 0.0, 1.0 );\n\
#else\n\
float newFragDepthValue = clamp( ( (floor(gl_FragCoord.z * 4194303.0) * 4.0) + depthOffset ) / 16777215.0, 0.0, 1.0 );\n\
#endif\n\
#else\n\ #else\n\
float vertW = (vtxPosition.w == 0.0) ? 0.00000001 : vtxPosition.w;\n\ #if ENABLE_W_DEPTH\n\
// hack: when using z-depth, drop some LSBs so that the overworld map in Dragon Quest IV shows up correctly\n\ float newFragDepthValue = clamp( (vtxPosition.w * 4096.0) / 16777215.0, 0.0, 1.0 );\n\
float newFragDepthValue = clamp( ( (floor(((vtxPosition.z/vertW) * 0.5 + 0.5) * 4194303.0) * 4.0) + depthOffset ) / 16777215.0, 0.0, 1.0 );\n\ #else\n\
// hack: when using z-depth, drop some LSBs so that the overworld map in Dragon Quest IV shows up correctly\n\
float newFragDepthValue = clamp( (floor(gl_FragCoord.z * 4194303.0) * 4.0) / 16777215.0, 0.0, 1.0 );\n\
#endif\n\
#endif\n\ #endif\n\
\n\ \n\
gl_FragDepth = newFragDepthValue;\n\ gl_FragDepth = newFragDepthValue;\n\
@ -813,6 +827,7 @@ Render3DError OpenGLRenderer_3_2::InitExtensions()
this->willFlipAndConvertFramebufferOnGPU = true; this->willFlipAndConvertFramebufferOnGPU = true;
this->isSampleShadingSupported = this->IsExtensionPresent(&oglExtensionSet, "GL_ARB_sample_shading"); this->isSampleShadingSupported = this->IsExtensionPresent(&oglExtensionSet, "GL_ARB_sample_shading");
this->isConservativeDepthSupported = this->IsExtensionPresent(&oglExtensionSet, "GL_ARB_conservative_depth");
this->_enableTextureSmoothing = CommonSettings.GFX3D_Renderer_TextureSmoothing; this->_enableTextureSmoothing = CommonSettings.GFX3D_Renderer_TextureSmoothing;
this->_emulateShadowPolygon = CommonSettings.OpenGL_Emulation_ShadowPolygon; this->_emulateShadowPolygon = CommonSettings.OpenGL_Emulation_ShadowPolygon;
@ -1296,14 +1311,21 @@ Render3DError OpenGLRenderer_3_2::CreateGeometryPrograms()
OGLGeometryFlags programFlags; OGLGeometryFlags programFlags;
programFlags.value = 0; programFlags.value = 0;
std::stringstream shaderHeader; std::stringstream vtxShaderHeader;
shaderHeader << "#version 150\n"; vtxShaderHeader << "#version 150\n";
shaderHeader << "#define DEPTH_EQUALS_TEST_TOLERANCE " << DEPTH_EQUALS_TEST_TOLERANCE << ".0\n"; vtxShaderHeader << "\n";
shaderHeader << "\n";
std::string vtxShaderCode = shaderHeader.str() + std::string(GeometryVtxShader_150); std::string vtxShaderCode = vtxShaderHeader.str() + std::string(GeometryVtxShader_150);
for (size_t flagsValue = 0; flagsValue < 64; flagsValue++, programFlags.value++) std::stringstream fragShaderHeader;
fragShaderHeader << "#version 150\n";
if (this->isConservativeDepthSupported) fragShaderHeader << "#extension GL_ARB_conservative_depth : require\n";
fragShaderHeader << "\n";
fragShaderHeader << "#define IS_CONSERVATIVE_DEPTH_SUPPORTED " << ((this->isConservativeDepthSupported) ? 1 : 0) << "\n";
fragShaderHeader << "#define DEPTH_EQUALS_TEST_TOLERANCE " << DEPTH_EQUALS_TEST_TOLERANCE << ".0\n";
fragShaderHeader << "\n";
for (size_t flagsValue = 0; flagsValue < 128; flagsValue++, programFlags.value++)
{ {
std::stringstream shaderFlags; std::stringstream shaderFlags;
shaderFlags << "#define USE_TEXTURE_SMOOTHING " << ((this->_enableTextureSmoothing) ? 1 : 0) << "\n"; shaderFlags << "#define USE_TEXTURE_SMOOTHING " << ((this->_enableTextureSmoothing) ? 1 : 0) << "\n";
@ -1315,9 +1337,10 @@ Render3DError OpenGLRenderer_3_2::CreateGeometryPrograms()
shaderFlags << "#define ENABLE_FOG " << ((programFlags.EnableFog) ? 1 : 0) << "\n"; shaderFlags << "#define ENABLE_FOG " << ((programFlags.EnableFog) ? 1 : 0) << "\n";
shaderFlags << "#define ENABLE_EDGE_MARK " << ((programFlags.EnableEdgeMark) ? 1 : 0) << "\n"; shaderFlags << "#define ENABLE_EDGE_MARK " << ((programFlags.EnableEdgeMark) ? 1 : 0) << "\n";
shaderFlags << "#define TOON_SHADING_MODE " << ((programFlags.ToonShadingMode) ? 1 : 0) << "\n"; shaderFlags << "#define TOON_SHADING_MODE " << ((programFlags.ToonShadingMode) ? 1 : 0) << "\n";
shaderFlags << "#define NEEDS_DEPTH_EQUALS_TEST " << ((programFlags.NeedsDepthEqualsTest) ? 1 : 0) << "\n";
shaderFlags << "\n"; shaderFlags << "\n";
std::string fragShaderCode = shaderHeader.str() + shaderFlags.str() + std::string(GeometryFragShader_150); std::string fragShaderCode = fragShaderHeader.str() + shaderFlags.str() + std::string(GeometryFragShader_150);
error = this->ShaderProgramCreate(OGLRef.vertexGeometryShaderID, error = this->ShaderProgramCreate(OGLRef.vertexGeometryShaderID,
OGLRef.fragmentGeometryShaderID[flagsValue], OGLRef.fragmentGeometryShaderID[flagsValue],
@ -1421,7 +1444,7 @@ void OpenGLRenderer_3_2::DestroyGeometryPrograms()
OGLRef.uboRenderStatesID = 0; OGLRef.uboRenderStatesID = 0;
OGLRef.tboPolyStatesID = 0; OGLRef.tboPolyStatesID = 0;
for (size_t flagsValue = 0; flagsValue < 64; flagsValue++) for (size_t flagsValue = 0; flagsValue < 128; flagsValue++)
{ {
if (OGLRef.programGeometryID[flagsValue] == 0) if (OGLRef.programGeometryID[flagsValue] == 0)
{ {
@ -2146,6 +2169,8 @@ Render3DError OpenGLRenderer_3_2::BeginRender(const GFX3D &engine)
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, OGLRef.iboGeometryIndexID); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, OGLRef.iboGeometryIndexID);
glBindBuffer(GL_TEXTURE_BUFFER, OGLRef.tboPolyStatesID); glBindBuffer(GL_TEXTURE_BUFFER, OGLRef.tboPolyStatesID);
this->_renderNeedsDepthEqualsTest = false;
size_t vertIndexCount = 0; size_t vertIndexCount = 0;
GLushort *indexPtr = (GLushort *)glMapBufferRange(GL_ELEMENT_ARRAY_BUFFER, 0, engine.polylist->count * 6 * sizeof(GLushort), GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT); GLushort *indexPtr = (GLushort *)glMapBufferRange(GL_ELEMENT_ARRAY_BUFFER, 0, engine.polylist->count * 6 * sizeof(GLushort), GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
OGLPolyStates *polyStates = (OGLPolyStates *)glMapBufferRange(GL_TEXTURE_BUFFER, 0, engine.polylist->count * sizeof(OGLPolyStates), GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT); OGLPolyStates *polyStates = (OGLPolyStates *)glMapBufferRange(GL_TEXTURE_BUFFER, 0, engine.polylist->count * sizeof(OGLPolyStates), GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
@ -2185,15 +2210,16 @@ Render3DError OpenGLRenderer_3_2::BeginRender(const GFX3D &engine)
// Get the polygon's facing. // Get the polygon's facing.
const size_t n = polyType - 1; const size_t n = polyType - 1;
float facing = (vert[0].y + vert[n].y) * (vert[0].x - vert[n].x) float facing = (vert[0].y + vert[n].y) * (vert[0].x - vert[n].x) +
+ (vert[1].y + vert[0].y) * (vert[1].x - vert[0].x) (vert[1].y + vert[0].y) * (vert[1].x - vert[0].x) +
+ (vert[2].y + vert[1].y) * (vert[2].x - vert[1].x); (vert[2].y + vert[1].y) * (vert[2].x - vert[1].x);
for (size_t j = 2; j < n; j++) for (size_t j = 2; j < n; j++)
{ {
facing += (vert[j+1].y + vert[j].y) * (vert[j+1].x - vert[j].x); facing += (vert[j+1].y + vert[j].y) * (vert[j+1].x - vert[j].x);
} }
this->_renderNeedsDepthEqualsTest = this->_renderNeedsDepthEqualsTest || (thePoly.attribute.DepthEqualTest_Enable != 0);
this->_isPolyFrontFacing[i] = (facing < 0); this->_isPolyFrontFacing[i] = (facing < 0);
// Get the texture that is to be attached to this polygon. // Get the texture that is to be attached to this polygon.
@ -2227,6 +2253,7 @@ Render3DError OpenGLRenderer_3_2::BeginRender(const GFX3D &engine)
this->_geometryProgramFlags.EnableFog = (this->_enableFog) ? 1 : 0; this->_geometryProgramFlags.EnableFog = (this->_enableFog) ? 1 : 0;
this->_geometryProgramFlags.EnableEdgeMark = (this->_enableEdgeMark) ? 1 : 0; this->_geometryProgramFlags.EnableEdgeMark = (this->_enableEdgeMark) ? 1 : 0;
this->_geometryProgramFlags.ToonShadingMode = (engine.renderState.shading) ? 1 : 0; this->_geometryProgramFlags.ToonShadingMode = (engine.renderState.shading) ? 1 : 0;
this->_geometryProgramFlags.NeedsDepthEqualsTest = (this->_renderNeedsDepthEqualsTest) ? 1 : 0;
glUseProgram(OGLRef.programGeometryID[this->_geometryProgramFlags.value]); glUseProgram(OGLRef.programGeometryID[this->_geometryProgramFlags.value]);
glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_FALSE); glUniform1i(OGLRef.uniformTexDrawOpaque[this->_geometryProgramFlags.value], GL_FALSE);