diff --git a/desmume/src/OGLRender.cpp b/desmume/src/OGLRender.cpp index 9a6cd1d96..5d36c6be1 100755 --- a/desmume/src/OGLRender.cpp +++ b/desmume/src/OGLRender.cpp @@ -1264,12 +1264,12 @@ OpenGLRenderer::OpenGLRenderer() OpenGLRenderer::~OpenGLRenderer() { - free_aligned(_framebufferColor); - free_aligned(_workingTextureUnpackBuffer); + free_aligned(this->_framebufferColor); + free_aligned(this->_workingTextureUnpackBuffer); // Destroy OpenGL rendering states - delete ref; - ref = NULL; + delete this->ref; + this->ref = NULL; } bool OpenGLRenderer::IsExtensionPresent(const std::set *oglExtensionSet, const std::string extensionName) const @@ -1833,9 +1833,9 @@ size_t OpenGLRenderer::DrawPolygonsForIndexRange(const POLYLIST *polyList, const { OGLRenderRef &OGLRef = *this->ref; - if (lastIndex > (polyList->count - 1)) + if (lastIndex > (this->_clippedPolyCount - 1)) { - lastIndex = polyList->count - 1; + lastIndex = this->_clippedPolyCount - 1; } if (firstIndex > lastIndex) @@ -1860,7 +1860,7 @@ size_t OpenGLRenderer::DrawPolygonsForIndexRange(const POLYLIST *polyList, const }; // Set up the initial polygon - const POLY &initialPoly = polyList->list[indexList->list[firstIndex]]; + const POLY &initialPoly = *this->_clipper.GetClippedPolyByIndex(firstIndex).poly; TEXIMAGE_PARAM lastTexParams = initialPoly.texParam; u32 lastTexPalette = initialPoly.texPalette; u32 lastViewport = initialPoly.viewport; @@ -1874,7 +1874,7 @@ size_t OpenGLRenderer::DrawPolygonsForIndexRange(const POLYLIST *polyList, const for (size_t i = firstIndex; i <= lastIndex; i++) { - const POLY &thePoly = polyList->list[indexList->list[i]]; + const POLY &thePoly = *this->_clipper.GetClippedPolyByIndex(i).poly; // Set up the polygon if it changed if (lastPolyAttr.value != thePoly.attribute.value) @@ -1914,7 +1914,7 @@ size_t OpenGLRenderer::DrawPolygonsForIndexRange(const POLYLIST *polyList, const // the same and we're not drawing a line loop or line strip. 
if (i+1 <= lastIndex) { - const POLY &nextPoly = polyList->list[indexList->list[i+1]]; + const POLY &nextPoly = *this->_clipper.GetClippedPolyByIndex(i+1).poly; if (lastPolyAttr.value == nextPoly.attribute.value && lastTexParams.value == nextPoly.texParam.value && @@ -4051,7 +4051,7 @@ Render3DError OpenGLRenderer_1_2::ZeroDstAlphaPass(const POLYLIST *polyList, con glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_FALSE); glStencilFunc(GL_NOTEQUAL, 0x40, 0x40); - this->DrawPolygonsForIndexRange(polyList, indexList, polyList->opaqueCount, polyList->count - 1, indexOffset, lastPolyAttr); + this->DrawPolygonsForIndexRange(polyList, indexList, this->_clippedPolyOpaqueCount, this->_clippedPolyCount - 1, indexOffset, lastPolyAttr); // Restore OpenGL states back to normal. this->_geometryProgramFlags = oldGProgramFlags; @@ -4255,10 +4255,14 @@ Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D &engine) OGLRef.vtxPtrColor = (this->isShaderSupported) ? (GLvoid *)&engine.vertList[0].color : OGLRef.color4fBuffer; } + // Generate the clipped polygon list. 
+ this->_PerformClipping(engine.vertList, engine.polylist, &engine.indexlist); + this->_renderNeedsDepthEqualsTest = false; - for (size_t i = 0, vertIndexCount = 0; i < engine.polylist->count; i++) + for (size_t i = 0, vertIndexCount = 0; i < this->_clippedPolyCount; i++) { - const POLY &thePoly = engine.polylist->list[engine.indexlist.list[i]]; + const POLY &thePoly = *this->_clipper.GetClippedPolyByIndex(i).poly; + const size_t polyType = thePoly.type; const VERT vert[4] = { engine.vertList[thePoly.vertIndexes[0]], @@ -4428,7 +4432,7 @@ Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D &engine) Render3DError OpenGLRenderer_1_2::RenderGeometry(const GFX3D_State &renderState, const POLYLIST *polyList, const INDEXLIST *indexList) { - if (polyList->count > 0) + if (this->_clippedPolyCount > 0) { glEnable(GL_DEPTH_TEST); glEnable(GL_STENCIL_TEST); @@ -4448,29 +4452,29 @@ Render3DError OpenGLRenderer_1_2::RenderGeometry(const GFX3D_State &renderState, size_t indexOffset = 0; - const POLY &firstPoly = polyList->list[indexList->list[0]]; + const POLY &firstPoly = *this->_clipper.GetClippedPolyByIndex(0).poly; POLYGON_ATTR lastPolyAttr = firstPoly.attribute; - if (polyList->opaqueCount > 0) + if (this->_clippedPolyOpaqueCount > 0) { this->SetupPolygon(firstPoly, false, true); - this->DrawPolygonsForIndexRange(polyList, indexList, 0, polyList->opaqueCount - 1, indexOffset, lastPolyAttr); + this->DrawPolygonsForIndexRange(polyList, indexList, 0, this->_clippedPolyOpaqueCount - 1, indexOffset, lastPolyAttr); } - if (polyList->opaqueCount < polyList->count) + if (this->_clippedPolyOpaqueCount < this->_clippedPolyCount) { if (this->_needsZeroDstAlphaPass && this->_emulateSpecialZeroAlphaBlending) { - if (polyList->opaqueCount == 0) + if (this->_clippedPolyOpaqueCount == 0) { this->SetupPolygon(firstPoly, true, false); } this->ZeroDstAlphaPass(polyList, indexList, renderState.enableAlphaBlending, indexOffset, lastPolyAttr); - if (polyList->opaqueCount > 0) + if 
(this->_clippedPolyOpaqueCount > 0) { - const POLY &lastOpaquePoly = polyList->list[indexList->list[polyList->opaqueCount - 1]]; + const POLY &lastOpaquePoly = *this->_clipper.GetClippedPolyByIndex(this->_clippedPolyOpaqueCount - 1).poly; lastPolyAttr = lastOpaquePoly.attribute; this->SetupPolygon(lastOpaquePoly, false, true); } @@ -4485,12 +4489,12 @@ Render3DError OpenGLRenderer_1_2::RenderGeometry(const GFX3D_State &renderState, glStencilMask(0xFF); } - if (polyList->opaqueCount == 0) + if (this->_clippedPolyOpaqueCount == 0) { this->SetupPolygon(firstPoly, true, true); } - this->DrawPolygonsForIndexRange(polyList, indexList, polyList->opaqueCount, polyList->count - 1, indexOffset, lastPolyAttr); + this->DrawPolygonsForIndexRange(polyList, indexList, this->_clippedPolyOpaqueCount, this->_clippedPolyCount - 1, indexOffset, lastPolyAttr); } glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); @@ -5558,10 +5562,14 @@ Render3DError OpenGLRenderer_2_0::BeginRender(const GFX3D &engine) // Only copy as much vertex data as we need to, since this can be a potentially large upload size. glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(VERT) * engine.vertListCount, engine.vertList); + // Generate the clipped polygon list. 
+ this->_PerformClipping(engine.vertList, engine.polylist, &engine.indexlist); + this->_renderNeedsDepthEqualsTest = false; - for (size_t i = 0, vertIndexCount = 0; i < engine.polylist->count; i++) + for (size_t i = 0, vertIndexCount = 0; i < this->_clippedPolyCount; i++) { - const POLY &thePoly = engine.polylist->list[engine.indexlist.list[i]]; + const POLY &thePoly = *this->_clipper.GetClippedPolyByIndex(i).poly; + const size_t polyType = thePoly.type; const VERT vert[4] = { engine.vertList[thePoly.vertIndexes[0]], diff --git a/desmume/src/OGLRender.h b/desmume/src/OGLRender.h index db951a564..15e0152b4 100755 --- a/desmume/src/OGLRender.h +++ b/desmume/src/OGLRender.h @@ -309,7 +309,8 @@ enum OGLTextureUnitID enum OGLBindingPointID { - OGLBindingPointID_RenderStates = 0 + OGLBindingPointID_RenderStates = 0, + OGLBindingPointID_PolyStates = 1 }; enum OGLErrorCode @@ -488,6 +489,7 @@ struct OGLRenderRef // UBO / TBO GLuint uboRenderStatesID; + GLuint uboPolyStatesID; GLuint tboPolyStatesID; GLuint texPolyStatesID; @@ -715,6 +717,7 @@ protected: Render3DError FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16); OpenGLTexture* GetLoadedTextureFromPolygon(const POLY &thePoly, bool enableTexturing); + template size_t DrawPolygonsForIndexRange(const POLYLIST *polyList, const INDEXLIST *indexList, size_t firstIndex, size_t lastIndex, size_t &indexOffset, POLYGON_ATTR &lastPolyAttr); template Render3DError DrawAlphaTexturePolygon(const GLenum polyPrimitive, const GLsizei vertIndexCount, diff --git a/desmume/src/OGLRender_3_2.cpp b/desmume/src/OGLRender_3_2.cpp index 189d0dee4..55b43d5ac 100755 --- a/desmume/src/OGLRender_3_2.cpp +++ b/desmume/src/OGLRender_3_2.cpp @@ -121,7 +121,14 @@ in vec4 inPosition;\n\ in vec2 inTexCoord0;\n\ in vec3 inColor; \n\ \n\ +#if IS_USING_UBO_POLY_STATES\n\ +layout (std140) uniform PolyStates\n\ +{\n\ + ivec4 value[4096];\n\ +} polyState;\n\ 
+#else\n\ uniform isamplerBuffer PolyStates;\n\ +#endif\n\ uniform int polyIndex;\n\ \n\ out vec2 vtxTexCoord;\n\ @@ -136,7 +143,12 @@ flat out int texSingleBitAlpha;\n\ \n\ void main()\n\ {\n\ +#if IS_USING_UBO_POLY_STATES\n\ + ivec4 polyStateVec = polyState.value[polyIndex >> 2];\n\ + int polyStateBits = polyStateVec[polyIndex & 0x03];\n\ +#else\n\ int polyStateBits = texelFetch(PolyStates, polyIndex).r;\n\ +#endif\n\ int texSizeShiftS = (polyStateBits >> 18) & 0x07;\n\ int texSizeShiftT = (polyStateBits >> 21) & 0x07;\n\ \n\ @@ -772,6 +784,7 @@ void OGLCreateRenderer_3_2(OpenGLRenderer **rendererPtr) OpenGLRenderer_3_2::OpenGLRenderer_3_2() { + _is64kUBOSupported = false; _syncBufferSetup = NULL; } @@ -797,6 +810,10 @@ Render3DError OpenGLRenderer_3_2::InitExtensions() this->GetExtensionSet(&oglExtensionSet); // Get host GPU device properties + GLint maxUBOSize = 0; + glGetIntegerv(GL_MAX_UNIFORM_BLOCK_SIZE, &maxUBOSize); + this->_is64kUBOSupported = (maxUBOSize >= 65536); + GLfloat maxAnisotropyOGL = 1.0f; glGetFloatv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &maxAnisotropyOGL); this->_deviceInfo.maxAnisotropy = (float)maxAnisotropyOGL; @@ -1271,6 +1288,42 @@ Render3DError OpenGLRenderer_3_2::CreateGeometryPrograms() Render3DError error = OGLERROR_NOERR; OGLRenderRef &OGLRef = *this->ref; + // Create shader resources. 
+ if (OGLRef.uboRenderStatesID == 0) + { + glGenBuffers(1, &OGLRef.uboRenderStatesID); + glBindBuffer(GL_UNIFORM_BUFFER, OGLRef.uboRenderStatesID); + glBufferData(GL_UNIFORM_BUFFER, sizeof(OGLRenderStates), NULL, GL_DYNAMIC_DRAW); + glBindBufferBase(GL_UNIFORM_BUFFER, OGLBindingPointID_RenderStates, OGLRef.uboRenderStatesID); + } + + if (this->_is64kUBOSupported) + { + if (OGLRef.uboPolyStatesID == 0) + { + glGenBuffers(1, &OGLRef.uboPolyStatesID); + glBindBuffer(GL_UNIFORM_BUFFER, OGLRef.uboPolyStatesID); + glBufferData(GL_UNIFORM_BUFFER, MAX_CLIPPED_POLY_COUNT_FOR_UBO * sizeof(OGLPolyStates), NULL, GL_DYNAMIC_DRAW); + glBindBufferBase(GL_UNIFORM_BUFFER, OGLBindingPointID_PolyStates, OGLRef.uboPolyStatesID); + } + } + else + { + if (OGLRef.tboPolyStatesID == 0) + { + // Set up poly states TBO + glGenBuffers(1, &OGLRef.tboPolyStatesID); + glBindBuffer(GL_TEXTURE_BUFFER, OGLRef.tboPolyStatesID); + glBufferData(GL_TEXTURE_BUFFER, POLYLIST_SIZE * sizeof(OGLPolyStates), NULL, GL_DYNAMIC_DRAW); + + glGenTextures(1, &OGLRef.texPolyStatesID); + glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_PolyStates); + glBindTexture(GL_TEXTURE_BUFFER, OGLRef.texPolyStatesID); + glTexBuffer(GL_TEXTURE_BUFFER, GL_R32I, OGLRef.tboPolyStatesID); + glActiveTexture(GL_TEXTURE0); + } + } + OGLGeometryFlags programFlags; programFlags.value = 0; @@ -1284,6 +1337,8 @@ Render3DError OpenGLRenderer_3_2::CreateGeometryPrograms() vtxShaderHeader << "#version 150\n"; } vtxShaderHeader << "\n"; + vtxShaderHeader << "#define IS_USING_UBO_POLY_STATES " << ((OGLRef.uboPolyStatesID != 0) ? 
1 : 0) << "\n"; + vtxShaderHeader << "\n"; std::string vtxShaderCode = vtxShaderHeader.str() + std::string(GeometryVtxShader_150); @@ -1380,9 +1435,18 @@ Render3DError OpenGLRenderer_3_2::CreateGeometryPrograms() assert(uboSize == sizeof(OGLRenderStates)); const GLint uniformTexRenderObject = glGetUniformLocation(OGLRef.programGeometryID[flagsValue], "texRenderObject"); - const GLint uniformTexBufferPolyStates = glGetUniformLocation(OGLRef.programGeometryID[flagsValue], "PolyStates"); glUniform1i(uniformTexRenderObject, 0); - glUniform1i(uniformTexBufferPolyStates, OGLTextureUnitID_PolyStates); + + if (OGLRef.uboPolyStatesID != 0) + { + const GLuint uniformBlockPolyStates = glGetUniformBlockIndex(OGLRef.programGeometryID[flagsValue], "PolyStates"); + glUniformBlockBinding(OGLRef.programGeometryID[flagsValue], uniformBlockPolyStates, OGLBindingPointID_PolyStates); + } + else + { + const GLint uniformTexBufferPolyStates = glGetUniformLocation(OGLRef.programGeometryID[flagsValue], "PolyStates"); + glUniform1i(uniformTexBufferPolyStates, OGLTextureUnitID_PolyStates); + } OGLRef.uniformTexDrawOpaque[flagsValue] = glGetUniformLocation(OGLRef.programGeometryID[flagsValue], "texDrawOpaque"); OGLRef.uniformPolyDrawShadow[flagsValue] = glGetUniformLocation(OGLRef.programGeometryID[flagsValue], "polyDrawShadow"); @@ -1390,28 +1454,6 @@ Render3DError OpenGLRenderer_3_2::CreateGeometryPrograms() OGLRef.uniformPolyDepthOffsetMode[flagsValue] = glGetUniformLocation(OGLRef.programGeometryID[flagsValue], "polyDepthOffsetMode"); } - if (OGLRef.uboRenderStatesID == 0) - { - glGenBuffers(1, &OGLRef.uboRenderStatesID); - glBindBuffer(GL_UNIFORM_BUFFER, OGLRef.uboRenderStatesID); - glBufferData(GL_UNIFORM_BUFFER, sizeof(OGLRenderStates), NULL, GL_DYNAMIC_DRAW); - glBindBufferBase(GL_UNIFORM_BUFFER, OGLBindingPointID_RenderStates, OGLRef.uboRenderStatesID); - } - - if (OGLRef.tboPolyStatesID == 0) - { - // Set up poly states TBO - glGenBuffers(1, &OGLRef.tboPolyStatesID); - 
glBindBuffer(GL_TEXTURE_BUFFER, OGLRef.tboPolyStatesID); - glBufferData(GL_TEXTURE_BUFFER, POLYLIST_SIZE * sizeof(OGLPolyStates), NULL, GL_DYNAMIC_DRAW); - - glGenTextures(1, &OGLRef.texPolyStatesID); - glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_PolyStates); - glBindTexture(GL_TEXTURE_BUFFER, OGLRef.texPolyStatesID); - glTexBuffer(GL_TEXTURE_BUFFER, GL_R32I, OGLRef.tboPolyStatesID); - glActiveTexture(GL_TEXTURE0); - } - return error; } @@ -1427,9 +1469,11 @@ void OpenGLRenderer_3_2::DestroyGeometryPrograms() glBindBuffer(GL_UNIFORM_BUFFER, 0); glBindBuffer(GL_TEXTURE_BUFFER, 0); glDeleteBuffers(1, &OGLRef.uboRenderStatesID); + glDeleteBuffers(1, &OGLRef.uboPolyStatesID); glDeleteBuffers(1, &OGLRef.tboPolyStatesID); OGLRef.uboRenderStatesID = 0; + OGLRef.uboPolyStatesID = 0; OGLRef.tboPolyStatesID = 0; for (size_t flagsValue = 0; flagsValue < 128; flagsValue++) @@ -1964,7 +2008,7 @@ Render3DError OpenGLRenderer_3_2::ZeroDstAlphaPass(const POLYLIST *polyList, con glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_FALSE); glStencilFunc(GL_NOTEQUAL, 0x40, 0x40); - this->DrawPolygonsForIndexRange(polyList, indexList, polyList->opaqueCount, polyList->count - 1, indexOffset, lastPolyAttr); + this->DrawPolygonsForIndexRange(polyList, indexList, this->_clippedPolyOpaqueCount, this->_clippedPolyCount - 1, indexOffset, lastPolyAttr); // Restore OpenGL states back to normal. this->_geometryProgramFlags = oldGProgramFlags; @@ -2165,13 +2209,27 @@ Render3DError OpenGLRenderer_3_2::BeginRender(const GFX3D &engine) this->_pendingRenderStates.toonColor[i].a = 1.0f; } + glBindBuffer(GL_UNIFORM_BUFFER, OGLRef.uboRenderStatesID); glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(this->_pendingRenderStates), &this->_pendingRenderStates); + // Generate the clipped polygon list. 
+ this->_PerformClipping(engine.vertList, engine.polylist, &engine.indexlist); + + if ( (OGLRef.uboPolyStatesID != 0) && (this->_clippedPolyCount > MAX_CLIPPED_POLY_COUNT_FOR_UBO) ) + { + // In practice, there shouldn't be any game scene with a clipped polygon count that + // would exceed MAX_CLIPPED_POLY_COUNT_FOR_UBO. But if for some reason there is, then we need to limit + // the polygon count here. Please report if this happens! NOTE(review): only _clippedPolyCount is clamped; confirm _clippedPolyOpaqueCount cannot exceed the clamped value. + printf("OpenGL: Clipped poly count of %d exceeds %d. Please report!!!\n", (int)this->_clippedPolyCount, MAX_CLIPPED_POLY_COUNT_FOR_UBO); + this->_clippedPolyCount = MAX_CLIPPED_POLY_COUNT_FOR_UBO; + } + // Set up the polygon states. this->_renderNeedsDepthEqualsTest = false; - for (size_t i = 0, vertIndexCount = 0; i < engine.polylist->count; i++) + for (size_t i = 0, vertIndexCount = 0; i < this->_clippedPolyCount; i++) { - const POLY &thePoly = engine.polylist->list[engine.indexlist.list[i]]; + const POLY &thePoly = *this->_clipper.GetClippedPolyByIndex(i).poly; + const size_t polyType = thePoly.type; const VERT vert[4] = { engine.vertList[thePoly.vertIndexes[0]], @@ -2243,17 +2301,23 @@ Render3DError OpenGLRenderer_3_2::BeginRender(const GFX3D &engine) } } - // Replace the entire index buffer as a hint to the driver that we can orphan the index buffer and - // avoid a synchronization cost. + // Replace the entire buffer as a hint to the driver to orphan the buffer and avoid a synchronization cost. glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(OGLRef.vertIndexBuffer), OGLRef.vertIndexBuffer); - // Some drivers seem to have problems with glMapBufferRange() and GL_TEXTURE_BUFFER, causing - // certain polygons to intermittently flicker in certain games. Therefore, we'll use glMapBuffer() - // in this case in order to prevent these glitches from happening. 
- OGLPolyStates *polyStates = (OGLPolyStates *)glMapBuffer(GL_TEXTURE_BUFFER, GL_WRITE_ONLY); - for (size_t i = 0; i < engine.polylist->count; i++) + OGLPolyStates *polyStates = this->_pendingPolyStates; + + if (OGLRef.uboPolyStatesID == 0) { - const POLY &thePoly = engine.polylist->list[engine.indexlist.list[i]]; + // Some drivers seem to have problems with GL_TEXTURE_BUFFER used as the target for + // glMapBufferRange() or glBufferSubData(), causing certain polygons to intermittently + // flicker in certain games. Therefore, we'll use glMapBuffer() in this case in order + // to prevent these glitches from happening. + polyStates = (OGLPolyStates *)glMapBuffer(GL_TEXTURE_BUFFER, GL_WRITE_ONLY); + } + + for (size_t i = 0; i < this->_clippedPolyCount; i++) + { + const POLY &thePoly = *this->_clipper.GetClippedPolyByIndex(i).poly; // Get all of the polygon states that can be handled within the shader. const NDSTextureFormat packFormat = this->_textureList[i]->GetPackFormat(); @@ -2272,7 +2336,17 @@ Render3DError OpenGLRenderer_3_2::BeginRender(const GFX3D &engine) polyStates[i].TexSizeShiftS = thePoly.texParam.SizeShiftS; // Note that we are using the preshifted size of S polyStates[i].TexSizeShiftT = thePoly.texParam.SizeShiftT; // Note that we are using the preshifted size of T } - glUnmapBuffer(GL_TEXTURE_BUFFER); + + if (OGLRef.uboPolyStatesID != 0) + { + // Replace the entire buffer as a hint to the driver to orphan the buffer and avoid a synchronization cost. + glBindBuffer(GL_UNIFORM_BUFFER, OGLRef.uboPolyStatesID); + glBufferSubData(GL_UNIFORM_BUFFER, 0, MAX_CLIPPED_POLY_COUNT_FOR_UBO * sizeof(OGLPolyStates), this->_pendingPolyStates); + } + else + { + glUnmapBuffer(GL_TEXTURE_BUFFER); + } // Set up the default draw call states. this->_geometryProgramFlags.EnableWDepth = (engine.renderState.wbuffer) ? 
1 : 0; diff --git a/desmume/src/OGLRender_3_2.h b/desmume/src/OGLRender_3_2.h index fc64dadcf..bbb2d833c 100644 --- a/desmume/src/OGLRender_3_2.h +++ b/desmume/src/OGLRender_3_2.h @@ -56,14 +56,18 @@ #include "OGLRender.h" +#define MAX_CLIPPED_POLY_COUNT_FOR_UBO 16384 + void OGLLoadEntryPoints_3_2(); void OGLCreateRenderer_3_2(OpenGLRenderer **rendererPtr); class OpenGLRenderer_3_2 : public OpenGLRenderer_2_1 { protected: + bool _is64kUBOSupported; GLsync _syncBufferSetup; CACHE_ALIGN OGLRenderStates _pendingRenderStates; + CACHE_ALIGN OGLPolyStates _pendingPolyStates[POLYLIST_SIZE]; virtual Render3DError InitExtensions(); diff --git a/desmume/src/gfx3d.cpp b/desmume/src/gfx3d.cpp index da4a2ca72..9c52892a7 100755 --- a/desmume/src/gfx3d.cpp +++ b/desmume/src/gfx3d.cpp @@ -1,6 +1,6 @@ /* Copyright (C) 2006 yopyop - Copyright (C) 2008-2018 DeSmuME team + Copyright (C) 2008-2019 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1745,9 +1745,9 @@ static BOOL gfx3d_glBoxTest(u32 v) polys[5].setVertIndexes(0,4,5,1); //bottom //setup the clipper - GFX3D_Clipper::TClippedPoly tempClippedPoly; - boxtestClipper.clippedPolys = &tempClippedPoly; - boxtestClipper.reset(); + CPoly tempClippedPoly; + boxtestClipper.SetClippedPolyBufferPtr(&tempClippedPoly); + boxtestClipper.Reset(); ////----------------------------- ////awesome hack: @@ -1800,10 +1800,10 @@ static BOOL gfx3d_glBoxTest(u32 v) &verts[thePoly.vertIndexes[3]] }; - boxtestClipper.clipPoly(thePoly, vertTable); + boxtestClipper.ClipPoly(thePoly, vertTable); //if any portion of this poly was retained, then the test passes. 
- if (boxtestClipper.clippedPolyCounter > 0) + if (boxtestClipper.GetPolyCount() > 0) { //printf("%06d PASS %d\n",gxFIFO.size, i); MMU_new.gxstat.tr = 1; @@ -3037,8 +3037,8 @@ static T interpolate(const float ratio, const T& x0, const T& x1) } //http://www.cs.berkeley.edu/~ug/slide/pipeline/assignments/as6/discussion.shtml -template -static FORCEINLINE VERT clipPoint(bool hirez, const VERT *inside, const VERT *outside) +template +static FORCEINLINE VERT clipPoint(const VERT *inside, const VERT *outside) { VERT ret; const float coord_inside = inside->coord[COORD]; @@ -3050,16 +3050,23 @@ static FORCEINLINE VERT clipPoint(bool hirez, const VERT *inside, const VERT *ou #define INTERP(X) ret . X = interpolate(t, inside-> X ,outside-> X ) INTERP(coord[0]); INTERP(coord[1]); INTERP(coord[2]); INTERP(coord[3]); - INTERP(texcoord[0]); INTERP(texcoord[1]); - if (hirez) + switch (CLIPPERMODE) { - INTERP(fcolor[0]); INTERP(fcolor[1]); INTERP(fcolor[2]); - } - else - { - INTERP(color[0]); INTERP(color[1]); INTERP(color[2]); - ret.color_to_float(); + case ClipperMode_Full: + INTERP(texcoord[0]); INTERP(texcoord[1]); + INTERP(color[0]); INTERP(color[1]); INTERP(color[2]); + ret.color_to_float(); + break; + + case ClipperMode_InterpolateFull: + INTERP(texcoord[0]); INTERP(texcoord[1]); + INTERP(fcolor[0]); INTERP(fcolor[1]); INTERP(fcolor[2]); + break; + + case ClipperMode_DetermineClipOnly: + // Do nothing. 
+ break; } //this seems like a prudent measure to make sure that math doesnt make a point pop back out @@ -3073,10 +3080,10 @@ static FORCEINLINE VERT clipPoint(bool hirez, const VERT *inside, const VERT *ou } #define MAX_SCRATCH_CLIP_VERTS (4*6 + 40) -static VERT scratchClipVerts [MAX_SCRATCH_CLIP_VERTS]; -static int numScratchClipVerts = 0; +static VERT scratchClipVerts[MAX_SCRATCH_CLIP_VERTS]; +static size_t numScratchClipVerts = 0; -template +template class ClipperPlane { public: @@ -3089,20 +3096,20 @@ public: m_next.init(verts); } - void clipVert(bool hirez, const VERT *vert) + void clipVert(const VERT *vert) { if (m_prevVert) - this->clipSegmentVsPlane(hirez, m_prevVert, vert); + this->clipSegmentVsPlane(m_prevVert, vert); else m_firstVert = (VERT *)vert; m_prevVert = (VERT *)vert; } // closes the loop and returns the number of clipped output verts - int finish(bool hirez) + int finish() { - this->clipVert(hirez, m_firstVert); - return m_next.finish(hirez); + this->clipVert(m_firstVert); + return m_next.finish(); } private: @@ -3110,7 +3117,7 @@ private: VERT* m_firstVert; NEXT& m_next; - FORCEINLINE void clipSegmentVsPlane(bool hirez, const VERT *vert0, const VERT *vert1) + FORCEINLINE void clipSegmentVsPlane(const VERT *vert0, const VERT *vert1) { const float *vert0coord = vert0->coord; const float *vert1coord = vert1->coord; @@ -3133,7 +3140,7 @@ private: if (!out0 && !out1) { CLIPLOG(" both inside\n"); - m_next.clipVert(hirez, vert1); + m_next.clipVert(vert1); } //exiting volume: insert the clipped point @@ -3141,8 +3148,8 @@ private: { CLIPLOG(" exiting\n"); assert((u32)numScratchClipVerts < MAX_SCRATCH_CLIP_VERTS); - scratchClipVerts[numScratchClipVerts] = clipPoint(hirez, vert0, vert1); - m_next.clipVert(hirez, &scratchClipVerts[numScratchClipVerts++]); + scratchClipVerts[numScratchClipVerts] = clipPoint(vert0, vert1); + m_next.clipVert(&scratchClipVerts[numScratchClipVerts++]); } //entering volume: insert clipped point and the next (interior) point 
@@ -3150,9 +3157,9 @@ private: { CLIPLOG(" entering\n"); assert((u32)numScratchClipVerts < MAX_SCRATCH_CLIP_VERTS); - scratchClipVerts[numScratchClipVerts] = clipPoint(hirez, vert1, vert0); - m_next.clipVert(hirez, &scratchClipVerts[numScratchClipVerts++]); - m_next.clipVert(hirez, vert1); + scratchClipVerts[numScratchClipVerts] = clipPoint(vert1, vert0); + m_next.clipVert(&scratchClipVerts[numScratchClipVerts++]); + m_next.clipVert(vert1); } } }; @@ -3166,14 +3173,14 @@ public: m_numVerts = 0; } - void clipVert(bool hirez, const VERT *vert) + void clipVert(const VERT *vert) { assert((u32)m_numVerts < MAX_CLIPPED_VERTS); *m_nextDestVert++ = *vert; m_numVerts++; } - int finish(bool hirez) + int finish() { return m_numVerts; } @@ -3185,28 +3192,107 @@ private: // see "Template juggling with Sutherland-Hodgman" http://www.codeguru.com/cpp/misc/misc/graphics/article.php/c8965__2/ // for the idea behind setting things up like this. -static ClipperOutput clipperOut; -typedef ClipperPlane<2, 1,ClipperOutput> Stage6; static Stage6 clipper6 (clipperOut); // back plane //TODO - we need to parameterize back plane clipping -typedef ClipperPlane<2,-1,Stage6> Stage5; static Stage5 clipper5 (clipper6); // front plane -typedef ClipperPlane<1, 1,Stage5> Stage4; static Stage4 clipper4 (clipper5); // top plane -typedef ClipperPlane<1,-1,Stage4> Stage3; static Stage3 clipper3 (clipper4); // bottom plane -typedef ClipperPlane<0, 1,Stage3> Stage2; static Stage2 clipper2 (clipper3); // right plane -typedef ClipperPlane<0,-1,Stage2> Stage1; static Stage1 clipper (clipper2); // left plane -template -void GFX3D_Clipper::clipPoly(const POLY &poly, const VERT **verts) +// Non-interpolated clippers +static ClipperOutput clipperOut; +typedef ClipperPlane Stage6; static Stage6 clipper6 (clipperOut); // back plane //TODO - we need to parameterize back plane clipping +typedef ClipperPlane Stage5; static Stage5 clipper5 (clipper6); // front plane +typedef ClipperPlane Stage4; static Stage4 clipper4 
(clipper5); // top plane +typedef ClipperPlane Stage3; static Stage3 clipper3 (clipper4); // bottom plane +typedef ClipperPlane Stage2; static Stage2 clipper2 (clipper3); // right plane +typedef ClipperPlane Stage1; static Stage1 clipper1 (clipper2); // left plane + +// Interpolated clippers. NOTE(review): the variables in this group are declared with the Stage1..Stage6 typedefs while Stage1i..Stage6i go unused as shown here; confirm the intended types. +static ClipperOutput clipperOuti; +typedef ClipperPlane Stage6i; static Stage6 clipper6i (clipperOuti); // back plane //TODO - we need to parameterize back plane clipping +typedef ClipperPlane Stage5i; static Stage5 clipper5i (clipper6i); // front plane +typedef ClipperPlane Stage4i; static Stage4 clipper4i (clipper5i); // top plane +typedef ClipperPlane Stage3i; static Stage3 clipper3i (clipper4i); // bottom plane +typedef ClipperPlane Stage2i; static Stage2 clipper2i (clipper3i); // right plane +typedef ClipperPlane Stage1i; static Stage1 clipper1i (clipper2i); // left plane + +// Determines clip status only. NOTE(review): the variables in this group are declared with the Stage1..Stage6 typedefs while Stage1d..Stage6d go unused as shown here; confirm the intended types. +static ClipperOutput clipperOutd; +typedef ClipperPlane Stage6d; static Stage6 clipper6d (clipperOutd); // back plane //TODO - we need to parameterize back plane clipping +typedef ClipperPlane Stage5d; static Stage5 clipper5d (clipper6d); // front plane +typedef ClipperPlane Stage4d; static Stage4 clipper4d (clipper5d); // top plane +typedef ClipperPlane Stage3d; static Stage3 clipper3d (clipper4d); // bottom plane +typedef ClipperPlane Stage2d; static Stage2 clipper2d (clipper3d); // right plane +typedef ClipperPlane Stage1d; static Stage1 clipper1d (clipper2d); // left plane + +GFX3D_Clipper::GFX3D_Clipper() +{ + _clippedPolyList = NULL; + _clippedPolyCounter = 0; +} + +const CPoly* GFX3D_Clipper::GetClippedPolyBufferPtr() +{ + return this->_clippedPolyList; +} + +void GFX3D_Clipper::SetClippedPolyBufferPtr(CPoly *bufferPtr) +{ + this->_clippedPolyList = bufferPtr; +} + +const CPoly& GFX3D_Clipper::GetClippedPolyByIndex(size_t index) const +{ + return this->_clippedPolyList[index]; +} + +size_t GFX3D_Clipper::GetPolyCount() const +{ + return 
this->_clippedPolyCounter; +} + +void GFX3D_Clipper::Reset() +{ + this->_clippedPolyCounter = 0; +} + +template +void GFX3D_Clipper::ClipPoly(const POLY &poly, const VERT **verts) { CLIPLOG("==Begin poly==\n"); + PolygonType outType; const PolygonType type = poly.type; numScratchClipVerts = 0; - - clipper.init(clippedPolys[clippedPolyCounter].clipVerts); - for (size_t i = 0; i < type; i++) - clipper.clipVert(USEHIRESINTERPOLATE, verts[i]); - const PolygonType outType = (PolygonType)clipper.finish(USEHIRESINTERPOLATE); - + switch (CLIPPERMODE) + { + case ClipperMode_Full: + { + clipper1.init(this->_clippedPolyList[this->_clippedPolyCounter].clipVerts); + for (size_t i = 0; i < type; i++) + clipper1.clipVert(verts[i]); + + outType = (PolygonType)clipper1.finish(); + break; + } + + case ClipperMode_InterpolateFull: + { + clipper1i.init(this->_clippedPolyList[this->_clippedPolyCounter].clipVerts); + for (size_t i = 0; i < type; i++) + clipper1i.clipVert(verts[i]); + + outType = (PolygonType)clipper1i.finish(); + break; + } + + case ClipperMode_DetermineClipOnly: + { + clipper1d.init(this->_clippedPolyList[this->_clippedPolyCounter].clipVerts); + for (size_t i = 0; i < type; i++) + clipper1d.clipVert(verts[i]); + + outType = (PolygonType)clipper1d.finish(); + break; + } + } + assert((u32)outType < MAX_CLIPPED_VERTS); if (outType < POLYGON_TYPE_TRIANGLE) { @@ -3215,24 +3301,13 @@ void GFX3D_Clipper::clipPoly(const POLY &poly, const VERT **verts) } else { - clippedPolys[clippedPolyCounter].type = outType; - clippedPolys[clippedPolyCounter].poly = (POLY *)&poly; - clippedPolyCounter++; + this->_clippedPolyList[this->_clippedPolyCounter].type = outType; + this->_clippedPolyList[this->_clippedPolyCounter].poly = (POLY *)&poly; + this->_clippedPolyCounter++; } } + //these templates needed to be instantiated manually -template void GFX3D_Clipper::clipPoly(const POLY &poly, const VERT **verts); -template void GFX3D_Clipper::clipPoly(const POLY &poly, const VERT **verts); - 
-void GFX3D_Clipper::clipSegmentVsPlane(VERT** verts, const int coord, int which) -{ - // not used (it's probably ok to delete this function) - assert(0); -} - -void GFX3D_Clipper::clipPolyVsPlane(const int coord, int which) -{ - // not used (it's probably ok to delete this function) - assert(0); -} - +template void GFX3D_Clipper::ClipPoly(const POLY &poly, const VERT **verts); +template void GFX3D_Clipper::ClipPoly(const POLY &poly, const VERT **verts); +template void GFX3D_Clipper::ClipPoly(const POLY &poly, const VERT **verts); diff --git a/desmume/src/gfx3d.h b/desmume/src/gfx3d.h index 330f5b5a8..36f347a13 100644 --- a/desmume/src/gfx3d.h +++ b/desmume/src/gfx3d.h @@ -1,6 +1,6 @@ /* Copyright (C) 2006 yopyop - Copyright (C) 2008-2018 DeSmuME team + Copyright (C) 2008-2019 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -477,31 +477,37 @@ struct VIEWPORT { //four corners of the hexagon, and you will observe a decagon #define MAX_CLIPPED_VERTS 10 +enum ClipperMode +{ + ClipperMode_Full = 0, + ClipperMode_InterpolateFull = 1, + ClipperMode_DetermineClipOnly = 2 +}; + +struct CPoly +{ + PolygonType type; //otherwise known as "count" of verts + POLY *poly; + VERT clipVerts[MAX_CLIPPED_VERTS]; +}; + class GFX3D_Clipper { -public: +protected: + size_t _clippedPolyCounter; + CPoly *_clippedPolyList; // The output of clipping operations goes into here. Be sure you init it before clipping! - struct TClippedPoly - { - PolygonType type; //otherwise known as "count" of verts - POLY *poly; - VERT clipVerts[MAX_CLIPPED_VERTS]; - }; - - //the entry point for poly clipping - template void clipPoly(const POLY &poly, const VERT **verts); - - //the output of clipping operations goes into here. - //be sure you init it before clipping! 
- TClippedPoly *clippedPolys; - size_t clippedPolyCounter; - void reset() { clippedPolyCounter=0; } - -private: - TClippedPoly tempClippedPoly; - TClippedPoly outClippedPoly; - FORCEINLINE void clipSegmentVsPlane(VERT** verts, const int coord, int which); - FORCEINLINE void clipPolyVsPlane(const int coord, int which); +public: + GFX3D_Clipper(); + + const CPoly* GetClippedPolyBufferPtr(); + void SetClippedPolyBufferPtr(CPoly *bufferPtr); + + const CPoly& GetClippedPolyByIndex(size_t index) const; + size_t GetPolyCount() const; + + void Reset(); + template void ClipPoly(const POLY &poly, const VERT **verts); // the entry point for poly clipping }; //used to communicate state to the renderer diff --git a/desmume/src/rasterize.cpp b/desmume/src/rasterize.cpp index df11dfbcd..e1dc8726f 100755 --- a/desmume/src/rasterize.cpp +++ b/desmume/src/rasterize.cpp @@ -1260,7 +1260,7 @@ void RasterizerUnit::SetRenderer(SoftRasterizerRenderer *theRenderer) template template FORCEINLINE void RasterizerUnit::Render() { - const size_t polyCount = this->_softRender->_clippedPolyCount; + const size_t polyCount = this->_softRender->GetClippedPolyCount(); if (polyCount == 0) { return; @@ -1270,7 +1270,7 @@ FORCEINLINE void RasterizerUnit::Render() const size_t dstWidth = this->_softRender->GetFramebufferWidth(); const size_t dstHeight = this->_softRender->GetFramebufferHeight(); - const GFX3D_Clipper::TClippedPoly &firstClippedPoly = this->_softRender->clippedPolys[0]; + const CPoly &firstClippedPoly = this->_softRender->GetClippedPolyByIndex(0); const POLY &firstPoly = *firstClippedPoly.poly; POLYGON_ATTR polyAttr = firstPoly.attribute; TEXIMAGE_PARAM lastTexParams = firstPoly.texParam; @@ -1285,7 +1285,7 @@ FORCEINLINE void RasterizerUnit::Render() if (!this->_softRender->polyVisible[i]) continue; this->_polynum = i; - GFX3D_Clipper::TClippedPoly &clippedPoly = this->_softRender->clippedPolys[i]; + const CPoly &clippedPoly = this->_softRender->GetClippedPolyByIndex(i); const POLY 
&thePoly = *clippedPoly.poly; const int vertCount = clippedPoly.type; const bool useLineHack = USELINEHACK && (thePoly.vtxFormat & 4); @@ -1738,7 +1738,6 @@ SoftRasterizerRenderer::SoftRasterizerRenderer() _task = NULL; _debug_drawClippedUserPoly = -1; - clippedPolys = clipper.clippedPolys = new GFX3D_Clipper::TClippedPoly[POLYLIST_SIZE*2]; _renderGeometryNeedsFinish = false; _framebufferAttributes = NULL; @@ -1868,27 +1867,6 @@ Render3DError SoftRasterizerRenderer::InitTables() return RENDER3DERROR_NOERR; } -template -size_t SoftRasterizerRenderer::performClipping(const VERT *vertList, const POLYLIST *polyList, const INDEXLIST *indexList) -{ - //submit all polys to clipper - clipper.reset(); - for (size_t i = 0; i < polyList->count; i++) - { - const POLY &poly = polyList->list[indexList->list[i]]; - const VERT *clipVerts[4] = { - &vertList[poly.vertIndexes[0]], - &vertList[poly.vertIndexes[1]], - &vertList[poly.vertIndexes[2]], - (poly.type == POLYGON_TYPE_QUAD) ? &vertList[poly.vertIndexes[3]] : NULL - }; - - clipper.clipPoly(poly, clipVerts); - } - - return clipper.clippedPolyCounter; -} - void SoftRasterizerRenderer::performViewportTransforms() { const float wScalar = (float)this->_framebufferWidth / (float)GPU_FRAMEBUFFER_NATIVE_WIDTH; @@ -1897,7 +1875,7 @@ void SoftRasterizerRenderer::performViewportTransforms() //viewport transforms for (size_t i = 0; i < this->_clippedPolyCount; i++) { - GFX3D_Clipper::TClippedPoly &poly = this->clippedPolys[i]; + CPoly &poly = this->_clippedPolyList[i]; for (size_t j = 0; j < poly.type; j++) { VERT &vert = poly.clipVerts[j]; @@ -1953,7 +1931,7 @@ void SoftRasterizerRenderer::performCoordAdjustment() { for (size_t i = 0; i < this->_clippedPolyCount; i++) { - GFX3D_Clipper::TClippedPoly &clippedPoly = this->clippedPolys[i]; + CPoly &clippedPoly = this->_clippedPolyList[i]; const PolygonType type = clippedPoly.type; VERT *verts = &clippedPoly.clipVerts[0]; @@ -1970,7 +1948,7 @@ void 
SoftRasterizerRenderer::GetAndLoadAllTextures() { for (size_t i = 0; i < this->_clippedPolyCount; i++) { - const GFX3D_Clipper::TClippedPoly &clippedPoly = this->clippedPolys[i]; + const CPoly &clippedPoly = this->_clippedPolyList[i]; const POLY &thePoly = *clippedPoly.poly; //make sure all the textures we'll need are cached @@ -1985,7 +1963,7 @@ void SoftRasterizerRenderer::performBackfaceTests() { for (size_t i = 0; i < this->_clippedPolyCount; i++) { - const GFX3D_Clipper::TClippedPoly &clippedPoly = this->clippedPolys[i]; + const CPoly &clippedPoly = this->_clippedPolyList[i]; const POLY &thePoly = *clippedPoly.poly; const PolygonType type = clippedPoly.type; const VERT *verts = &clippedPoly.clipVerts[0]; @@ -2045,11 +2023,11 @@ Render3DError SoftRasterizerRenderer::BeginRender(const GFX3D &engine) if (this->_enableHighPrecisionColorInterpolation) { - this->_clippedPolyCount = this->performClipping(engine.vertList, engine.polylist, &engine.indexlist); + this->_PerformClipping(engine.vertList, engine.polylist, &engine.indexlist); } else { - this->_clippedPolyCount = this->performClipping(engine.vertList, engine.polylist, &engine.indexlist); + this->_PerformClipping(engine.vertList, engine.polylist, &engine.indexlist); } const bool doMultithreadedStateSetup = (this->_threadCount >= 2); @@ -2228,9 +2206,6 @@ Render3DError SoftRasterizerRenderer::RenderEdgeMarkingAndFog(const SoftRasteriz if (param.enableEdgeMarking) { - FragmentColor edgeMarkColor; - edgeMarkColor.color = 0; - // a good test case for edge marking is Sonic Rush: // - the edges are completely sharp/opaque on the very brief title screen intro, // - the level-start intro gets a pseudo-antialiasing effect around the silhouette, @@ -2245,46 +2220,32 @@ Render3DError SoftRasterizerRenderer::RenderEdgeMarkingAndFog(const SoftRasteriz const bool left = (x < 1) ? 
isEdgeMarkingClearValues : ((polyID != this->_framebufferAttributes->opaquePolyID[i-1]) && (depth >= this->_framebufferAttributes->depth[i-1])); const bool up = (y < 1) ? isEdgeMarkingClearValues : ((polyID != this->_framebufferAttributes->opaquePolyID[i-this->_framebufferWidth]) && (depth >= this->_framebufferAttributes->depth[i-this->_framebufferWidth])); + FragmentColor edgeMarkColor = this->edgeMarkTable[this->_framebufferAttributes->opaquePolyID[i] >> 3]; + if (right) { - if (x >= this->_framebufferWidth - 1) - { - edgeMarkColor = this->edgeMarkTable[this->_framebufferAttributes->opaquePolyID[i] >> 3]; - } - else + if (x < this->_framebufferWidth - 1) { edgeMarkColor = this->edgeMarkTable[this->_framebufferAttributes->opaquePolyID[i+1] >> 3]; } } else if (down) { - if (y >= this->_framebufferHeight - 1) - { - edgeMarkColor = this->edgeMarkTable[this->_framebufferAttributes->opaquePolyID[i] >> 3]; - } - else + if (y < this->_framebufferHeight - 1) { edgeMarkColor = this->edgeMarkTable[this->_framebufferAttributes->opaquePolyID[i+this->_framebufferWidth] >> 3]; } } else if (left) { - if (x < 1) - { - edgeMarkColor = this->edgeMarkTable[this->_framebufferAttributes->opaquePolyID[i] >> 3]; - } - else + if (x > 0) { edgeMarkColor = this->edgeMarkTable[this->_framebufferAttributes->opaquePolyID[i-1] >> 3]; } } else if (up) { - if (y < 1) - { - edgeMarkColor = this->edgeMarkTable[this->_framebufferAttributes->opaquePolyID[i] >> 3]; - } - else + if (y > 0) { edgeMarkColor = this->edgeMarkTable[this->_framebufferAttributes->opaquePolyID[i-this->_framebufferWidth] >> 3]; } diff --git a/desmume/src/rasterize.h b/desmume/src/rasterize.h index 6069df187..59d328a76 100644 --- a/desmume/src/rasterize.h +++ b/desmume/src/rasterize.h @@ -106,7 +106,7 @@ protected: SoftRasterizerRenderer *_softRender; SoftRasterizerTexture *_currentTexture; - VERT *_verts[MAX_CLIPPED_VERTS]; + const VERT *_verts[MAX_CLIPPED_VERTS]; size_t _polynum; u8 _textureWrapMode; @@ -158,7 +158,6 @@ 
protected: size_t _customLinesPerThread; size_t _customPixelsPerThread; - GFX3D_Clipper clipper; u8 fogTable[32768]; FragmentColor edgeMarkTable[8]; bool edgeMarkDisabled[8]; @@ -171,8 +170,6 @@ protected: // SoftRasterizer-specific methods virtual Render3DError InitTables(); - template size_t performClipping(const VERT *vertList, const POLYLIST *polyList, const INDEXLIST *indexList); - // Base rendering methods virtual Render3DError BeginRender(const GFX3D &engine); virtual Render3DError RenderGeometry(const GFX3D_State &renderState, const POLYLIST *polyList, const INDEXLIST *indexList); @@ -183,9 +180,7 @@ protected: public: int _debug_drawClippedUserPoly; - size_t _clippedPolyCount; FragmentColor toonColor32LUT[32]; - GFX3D_Clipper::TClippedPoly *clippedPolys; FragmentAttributesBuffer *_framebufferAttributes; bool polyVisible[POLYLIST_SIZE]; bool polyBackfacing[POLYLIST_SIZE]; diff --git a/desmume/src/render3D.cpp b/desmume/src/render3D.cpp index c628d85b2..029c9cc5c 100755 --- a/desmume/src/render3D.cpp +++ b/desmume/src/render3D.cpp @@ -249,6 +249,9 @@ Render3D::Render3D() _textureList[i] = NULL; } + _clippedPolyList = (CPoly *)malloc_alignedCacheLine(POLYLIST_SIZE * 2 * sizeof(CPoly)); + _clipper.SetClippedPolyBufferPtr(_clippedPolyList); + memset(this->clearImageColor16Buffer, 0, sizeof(this->clearImageColor16Buffer)); memset(this->clearImageDepthBuffer, 0, sizeof(this->clearImageDepthBuffer)); memset(this->clearImageFogBuffer, 0, sizeof(this->clearImageFogBuffer)); @@ -264,6 +267,8 @@ Render3D::~Render3D() this->_textureDeposterizeDstSurface.Surface = NULL; this->_textureDeposterizeDstSurface.workingSurface[0] = NULL; } + + free_aligned(this->_clippedPolyList); } const Render3DDeviceInfo& Render3D::GetDeviceInfo() @@ -399,6 +404,54 @@ Render3DTexture* Render3D::GetTextureByPolygonRenderIndex(size_t polyRenderIndex return this->_textureList[polyRenderIndex]; } +const CPoly& Render3D::GetClippedPolyByIndex(size_t index) const +{ + return 
this->_clippedPolyList[index]; +} + +size_t Render3D::GetClippedPolyCount() const +{ + return this->_clippedPolyCount; +} + +template +void Render3D::_PerformClipping(const VERT *vertList, const POLYLIST *polyList, const INDEXLIST *indexList) +{ + //submit all polys to clipper + this->_clipper.Reset(); + + size_t i = 0; + for (; i < polyList->opaqueCount; i++) + { + const POLY &poly = polyList->list[indexList->list[i]]; + const VERT *clipVerts[4] = { + &vertList[poly.vertIndexes[0]], + &vertList[poly.vertIndexes[1]], + &vertList[poly.vertIndexes[2]], + (poly.type == POLYGON_TYPE_QUAD) ? &vertList[poly.vertIndexes[3]] : NULL + }; + + this->_clipper.ClipPoly(poly, clipVerts); + } + + this->_clippedPolyOpaqueCount = this->_clipper.GetPolyCount(); + + for (; i < polyList->count; i++) + { + const POLY &poly = polyList->list[indexList->list[i]]; + const VERT *clipVerts[4] = { + &vertList[poly.vertIndexes[0]], + &vertList[poly.vertIndexes[1]], + &vertList[poly.vertIndexes[2]], + (poly.type == POLYGON_TYPE_QUAD) ? &vertList[poly.vertIndexes[3]] : NULL + }; + + this->_clipper.ClipPoly(poly, clipVerts); + } + + this->_clippedPolyCount = this->_clipper.GetPolyCount(); +} + Render3DError Render3D::ApplyRenderingSettings(const GFX3D_State &renderState) { this->_enableEdgeMark = (CommonSettings.GFX3D_EdgeMark) ? 
renderState.enableEdgeMarking : false; @@ -966,5 +1019,9 @@ Render3DError Render3D_SSE2::ClearFramebuffer(const GFX3D_State &renderState) #endif // defined(ENABLE_AVX2) || defined(ENABLE_SSE2) +template void Render3D::_PerformClipping(const VERT *vertList, const POLYLIST *polyList, const INDEXLIST *indexList); +template void Render3D::_PerformClipping(const VERT *vertList, const POLYLIST *polyList, const INDEXLIST *indexList); +template void Render3D::_PerformClipping(const VERT *vertList, const POLYLIST *polyList, const INDEXLIST *indexList); + template Render3D_SIMD<16>::Render3D_SIMD(); template Render3D_SIMD<32>::Render3D_SIMD(); diff --git a/desmume/src/render3D.h b/desmume/src/render3D.h index 2664c0447..34fc623c8 100644 --- a/desmume/src/render3D.h +++ b/desmume/src/render3D.h @@ -62,7 +62,8 @@ enum RendererID RENDERID_SOFTRASTERIZER = 1, RENDERID_OPENGL_AUTO = 1000, RENDERID_OPENGL_LEGACY = 1001, - RENDERID_OPENGL_3_2 = 1002 + RENDERID_OPENGL_3_2 = 1002, + RENDERID_METAL = 2000 }; enum Render3DErrorCode @@ -184,10 +185,17 @@ protected: u32 *_textureUpscaleBuffer; Render3DTexture *_textureList[POLYLIST_SIZE]; + size_t _clippedPolyCount; + size_t _clippedPolyOpaqueCount; + GFX3D_Clipper _clipper; + CPoly *_clippedPolyList; + CACHE_ALIGN u16 clearImageColor16Buffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT]; CACHE_ALIGN u32 clearImageDepthBuffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT]; CACHE_ALIGN u8 clearImageFogBuffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT]; + template void _PerformClipping(const VERT *vertList, const POLYLIST *polyList, const INDEXLIST *indexList); + template void _ClearImageScrolledLoop(const u8 xScroll, const u8 yScroll, const u16 *__restrict inColor16, const u16 *__restrict inDepth16, u16 *__restrict outColor16, u32 *__restrict outDepth24, u8 *__restrict outFog); @@ -263,6 +271,9 @@ public: void SetTextureProcessingProperties(); Render3DTexture* 
GetTextureByPolygonRenderIndex(size_t polyRenderIndex) const; + + const CPoly& GetClippedPolyByIndex(size_t index) const; + size_t GetClippedPolyCount() const; }; template