OpenGL Renderer: Reduce some buffer-related synchronization in BeginRender().

This commit is contained in:
rogerman 2019-01-18 13:54:37 -08:00
parent 02cd950b02
commit ab3d48947a
5 changed files with 93 additions and 98 deletions

View File

@ -2394,9 +2394,6 @@ OpenGLRenderer_1_2::~OpenGLRenderer_1_2()
delete[] ref->color4fBuffer; delete[] ref->color4fBuffer;
ref->color4fBuffer = NULL; ref->color4fBuffer = NULL;
delete[] ref->vertIndexBuffer;
ref->vertIndexBuffer = NULL;
if (this->isShaderSupported) if (this->isShaderSupported)
{ {
glUseProgram(0); glUseProgram(0);
@ -2697,7 +2694,7 @@ Render3DError OpenGLRenderer_1_2::CreateVBOs()
glBindBufferARB(GL_ARRAY_BUFFER_ARB, OGLRef.vboGeometryVtxID); glBindBufferARB(GL_ARRAY_BUFFER_ARB, OGLRef.vboGeometryVtxID);
glBufferDataARB(GL_ARRAY_BUFFER_ARB, VERTLIST_SIZE * sizeof(VERT), NULL, GL_STREAM_DRAW_ARB); glBufferDataARB(GL_ARRAY_BUFFER_ARB, VERTLIST_SIZE * sizeof(VERT), NULL, GL_STREAM_DRAW_ARB);
glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, OGLRef.iboGeometryIndexID); glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, OGLRef.iboGeometryIndexID);
glBufferDataARB(GL_ELEMENT_ARRAY_BUFFER_ARB, OGLRENDER_VERT_INDEX_BUFFER_COUNT * sizeof(GLushort), NULL, GL_STREAM_DRAW_ARB); glBufferDataARB(GL_ELEMENT_ARRAY_BUFFER_ARB, sizeof(OGLRef.vertIndexBuffer), NULL, GL_STREAM_DRAW_ARB);
glBindBufferARB(GL_ARRAY_BUFFER_ARB, OGLRef.vboPostprocessVtxID); glBindBufferARB(GL_ARRAY_BUFFER_ARB, OGLRef.vboPostprocessVtxID);
glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(PostprocessVtxBuffer), PostprocessVtxBuffer, GL_STATIC_DRAW_ARB); glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(PostprocessVtxBuffer), PostprocessVtxBuffer, GL_STATIC_DRAW_ARB);
@ -3745,10 +3742,6 @@ Render3DError OpenGLRenderer_1_2::InitFinalRenderStates(const std::set<std::stri
// simply reference the colors+alpha from just the vertices by themselves.) // simply reference the colors+alpha from just the vertices by themselves.)
OGLRef.color4fBuffer = (this->isShaderSupported) ? NULL : new GLfloat[VERTLIST_SIZE * 4]; OGLRef.color4fBuffer = (this->isShaderSupported) ? NULL : new GLfloat[VERTLIST_SIZE * 4];
// If VBOs aren't supported, then we need to create the index buffer on the
// client side so that we have a buffer to update.
OGLRef.vertIndexBuffer = (this->isVBOSupported) ? NULL : new GLushort[OGLRENDER_VERT_INDEX_BUFFER_COUNT];
return OGLERROR_NOERR; return OGLERROR_NOERR;
} }
@ -4243,13 +4236,13 @@ Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D &engine)
return OGLERROR_BEGINGL_FAILED; return OGLERROR_BEGINGL_FAILED;
} }
GLushort *indexPtr = NULL;
if (this->isVBOSupported) if (this->isVBOSupported)
{ {
glBindBufferARB(GL_ARRAY_BUFFER_ARB, OGLRef.vboGeometryVtxID); glBindBufferARB(GL_ARRAY_BUFFER_ARB, OGLRef.vboGeometryVtxID);
glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, OGLRef.iboGeometryIndexID); glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, OGLRef.iboGeometryIndexID);
indexPtr = (GLushort *)glMapBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB);
// Only copy as much vertex data as we need to, since this can be a potentially large upload size.
glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(VERT) * engine.vertListCount, engine.vertList);
} }
else else
{ {
@ -4257,13 +4250,10 @@ Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D &engine)
OGLRef.vtxPtrPosition = &engine.vertList[0].coord; OGLRef.vtxPtrPosition = &engine.vertList[0].coord;
OGLRef.vtxPtrTexCoord = &engine.vertList[0].texcoord; OGLRef.vtxPtrTexCoord = &engine.vertList[0].texcoord;
OGLRef.vtxPtrColor = (this->isShaderSupported) ? (GLvoid *)&engine.vertList[0].color : OGLRef.color4fBuffer; OGLRef.vtxPtrColor = (this->isShaderSupported) ? (GLvoid *)&engine.vertList[0].color : OGLRef.color4fBuffer;
indexPtr = OGLRef.vertIndexBuffer;
} }
this->_renderNeedsDepthEqualsTest = false; this->_renderNeedsDepthEqualsTest = false;
size_t vertIndexCount = 0; for (size_t i = 0, vertIndexCount = 0; i < engine.polylist->count; i++)
for (size_t i = 0; i < engine.polylist->count; i++)
{ {
const POLY &thePoly = engine.polylist->list[engine.indexlist.list[i]]; const POLY &thePoly = engine.polylist->list[engine.indexlist.list[i]];
const size_t polyType = thePoly.type; const size_t polyType = thePoly.type;
@ -4284,16 +4274,16 @@ Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D &engine)
// a buffer. For GFX3D_QUADS and GFX3D_QUAD_STRIP, we also add additional // a buffer. For GFX3D_QUADS and GFX3D_QUAD_STRIP, we also add additional
// vertices here to convert them to GL_TRIANGLES, which are much easier // vertices here to convert them to GL_TRIANGLES, which are much easier
// to work with and won't be deprecated in future OpenGL versions. // to work with and won't be deprecated in future OpenGL versions.
indexPtr[vertIndexCount++] = vertIndex; OGLRef.vertIndexBuffer[vertIndexCount++] = vertIndex;
if (!thePoly.isWireframe() && (thePoly.vtxFormat == GFX3D_QUADS || thePoly.vtxFormat == GFX3D_QUAD_STRIP)) if (!thePoly.isWireframe() && (thePoly.vtxFormat == GFX3D_QUADS || thePoly.vtxFormat == GFX3D_QUAD_STRIP))
{ {
if (j == 2) if (j == 2)
{ {
indexPtr[vertIndexCount++] = vertIndex; OGLRef.vertIndexBuffer[vertIndexCount++] = vertIndex;
} }
else if (j == 3) else if (j == 3)
{ {
indexPtr[vertIndexCount++] = thePoly.vertIndexes[0]; OGLRef.vertIndexBuffer[vertIndexCount++] = thePoly.vertIndexes[0];
} }
} }
} }
@ -4319,16 +4309,16 @@ Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D &engine)
// buffer. For GFX3D_QUADS and GFX3D_QUAD_STRIP, we also add additional // buffer. For GFX3D_QUADS and GFX3D_QUAD_STRIP, we also add additional
// vertices here to convert them to GL_TRIANGLES, which are much easier // vertices here to convert them to GL_TRIANGLES, which are much easier
// to work with and won't be deprecated in future OpenGL versions. // to work with and won't be deprecated in future OpenGL versions.
indexPtr[vertIndexCount++] = vertIndex; OGLRef.vertIndexBuffer[vertIndexCount++] = vertIndex;
if (!thePoly.isWireframe() && (thePoly.vtxFormat == GFX3D_QUADS || thePoly.vtxFormat == GFX3D_QUAD_STRIP)) if (!thePoly.isWireframe() && (thePoly.vtxFormat == GFX3D_QUADS || thePoly.vtxFormat == GFX3D_QUAD_STRIP))
{ {
if (j == 2) if (j == 2)
{ {
indexPtr[vertIndexCount++] = vertIndex; OGLRef.vertIndexBuffer[vertIndexCount++] = vertIndex;
} }
else if (j == 3) else if (j == 3)
{ {
indexPtr[vertIndexCount++] = thePoly.vertIndexes[0]; OGLRef.vertIndexBuffer[vertIndexCount++] = thePoly.vertIndexes[0];
} }
} }
} }
@ -4377,8 +4367,9 @@ Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D &engine)
if (this->isVBOSupported) if (this->isVBOSupported)
{ {
glUnmapBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB); // Replace the entire index buffer as a hint to the driver that we can orphan the index buffer and
glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(VERT) * engine.vertListCount, engine.vertList); // avoid a synchronization cost.
glBufferSubDataARB(GL_ELEMENT_ARRAY_BUFFER_ARB, 0, sizeof(OGLRef.vertIndexBuffer), OGLRef.vertIndexBuffer);
} }
if (this->isShaderSupported) if (this->isShaderSupported)
@ -5262,11 +5253,6 @@ Render3DError OpenGLRenderer_1_2::Reset()
memset(OGLRef.color4fBuffer, 0, VERTLIST_SIZE * 4 * sizeof(GLfloat)); memset(OGLRef.color4fBuffer, 0, VERTLIST_SIZE * 4 * sizeof(GLfloat));
} }
if (OGLRef.vertIndexBuffer != NULL)
{
memset(OGLRef.vertIndexBuffer, 0, OGLRENDER_VERT_INDEX_BUFFER_COUNT * sizeof(GLushort));
}
this->_renderNeedsDepthEqualsTest = false; this->_renderNeedsDepthEqualsTest = false;
this->_currentPolyIndex = 0; this->_currentPolyIndex = 0;
@ -5514,9 +5500,6 @@ Render3DError OpenGLRenderer_2_0::InitFinalRenderStates(const std::set<std::stri
// Ignore our color buffer since we'll transfer the polygon alpha through a uniform. // Ignore our color buffer since we'll transfer the polygon alpha through a uniform.
OGLRef.color4fBuffer = NULL; OGLRef.color4fBuffer = NULL;
// VBOs are supported here, so just use the index buffer on the GPU.
OGLRef.vertIndexBuffer = NULL;
return OGLERROR_NOERR; return OGLERROR_NOERR;
} }
@ -5569,12 +5552,11 @@ Render3DError OpenGLRenderer_2_0::BeginRender(const GFX3D &engine)
glBindBuffer(GL_ARRAY_BUFFER, OGLRef.vboGeometryVtxID); glBindBuffer(GL_ARRAY_BUFFER, OGLRef.vboGeometryVtxID);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, OGLRef.iboGeometryIndexID); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, OGLRef.iboGeometryIndexID);
// Only copy as much vertex data as we need to, since this can be a potentially large upload size.
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(VERT) * engine.vertListCount, engine.vertList);
this->_renderNeedsDepthEqualsTest = false; this->_renderNeedsDepthEqualsTest = false;
for (size_t i = 0, vertIndexCount = 0; i < engine.polylist->count; i++)
size_t vertIndexCount = 0;
GLushort *indexPtr = (GLushort *)glMapBuffer(GL_ELEMENT_ARRAY_BUFFER, GL_WRITE_ONLY);
for (size_t i = 0; i < engine.polylist->count; i++)
{ {
const POLY &thePoly = engine.polylist->list[engine.indexlist.list[i]]; const POLY &thePoly = engine.polylist->list[engine.indexlist.list[i]];
const size_t polyType = thePoly.type; const size_t polyType = thePoly.type;
@ -5593,16 +5575,16 @@ Render3DError OpenGLRenderer_2_0::BeginRender(const GFX3D &engine)
// a buffer. For GFX3D_QUADS and GFX3D_QUAD_STRIP, we also add additional // a buffer. For GFX3D_QUADS and GFX3D_QUAD_STRIP, we also add additional
// vertices here to convert them to GL_TRIANGLES, which are much easier // vertices here to convert them to GL_TRIANGLES, which are much easier
// to work with and won't be deprecated in future OpenGL versions. // to work with and won't be deprecated in future OpenGL versions.
indexPtr[vertIndexCount++] = vertIndex; OGLRef.vertIndexBuffer[vertIndexCount++] = vertIndex;
if (!thePoly.isWireframe() && (thePoly.vtxFormat == GFX3D_QUADS || thePoly.vtxFormat == GFX3D_QUAD_STRIP)) if (!thePoly.isWireframe() && (thePoly.vtxFormat == GFX3D_QUADS || thePoly.vtxFormat == GFX3D_QUAD_STRIP))
{ {
if (j == 2) if (j == 2)
{ {
indexPtr[vertIndexCount++] = vertIndex; OGLRef.vertIndexBuffer[vertIndexCount++] = vertIndex;
} }
else if (j == 3) else if (j == 3)
{ {
indexPtr[vertIndexCount++] = thePoly.vertIndexes[0]; OGLRef.vertIndexBuffer[vertIndexCount++] = thePoly.vertIndexes[0];
} }
} }
} }
@ -5648,8 +5630,9 @@ Render3DError OpenGLRenderer_2_0::BeginRender(const GFX3D &engine)
} }
} }
glUnmapBuffer(GL_ELEMENT_ARRAY_BUFFER); // Replace the entire index buffer as a hint to the driver that we can orphan the index buffer and
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(VERT) * engine.vertListCount, engine.vertList); // avoid a synchronization cost.
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(OGLRef.vertIndexBuffer), OGLRef.vertIndexBuffer);
// Setup render states // Setup render states
this->_geometryProgramFlags.EnableWDepth = (engine.renderState.wbuffer) ? 1 : 0; this->_geometryProgramFlags.EnableWDepth = (engine.renderState.wbuffer) ? 1 : 0;

View File

@ -569,7 +569,7 @@ struct OGLRenderRef
// Client-side Buffers // Client-side Buffers
GLfloat *color4fBuffer; GLfloat *color4fBuffer;
GLushort *vertIndexBuffer; CACHE_ALIGN GLushort vertIndexBuffer[OGLRENDER_VERT_INDEX_BUFFER_COUNT];
CACHE_ALIGN GLushort workingCIColorBuffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT]; CACHE_ALIGN GLushort workingCIColorBuffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
CACHE_ALIGN GLuint workingCIDepthStencilBuffer[2][GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT]; CACHE_ALIGN GLuint workingCIDepthStencilBuffer[2][GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
CACHE_ALIGN GLuint workingCIFogAttributesBuffer[2][GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT]; CACHE_ALIGN GLuint workingCIFogAttributesBuffer[2][GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];

View File

@ -1401,7 +1401,6 @@ Render3DError OpenGLRenderer_3_2::CreateGeometryPrograms()
glGenBuffers(1, &OGLRef.tboPolyStatesID); glGenBuffers(1, &OGLRef.tboPolyStatesID);
glBindBuffer(GL_TEXTURE_BUFFER, OGLRef.tboPolyStatesID); glBindBuffer(GL_TEXTURE_BUFFER, OGLRef.tboPolyStatesID);
glBufferData(GL_TEXTURE_BUFFER, POLYLIST_SIZE * sizeof(OGLPolyStates), NULL, GL_DYNAMIC_DRAW); glBufferData(GL_TEXTURE_BUFFER, POLYLIST_SIZE * sizeof(OGLPolyStates), NULL, GL_DYNAMIC_DRAW);
glBindBuffer(GL_TEXTURE_BUFFER, 0);
glGenTextures(1, &OGLRef.texPolyStatesID); glGenTextures(1, &OGLRef.texPolyStatesID);
glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_PolyStates); glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_PolyStates);
@ -2107,63 +2106,65 @@ Render3DError OpenGLRenderer_3_2::BeginRender(const GFX3D &engine)
return OGLERROR_BEGINGL_FAILED; return OGLERROR_BEGINGL_FAILED;
} }
glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_PolyStates);
glBindBuffer(GL_ARRAY_BUFFER, OGLRef.vboGeometryVtxID); glBindBuffer(GL_ARRAY_BUFFER, OGLRef.vboGeometryVtxID);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, OGLRef.iboGeometryIndexID); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, OGLRef.iboGeometryIndexID);
glBindBuffer(GL_TEXTURE_BUFFER, OGLRef.tboPolyStatesID);
glBindBuffer(GL_UNIFORM_BUFFER, OGLRef.uboRenderStatesID);
// Copy the vertex data. // Copy the vertex data to the GPU asynchronously due to the potentially large upload size.
// This buffer write will need to be synchronized before we start drawing.
if (this->_syncBufferSetup != NULL)
{
glWaitSync(this->_syncBufferSetup, 0, GL_TIMEOUT_IGNORED);
glDeleteSync(this->_syncBufferSetup);
}
const size_t vtxBufferSize = sizeof(VERT) * engine.vertListCount; const size_t vtxBufferSize = sizeof(VERT) * engine.vertListCount;
VERT *vtxPtr = (VERT *)glMapBufferRange(GL_ARRAY_BUFFER, 0, vtxBufferSize, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT); VERT *vtxPtr = (VERT *)glMapBufferRange(GL_ARRAY_BUFFER, 0, vtxBufferSize, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
memcpy(vtxPtr, engine.vertList, vtxBufferSize); memcpy(vtxPtr, engine.vertList, vtxBufferSize);
glUnmapBuffer(GL_ARRAY_BUFFER); glUnmapBuffer(GL_ARRAY_BUFFER);
// Set up rendering states that will remain constant for the entire frame. this->_syncBufferSetup = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
OGLRenderStates *state = (OGLRenderStates *)glMapBufferRange(GL_UNIFORM_BUFFER, 0, sizeof(OGLRenderStates), GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
state->enableAntialiasing = (engine.renderState.enableAntialiasing) ? GL_TRUE : GL_FALSE; // Set up rendering states that will remain constant for the entire frame.
state->enableFogAlphaOnly = (engine.renderState.enableFogAlphaOnly) ? GL_TRUE : GL_FALSE; this->_pendingRenderStates.enableAntialiasing = (engine.renderState.enableAntialiasing) ? GL_TRUE : GL_FALSE;
state->clearPolyID = this->_clearAttributes.opaquePolyID; this->_pendingRenderStates.enableFogAlphaOnly = (engine.renderState.enableFogAlphaOnly) ? GL_TRUE : GL_FALSE;
state->clearDepth = (GLfloat)this->_clearAttributes.depth / (GLfloat)0x00FFFFFF; this->_pendingRenderStates.clearPolyID = this->_clearAttributes.opaquePolyID;
state->alphaTestRef = divide5bitBy31_LUT[engine.renderState.alphaTestRef]; this->_pendingRenderStates.clearDepth = (GLfloat)this->_clearAttributes.depth / (GLfloat)0x00FFFFFF;
state->fogColor.r = divide5bitBy31_LUT[(engine.renderState.fogColor ) & 0x0000001F]; this->_pendingRenderStates.alphaTestRef = divide5bitBy31_LUT[engine.renderState.alphaTestRef];
state->fogColor.g = divide5bitBy31_LUT[(engine.renderState.fogColor >> 5) & 0x0000001F]; this->_pendingRenderStates.fogColor.r = divide5bitBy31_LUT[(engine.renderState.fogColor ) & 0x0000001F];
state->fogColor.b = divide5bitBy31_LUT[(engine.renderState.fogColor >> 10) & 0x0000001F]; this->_pendingRenderStates.fogColor.g = divide5bitBy31_LUT[(engine.renderState.fogColor >> 5) & 0x0000001F];
state->fogColor.a = divide5bitBy31_LUT[(engine.renderState.fogColor >> 16) & 0x0000001F]; this->_pendingRenderStates.fogColor.b = divide5bitBy31_LUT[(engine.renderState.fogColor >> 10) & 0x0000001F];
state->fogOffset = (GLfloat)(engine.renderState.fogOffset & 0x7FFF) / 32767.0f; this->_pendingRenderStates.fogColor.a = divide5bitBy31_LUT[(engine.renderState.fogColor >> 16) & 0x0000001F];
state->fogStep = (GLfloat)(0x0400 >> engine.renderState.fogShift) / 32767.0f; this->_pendingRenderStates.fogOffset = (GLfloat)(engine.renderState.fogOffset & 0x7FFF) / 32767.0f;
this->_pendingRenderStates.fogStep = (GLfloat)(0x0400 >> engine.renderState.fogShift) / 32767.0f;
for (size_t i = 0; i < 32; i++) for (size_t i = 0; i < 32; i++)
{ {
state->fogDensity[i].r = (engine.renderState.fogDensityTable[i] == 127) ? 1.0f : (GLfloat)engine.renderState.fogDensityTable[i] / 128.0f; this->_pendingRenderStates.fogDensity[i].r = (engine.renderState.fogDensityTable[i] == 127) ? 1.0f : (GLfloat)engine.renderState.fogDensityTable[i] / 128.0f;
state->fogDensity[i].g = 0.0f; this->_pendingRenderStates.fogDensity[i].g = 0.0f;
state->fogDensity[i].b = 0.0f; this->_pendingRenderStates.fogDensity[i].b = 0.0f;
state->fogDensity[i].a = 0.0f; this->_pendingRenderStates.fogDensity[i].a = 0.0f;
} }
const GLfloat edgeColorAlpha = (engine.renderState.enableAntialiasing) ? (16.0f/31.0f) : 1.0f; const GLfloat edgeColorAlpha = (engine.renderState.enableAntialiasing) ? (16.0f/31.0f) : 1.0f;
for (size_t i = 0; i < 8; i++) for (size_t i = 0; i < 8; i++)
{ {
state->edgeColor[i].r = divide5bitBy31_LUT[(engine.renderState.edgeMarkColorTable[i] ) & 0x001F]; this->_pendingRenderStates.edgeColor[i].r = divide5bitBy31_LUT[(engine.renderState.edgeMarkColorTable[i] ) & 0x001F];
state->edgeColor[i].g = divide5bitBy31_LUT[(engine.renderState.edgeMarkColorTable[i] >> 5) & 0x001F]; this->_pendingRenderStates.edgeColor[i].g = divide5bitBy31_LUT[(engine.renderState.edgeMarkColorTable[i] >> 5) & 0x001F];
state->edgeColor[i].b = divide5bitBy31_LUT[(engine.renderState.edgeMarkColorTable[i] >> 10) & 0x001F]; this->_pendingRenderStates.edgeColor[i].b = divide5bitBy31_LUT[(engine.renderState.edgeMarkColorTable[i] >> 10) & 0x001F];
state->edgeColor[i].a = edgeColorAlpha; this->_pendingRenderStates.edgeColor[i].a = edgeColorAlpha;
} }
for (size_t i = 0; i < 32; i++) for (size_t i = 0; i < 32; i++)
{ {
state->toonColor[i].r = divide5bitBy31_LUT[(engine.renderState.u16ToonTable[i] ) & 0x001F]; this->_pendingRenderStates.toonColor[i].r = divide5bitBy31_LUT[(engine.renderState.u16ToonTable[i] ) & 0x001F];
state->toonColor[i].g = divide5bitBy31_LUT[(engine.renderState.u16ToonTable[i] >> 5) & 0x001F]; this->_pendingRenderStates.toonColor[i].g = divide5bitBy31_LUT[(engine.renderState.u16ToonTable[i] >> 5) & 0x001F];
state->toonColor[i].b = divide5bitBy31_LUT[(engine.renderState.u16ToonTable[i] >> 10) & 0x001F]; this->_pendingRenderStates.toonColor[i].b = divide5bitBy31_LUT[(engine.renderState.u16ToonTable[i] >> 10) & 0x001F];
state->toonColor[i].a = 1.0f; this->_pendingRenderStates.toonColor[i].a = 1.0f;
} }
glUnmapBuffer(GL_UNIFORM_BUFFER); glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(this->_pendingRenderStates), &this->_pendingRenderStates);
// Set up the polygon states. // Set up the polygon states.
GLushort *indexPtr = (GLushort *)glMapBufferRange(GL_ELEMENT_ARRAY_BUFFER, 0, engine.polylist->count * 6 * sizeof(GLushort), GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
this->_renderNeedsDepthEqualsTest = false; this->_renderNeedsDepthEqualsTest = false;
for (size_t i = 0, vertIndexCount = 0; i < engine.polylist->count; i++) for (size_t i = 0, vertIndexCount = 0; i < engine.polylist->count; i++)
{ {
@ -2184,16 +2185,16 @@ Render3DError OpenGLRenderer_3_2::BeginRender(const GFX3D &engine)
// a buffer. For GFX3D_QUADS and GFX3D_QUAD_STRIP, we also add additional // a buffer. For GFX3D_QUADS and GFX3D_QUAD_STRIP, we also add additional
// vertices here to convert them to GL_TRIANGLES, which are much easier // vertices here to convert them to GL_TRIANGLES, which are much easier
// to work with and won't be deprecated in future OpenGL versions. // to work with and won't be deprecated in future OpenGL versions.
indexPtr[vertIndexCount++] = vertIndex; OGLRef.vertIndexBuffer[vertIndexCount++] = vertIndex;
if (!thePoly.isWireframe() && (thePoly.vtxFormat == GFX3D_QUADS || thePoly.vtxFormat == GFX3D_QUAD_STRIP)) if (!thePoly.isWireframe() && (thePoly.vtxFormat == GFX3D_QUADS || thePoly.vtxFormat == GFX3D_QUAD_STRIP))
{ {
if (j == 2) if (j == 2)
{ {
indexPtr[vertIndexCount++] = vertIndex; OGLRef.vertIndexBuffer[vertIndexCount++] = vertIndex;
} }
else if (j == 3) else if (j == 3)
{ {
indexPtr[vertIndexCount++] = thePoly.vertIndexes[0]; OGLRef.vertIndexBuffer[vertIndexCount++] = thePoly.vertIndexes[0];
} }
} }
} }
@ -2238,16 +2239,10 @@ Render3DError OpenGLRenderer_3_2::BeginRender(const GFX3D &engine)
this->_willForceTextureSampleClampT[i] = this->_willForceTextureSampleClampT[i] && ( ((tc[3].y > -0.0001f) && (tc[3].y < 0.0001f)) || ((tc[3].y > 0.9999f) && (tc[3].y < 1.0001f)) ); this->_willForceTextureSampleClampT[i] = this->_willForceTextureSampleClampT[i] && ( ((tc[3].y > -0.0001f) && (tc[3].y < 0.0001f)) || ((tc[3].y > 0.9999f) && (tc[3].y < 1.0001f)) );
} }
} }
glUnmapBuffer(GL_ELEMENT_ARRAY_BUFFER);
// Since we used GL_MAP_UNSYNCHRONIZED_BIT with the previous buffers, // Replace the entire index buffer as a hint to the driver that we can orphan the index buffer and
// we will need to synchronize the buffer writes before we start drawing. // avoid a synchronization cost.
if (this->_syncBufferSetup != NULL) glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(OGLRef.vertIndexBuffer), OGLRef.vertIndexBuffer);
{
glWaitSync(this->_syncBufferSetup, 0, GL_TIMEOUT_IGNORED);
glDeleteSync(this->_syncBufferSetup);
}
this->_syncBufferSetup = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
// Some drivers seem to have problems with glMapBufferRange() and GL_TEXTURE_BUFFER, causing // Some drivers seem to have problems with glMapBufferRange() and GL_TEXTURE_BUFFER, causing
// certain polygons to intermittently flicker in certain games. Therefore, we'll use glMapBuffer() // certain polygons to intermittently flicker in certain games. Therefore, we'll use glMapBuffer()

View File

@ -63,6 +63,7 @@ class OpenGLRenderer_3_2 : public OpenGLRenderer_2_1
{ {
protected: protected:
GLsync _syncBufferSetup; GLsync _syncBufferSetup;
CACHE_ALIGN OGLRenderStates _pendingRenderStates;
virtual Render3DError InitExtensions(); virtual Render3DError InitExtensions();

View File

@ -3098,6 +3098,20 @@ typedef struct
}; };
} IPv4Header; } IPv4Header;
// The maximum possible size of any 802.11 frame is 2346 bytes:
// - Max MTU is 2304 bytes
// - Max 802.11 header size is 30 bytes
// - WEP Encryption Header size is 8 bytes
// - FCS size is 4 bytes
#define MAX_PACKET_SIZE_80211 2346
// Given a connection of 2 megabits per second, we take ~4 microseconds to transfer a byte.
// This works out to needing ~8 microseconds to transfer a halfword.
#define TX_LATENCY_LIMIT 8
#define RX_LATENCY_LIMIT 8
// NDS Frame Header Information // NDS Frame Header Information
typedef struct typedef struct
{ {
@ -3111,6 +3125,19 @@ typedef struct
u16 length; // Total length of header+body+checksum, in bytes. u16 length; // Total length of header+body+checksum, in bytes.
} TXPacketHeader; } TXPacketHeader;
// Buffered TX packet. This union lets the same storage be addressed either as a flat
// byte array (rawFrameData) or through the named fields of the anonymous struct below.
// NOTE(review): the rawFrameData size is the plain sum of the member sizes, so it only
// matches the struct layout exactly if the compiler inserts no padding -- confirm packing.
typedef union
{
// Raw byte view covering the header, the frame data and the two u16 bookkeeping fields.
u8 rawFrameData[sizeof(TXPacketHeader) + MAX_PACKET_SIZE_80211 + sizeof(u16) + sizeof(u16)];
struct
{
// Header that precedes the frame data.
TXPacketHeader txHeader;
// Frame storage, sized by MAX_PACKET_SIZE_80211.
u8 txData[MAX_PACKET_SIZE_80211];
// NOTE(review): presumably the number of bytes still left to transmit -- confirm against the TX code.
u16 remainingBytes;
// NOTE(review): presumably a counter compared against TX_LATENCY_LIMIT -- confirm against the TX code.
u16 latencyCount;
};
} TXBufferedPacket;
typedef union typedef union
{ {
u16 value; u16 value;
@ -3139,17 +3166,6 @@ typedef struct
u8 rssiMin; u8 rssiMin;
} RXPacketHeader; } RXPacketHeader;
// The maximum possible size of any 802.11 frame is 2346 bytes:
// - Max MTU is 2304 bytes
// - Max 802.11 header size is 30 bytes
// - WEP Encryption Header size is 8 bytes
// - FCS size is 4 bytes
#define MAX_PACKET_SIZE_80211 2346
// Given a connection of 2 megabits per second, we take ~4 microseconds to transfer a byte.
// This works out to needing ~8 microseconds to transfer a halfword.
#define RX_LATENCY_LIMIT 8
typedef union typedef union
{ {
u8 rawFrameData[sizeof(RXPacketHeader) + MAX_PACKET_SIZE_80211 + sizeof(u16)]; u8 rawFrameData[sizeof(RXPacketHeader) + MAX_PACKET_SIZE_80211 + sizeof(u16)];