GFX3D: Reduce the memory requirement of the vertex lists to one-third of its previous size.

- Has the positive side effect of improving the OpenGL renderer's performance when many vertices are used.
- Also fix the vertex list double-buffering so that it actually works as intended.
This commit is contained in:
rogerman 2017-10-04 16:55:08 -07:00
parent f4c01c4b57
commit e7d63e6cd2
7 changed files with 143 additions and 104 deletions

View File

@ -3579,9 +3579,9 @@ Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D &engine)
else
{
// If VBOs aren't supported, we need to use the client-side buffers here.
OGLRef.vtxPtrPosition = &engine.vertlist->list[0].coord;
OGLRef.vtxPtrTexCoord = &engine.vertlist->list[0].texcoord;
OGLRef.vtxPtrColor = (this->isShaderSupported) ? (GLvoid *)&engine.vertlist->list[0].color : OGLRef.color4fBuffer;
OGLRef.vtxPtrPosition = &engine.vertList[0].coord;
OGLRef.vtxPtrTexCoord = &engine.vertList[0].texcoord;
OGLRef.vtxPtrColor = (this->isShaderSupported) ? (GLvoid *)&engine.vertList[0].color : OGLRef.color4fBuffer;
indexPtr = OGLRef.vertIndexBuffer;
}
@ -3627,7 +3627,7 @@ Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D &engine)
// Consolidate the vertex color and the poly alpha to our internal color buffer
// so that OpenGL can use it.
const VERT *vert = &engine.vertlist->list[vertIndex];
const VERT *vert = &engine.vertList[vertIndex];
OGLRef.color4fBuffer[colorIndex+0] = material_8bit_to_float[vert->color[0]];
OGLRef.color4fBuffer[colorIndex+1] = material_8bit_to_float[vert->color[1]];
OGLRef.color4fBuffer[colorIndex+2] = material_8bit_to_float[vert->color[2]];
@ -3658,7 +3658,7 @@ Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D &engine)
if (this->isVBOSupported)
{
glUnmapBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB);
glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(VERT) * engine.vertlist->count, engine.vertlist);
glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(VERT) * engine.vertListCount, engine.vertList);
}
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
@ -4661,7 +4661,7 @@ Render3DError OpenGLRenderer_2_0::BeginRender(const GFX3D &engine)
}
glUnmapBuffer(GL_ELEMENT_ARRAY_BUFFER);
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(VERT) * engine.vertlist->count, engine.vertlist);
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(VERT) * engine.vertListCount, engine.vertList);
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glDepthMask(GL_TRUE);

View File

@ -545,7 +545,6 @@ struct OGLRenderRef
};
struct GFX3D_State;
struct VERTLIST;
struct POLYLIST;
struct INDEXLIST;
struct POLY;

View File

@ -1658,9 +1658,9 @@ Render3DError OpenGLRenderer_3_2::BeginRender(const GFX3D &engine)
glUnmapBuffer(GL_ELEMENT_ARRAY_BUFFER);
glUnmapBuffer(GL_TEXTURE_BUFFER);
const size_t vtxBufferSize = sizeof(VERT) * engine.vertlist->count;
const size_t vtxBufferSize = sizeof(VERT) * engine.vertListCount;
VERT *vtxPtr = (VERT *)glMapBufferRange(GL_ARRAY_BUFFER, 0, vtxBufferSize, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
memcpy(vtxPtr, engine.vertlist, vtxBufferSize);
memcpy(vtxPtr, engine.vertList, vtxBufferSize);
glUnmapBuffer(GL_ARRAY_BUFFER);
glUseProgram(OGLRef.programGeometryID);

View File

@ -367,8 +367,10 @@ static CACHE_ALIGN s32 cacheHalfVector[4][4];
//-------------poly and vertex lists and such things
POLYLIST* polylists = NULL;
POLYLIST* polylist = NULL;
VERTLIST* vertlists = NULL;
VERTLIST* vertlist = NULL;
VERT *vertLists = NULL;
VERT *vertList = NULL;
size_t vertListCount[2] = {0, 0};
int polygonListCompleted = 0;
static int listTwiddle = 1;
@ -391,10 +393,10 @@ static void twiddleLists()
listTwiddle++;
listTwiddle &= 1;
polylist = &polylists[listTwiddle];
vertlist = &vertlists[listTwiddle];
vertList = vertLists + (VERTLIST_SIZE * listTwiddle);
polylist->count = 0;
polylist->opaqueCount = 0;
vertlist->count = 0;
vertListCount[listTwiddle] = 0;
}
static BOOL drawPending = FALSE;
@ -515,7 +517,7 @@ void gfx3d_init()
// Use malloc() instead of new because, for some unknown reason, GCC 4.9 has a bug
// that causes a std::bad_alloc exception on certain memory allocations. Right now,
// POLYLIST and VERTLIST are POD-style structs, so malloc() can substitute for new
// POLYLIST and VERT are POD-style structs, so malloc() can substitute for new
// in this case.
if (polylists == NULL)
{
@ -523,10 +525,13 @@ void gfx3d_init()
polylist = &polylists[0];
}
if(vertlists == NULL)
if (vertLists == NULL)
{
vertlists = (VERTLIST *)malloc(sizeof(VERTLIST)*2);
vertlist = &vertlists[0];
vertLists = (VERT *)malloc_alignedPage(VERTLIST_SIZE * sizeof(VERT) * 2);
vertList = vertLists;
vertListCount[0] = 0;
vertListCount[1] = 0;
}
gfx3d.state.savedDISP3DCNT.value = 0;
@ -547,9 +552,9 @@ void gfx3d_deinit()
polylists = NULL;
polylist = NULL;
free(vertlists);
vertlists = NULL;
vertlist = NULL;
free_aligned(vertLists);
vertLists = NULL;
vertList = NULL;
}
void gfx3d_reset()
@ -571,12 +576,13 @@ void gfx3d_reset()
drawPending = FALSE;
memset(polylists, 0, sizeof(POLYLIST)*2);
memset(vertlists, 0, sizeof(VERTLIST)*2);
memset(vertLists, 0, VERTLIST_SIZE * sizeof(VERT) * 2);
gfx3d.state.invalidateToon = true;
listTwiddle = 1;
twiddleLists();
gfx3d.polylist = polylist;
gfx3d.vertlist = vertlist;
gfx3d.vertList = vertList;
gfx3d.vertListCount = vertListCount[listTwiddle];
polyAttr = 0;
textureFormat = 0;
@ -726,7 +732,7 @@ static void SetVertex()
//refuse to do anything if we have too many verts or polys
polygonListCompleted = 0;
if(vertlist->count >= VERTLIST_SIZE)
if(vertListCount[listTwiddle] >= VERTLIST_SIZE)
return;
if(polylist->count >= POLYLIST_SIZE)
return;
@ -745,13 +751,13 @@ static void SetVertex()
//record the vertex
//VERT &vert = tempVertList.list[tempVertList.count];
const size_t vertIndex = vertlist->count + tempVertInfo.count - continuation;
const size_t vertIndex = vertListCount[listTwiddle] + tempVertInfo.count - continuation;
if (vertIndex >= VERTLIST_SIZE)
{
printf("wtf\n");
}
VERT &vert = vertlist->list[vertIndex];
VERT &vert = vertList[vertIndex];
//printf("%f %f %f\n",coordTransformed[0],coordTransformed[1],coordTransformed[2]);
//if(coordTransformed[1] > 20)
@ -775,7 +781,7 @@ static void SetVertex()
vert.color[1] = GFX3D_5TO6_LOOKUP(colorRGB[1]);
vert.color[2] = GFX3D_5TO6_LOOKUP(colorRGB[2]);
vert.color_to_float();
tempVertInfo.map[tempVertInfo.count] = vertlist->count + tempVertInfo.count - continuation;
tempVertInfo.map[tempVertInfo.count] = vertListCount[listTwiddle] + tempVertInfo.count - continuation;
tempVertInfo.count++;
//possibly complete a polygon
@ -791,7 +797,7 @@ static void SetVertex()
SUBMITVERTEX(0,0);
SUBMITVERTEX(1,1);
SUBMITVERTEX(2,2);
vertlist->count+=3;
vertListCount[listTwiddle] += 3;
polylist->list[polylist->count].type = POLYGON_TYPE_TRIANGLE;
tempVertInfo.count = 0;
break;
@ -804,7 +810,7 @@ static void SetVertex()
SUBMITVERTEX(1,1);
SUBMITVERTEX(2,2);
SUBMITVERTEX(3,3);
vertlist->count+=4;
vertListCount[listTwiddle] += 4;
polylist->list[polylist->count].type = POLYGON_TYPE_QUAD;
tempVertInfo.count = 0;
break;
@ -819,14 +825,14 @@ static void SetVertex()
polylist->list[polylist->count].type = POLYGON_TYPE_TRIANGLE;
if(triStripToggle)
tempVertInfo.map[1] = vertlist->count+2-continuation;
tempVertInfo.map[1] = vertListCount[listTwiddle]+2-continuation;
else
tempVertInfo.map[0] = vertlist->count+2-continuation;
tempVertInfo.map[0] = vertListCount[listTwiddle]+2-continuation;
if(tempVertInfo.first)
vertlist->count+=3;
vertListCount[listTwiddle] += 3;
else
vertlist->count+=1;
vertListCount[listTwiddle] += 1;
triStripToggle ^= 1;
tempVertInfo.first = false;
@ -842,11 +848,11 @@ static void SetVertex()
SUBMITVERTEX(2,3);
SUBMITVERTEX(3,2);
polylist->list[polylist->count].type = POLYGON_TYPE_QUAD;
tempVertInfo.map[0] = vertlist->count+2-continuation;
tempVertInfo.map[1] = vertlist->count+3-continuation;
tempVertInfo.map[0] = vertListCount[listTwiddle]+2-continuation;
tempVertInfo.map[1] = vertListCount[listTwiddle]+3-continuation;
if(tempVertInfo.first)
vertlist->count+=4;
else vertlist->count+=2;
vertListCount[listTwiddle] += 4;
else vertListCount[listTwiddle] += 2;
tempVertInfo.first = false;
tempVertInfo.count = 2;
break;
@ -866,9 +872,9 @@ static void SetVertex()
if (!(textureFormat & (7 << 26))) // no texture
{
bool duplicated = false;
const VERT &vert0 = vertlist->list[poly.vertIndexes[0]];
const VERT &vert1 = vertlist->list[poly.vertIndexes[1]];
const VERT &vert2 = vertlist->list[poly.vertIndexes[2]];
const VERT &vert0 = vertList[poly.vertIndexes[0]];
const VERT &vert1 = vertList[poly.vertIndexes[1]];
const VERT &vert2 = vertList[poly.vertIndexes[2]];
if ( (vert0.x == vert1.x) && (vert0.y == vert1.y) ) duplicated = true;
else
if ( (vert1.x == vert2.x) && (vert1.y == vert2.y) ) duplicated = true;
@ -1823,7 +1829,7 @@ int gfx3d_GetNumPolys()
int gfx3d_GetNumVertex()
{
//so is this in the currently-displayed or currently-built list?
return (vertlists[listTwiddle].count);
return (vertListCount[listTwiddle]);
}
void gfx3d_UpdateToonTable(u8 offset, u16 val)
@ -2226,7 +2232,8 @@ static void gfx3d_doFlush()
//the renderer will get the lists we just built
gfx3d.polylist = polylist;
gfx3d.vertlist = vertlist;
gfx3d.vertList = vertList;
gfx3d.vertListCount = vertListCount[listTwiddle];
//and also our current render state
gfx3d.state.sortmode = BIT0(gfx3d.state.activeFlushCommand);
@ -2252,8 +2259,8 @@ static void gfx3d_doFlush()
const size_t polycount = polylist->count;
#ifdef _SHOW_VTX_COUNTERS
max_polys = max((u32)polycount, max_polys);
max_verts = max((u32)vertlist->count, max_verts);
osd->addFixed(180, 20, "%i/%i", polycount, vertlist->count); // current
max_verts = max((u32)vertListCount[listTwiddle], max_verts);
osd->addFixed(180, 20, "%i/%i", polycount, vertListCount[listTwiddle]); // current
osd->addFixed(180, 35, "%i/%i", max_polys, max_verts); // max
#endif
@ -2269,15 +2276,15 @@ static void gfx3d_doFlush()
// If both of these questions answer to yes, then how does the NDS handle a NaN?
// For now, simply prevent w from being zero.
POLY &poly = polylist->list[i];
float verty = vertlist->list[poly.vertIndexes[0]].y;
float vertw = (vertlist->list[poly.vertIndexes[0]].w != 0.0f) ? vertlist->list[poly.vertIndexes[0]].w : 0.00000001f;
float verty = vertList[poly.vertIndexes[0]].y;
float vertw = (vertList[poly.vertIndexes[0]].w != 0.0f) ? vertList[poly.vertIndexes[0]].w : 0.00000001f;
verty = 1.0f-(verty+vertw)/(2*vertw);
poly.miny = poly.maxy = verty;
for (size_t j = 1; j < poly.type; j++)
{
verty = vertlist->list[poly.vertIndexes[j]].y;
vertw = (vertlist->list[poly.vertIndexes[j]].w != 0.0f) ? vertlist->list[poly.vertIndexes[j]].w : 0.00000001f;
verty = vertList[poly.vertIndexes[j]].y;
vertw = (vertList[poly.vertIndexes[j]].w != 0.0f) ? vertList[poly.vertIndexes[j]].w : 0.00000001f;
verty = 1.0f-(verty+vertw)/(2*vertw);
poly.miny = min(poly.miny, verty);
poly.maxy = max(poly.maxy, verty);
@ -2330,8 +2337,11 @@ static void gfx3d_doFlush()
viewer3d_state->frameNumber = currFrameCounter;
viewer3d_state->state = gfx3d.state;
viewer3d_state->polylist = *gfx3d.polylist;
viewer3d_state->vertlist = *gfx3d.vertlist;
viewer3d_state->indexlist = gfx3d.indexlist;
viewer3d_state->vertListCount = gfx3d.vertListCount;
memcpy(viewer3d_state->vertList, gfx3d.vertList, gfx3d.vertListCount * sizeof(VERT));
driver->view3d->NewFrame();
}
@ -2620,9 +2630,9 @@ void gfx3d_savestate(EMUFILE &os)
os.write_32LE(4);
//dump the render lists
os.write_32LE((u32)vertlist->count);
for (size_t i = 0; i < vertlist->count; i++)
vertlist->list[i].save(os);
os.write_32LE((u32)vertListCount[listTwiddle]);
for (size_t i = 0; i < vertListCount[listTwiddle]; i++)
vertList[i].save(os);
os.write_32LE((u32)polylist->count);
for (size_t i = 0; i < polylist->count; i++)
@ -2676,8 +2686,8 @@ bool gfx3d_loadstate(EMUFILE &is, int size)
//jiggle the lists. and also wipe them. this is clearly not the best thing to be doing.
listTwiddle = 0;
polylist = &polylists[listTwiddle];
vertlist = &vertlists[listTwiddle];
polylist = &polylists[0];
vertList = vertLists;
gfx3d_parseCurrentDISP3DCNT();
@ -2687,9 +2697,9 @@ bool gfx3d_loadstate(EMUFILE &is, int size)
u32 polyListCount32 = 0;
is.read_32LE(vertListCount32);
vertlist->count = vertListCount32;
for (size_t i = 0; i < vertlist->count; i++)
vertlist->list[i].load(is);
vertListCount[0] = vertListCount32;
for (size_t i = 0; i < vertListCount[0]; i++)
vertList[i].load(is);
is.read_32LE(polyListCount32);
polylist->count = polyListCount32;
@ -2714,9 +2724,9 @@ bool gfx3d_loadstate(EMUFILE &is, int size)
}
gfx3d.polylist = &polylists[listTwiddle^1];
gfx3d.vertlist = &vertlists[listTwiddle^1];
gfx3d.vertList = vertLists + VERTLIST_SIZE;
gfx3d.polylist->count = 0;
gfx3d.vertlist->count = 0;
gfx3d.vertListCount = 0;
if (version >= 4)
{

View File

@ -478,6 +478,8 @@ struct POLY {
};
#define POLYLIST_SIZE 20000
#define VERTLIST_SIZE (POLYLIST_SIZE * 4)
struct POLYLIST {
POLY list[POLYLIST_SIZE];
size_t count;
@ -505,59 +507,86 @@ struct VERT_POS4f
}
};
//Don't use SSE-optimized matrix instructions in here; things might not be aligned.
//We haven't padded this because the sheer bulk of data leaves things running faster without the extra bloat.
struct VERT {
// Align to 16 for SSE instructions to work
union {
#include "PACKED.h"
// This struct is padded in such a way so that each component can be accessed with a 16-byte alignment.
struct VERT
{
union
{
float coord[4];
struct {
struct
{
float x, y, z, w;
};
} CACHE_ALIGN;
union {
float texcoord[2];
struct {
float u,v;
};
} CACHE_ALIGN;
void set_coord(float x, float y, float z, float w) {
union
{
float texcoord[4];
struct
{
float u, v, tcPad2, tcPad3;
};
};
union
{
float fcolor[4];
struct
{
float rf, gf, bf, af; // The alpha value is unused and only exists for padding purposes.
};
};
union
{
u32 color32;
u8 color[4];
struct
{
u8 r, g, b, a; // The alpha value is unused and only exists for padding purposes.
};
};
u8 padFinal[12]; // Final padding to bring the struct to exactly 64 bytes.
void set_coord(float x, float y, float z, float w)
{
this->x = x;
this->y = y;
this->z = z;
this->w = w;
}
void set_coord(float* coords) {
void set_coord(float* coords)
{
x = coords[0];
y = coords[1];
z = coords[2];
w = coords[3];
}
float fcolor[3];
u8 color[3];
void color_to_float() {
fcolor[0] = color[0];
fcolor[1] = color[1];
fcolor[2] = color[2];
void color_to_float()
{
rf = (float)r;
gf = (float)g;
bf = (float)b;
af = (float)a;
}
void save(EMUFILE &os);
void load(EMUFILE &is);
};
#define VERTLIST_SIZE (POLYLIST_SIZE * 4)
struct VERTLIST {
VERT list[VERTLIST_SIZE];
size_t count;
};
#include "PACKED_END.h"
#define INDEXLIST_SIZE (POLYLIST_SIZE * 4)
struct INDEXLIST {
int list[INDEXLIST_SIZE];
};
struct VIEWPORT {
u8 x, y;
u16 width, height;
@ -660,9 +689,11 @@ struct Viewer3d_State
{
int frameNumber;
GFX3D_State state;
VERTLIST vertlist;
VERT vertList[VERTLIST_SIZE];
POLYLIST polylist;
INDEXLIST indexlist;
size_t vertListCount;
};
extern Viewer3d_State* viewer3d_state;
@ -670,8 +701,8 @@ extern Viewer3d_State* viewer3d_state;
struct GFX3D
{
GFX3D()
: polylist(0)
, vertlist(0)
: polylist(NULL)
, vertList(NULL)
, render3DFrameCount(0) {
}
@ -682,9 +713,10 @@ struct GFX3D
GFX3D_State renderState;
POLYLIST *polylist;
VERTLIST* vertlist;
VERT *vertList;
INDEXLIST indexlist;
size_t vertListCount;
u32 render3DFrameCount; // Increments when gfx3d_doFlush() is called. Resets every 60 video frames.
};
extern GFX3D gfx3d;

View File

@ -1474,7 +1474,7 @@ Render3DError SoftRasterizerRenderer::InitTables()
}
template<bool USEHIRESINTERPOLATE>
size_t SoftRasterizerRenderer::performClipping(const VERTLIST *vertList, const POLYLIST *polyList, const INDEXLIST *indexList)
size_t SoftRasterizerRenderer::performClipping(const VERT *vertList, const POLYLIST *polyList, const INDEXLIST *indexList)
{
//submit all polys to clipper
clipper.reset();
@ -1482,12 +1482,10 @@ size_t SoftRasterizerRenderer::performClipping(const VERTLIST *vertList, const P
{
const POLY &poly = polyList->list[indexList->list[i]];
const VERT *clipVerts[4] = {
&vertList->list[poly.vertIndexes[0]],
&vertList->list[poly.vertIndexes[1]],
&vertList->list[poly.vertIndexes[2]],
poly.type==POLYGON_TYPE_QUAD
?&vertList->list[poly.vertIndexes[3]]
:NULL
&vertList[poly.vertIndexes[0]],
&vertList[poly.vertIndexes[1]],
&vertList[poly.vertIndexes[2]],
(poly.type == POLYGON_TYPE_QUAD) ? &vertList[poly.vertIndexes[3]] : NULL
};
clipper.clipPoly<USEHIRESINTERPOLATE>(poly, clipVerts);
@ -1651,11 +1649,11 @@ Render3DError SoftRasterizerRenderer::BeginRender(const GFX3D &engine)
if (CommonSettings.GFX3D_HighResolutionInterpolateColor)
{
this->_clippedPolyCount = this->performClipping<true>(engine.vertlist, engine.polylist, &engine.indexlist);
this->_clippedPolyCount = this->performClipping<true>(engine.vertList, engine.polylist, &engine.indexlist);
}
else
{
this->_clippedPolyCount = this->performClipping<false>(engine.vertlist, engine.polylist, &engine.indexlist);
this->_clippedPolyCount = this->performClipping<false>(engine.vertList, engine.polylist, &engine.indexlist);
}
if (rasterizerCores >= 4)

View File

@ -101,7 +101,7 @@ protected:
// SoftRasterizer-specific methods
virtual Render3DError InitTables();
template<bool USEHIRESINTERPOLATE> size_t performClipping(const VERTLIST *vertList, const POLYLIST *polyList, const INDEXLIST *indexList);
template<bool USEHIRESINTERPOLATE> size_t performClipping(const VERT *vertList, const POLYLIST *polyList, const INDEXLIST *indexList);
// Base rendering methods
virtual Render3DError BeginRender(const GFX3D &engine);