From a2f078ec8102d231169e3b947cb19e0979024b0b Mon Sep 17 00:00:00 2001 From: rogerman Date: Tue, 15 Jan 2013 09:36:17 +0000 Subject: [PATCH] GFX3D: - Remove NDS_3D_GetLineData(), as it wasn't appropriately named or used for its intended purpose. - Add NDS_3D_RenderFinish(), which is what should have been used in the first place. (Purpose: Blocks the thread until 3D rendering is finished.) - Optimize gpu3DNull so that it doesn't have to clear the 3D layer every frame. task.h: - Fix multiple #include compiling bug. SoftRasterizer: - Improve the performance of rendering in multithreaded mode. - Improve the stability of reset and shutdown in multithreaded mode. - Do some minor code cleanup. OpenGL Renderer: - Improve the stability of reset and shutdown. - Do some minor code cleanup. --- desmume/src/OGLRender.cpp | 206 ++++++++++++++++++++++---------------- desmume/src/gfx3d.cpp | 17 ++-- desmume/src/rasterize.cpp | 88 ++++++++++++---- desmume/src/render3D.cpp | 57 +++++++++-- desmume/src/render3D.h | 23 +++-- desmume/src/utils/task.h | 1 + 6 files changed, 259 insertions(+), 133 deletions(-) diff --git a/desmume/src/OGLRender.cpp b/desmume/src/OGLRender.cpp index 140b3c5ba..046717408 100644 --- a/desmume/src/OGLRender.cpp +++ b/desmume/src/OGLRender.cpp @@ -117,8 +117,7 @@ enum OGLTextureUnitID // Multithreading States static bool enableMultithreading = false; -static bool isReadPixelsWorking = false; -static Task oglReadPixelsTask; +static Task oglReadPixelsTask[2]; // Polygon Info static GLfloat polyAlpha = 1.0f; @@ -498,42 +497,52 @@ static void OGLInitShaders(const char *oglExtensionString) static void OGLReset() { + gpuScreen3DHasNewData[0] = false; + gpuScreen3DHasNewData[1] = false; + + if (enableMultithreading) + { + for (unsigned int i = 0; i < 2; i++) + { + oglReadPixelsTask[i].finish(); + } + } + + if(!BEGINGL()) + return; + + glFinish(); + + for (unsigned int i = 0; i < 2; i++) + { + memset(GPU_screen3D[i], 0, sizeof(GPU_screen3D[i])); + } + if(isShaderSupported) { hasTexture = false; - if(BEGINGL()) - { - glUniform1i(uniformPolyID, 0); - glUniform1f(uniformPolyAlpha, 1.0f); - glUniform2f(uniformTexScale, 1.0f, 1.0f); - glUniform1i(uniformHasTexture, GL_FALSE); - glUniform1i(uniformPolygonMode, 0); - glUniform1i(uniformToonShadingMode, 0); - glUniform1i(uniformWBuffer, 0); - glUniform1i(uniformEnableAlphaTest, GL_TRUE); - glUniform1f(uniformAlphaTestRef, 0.0f); - - ENDGL(); - } + glUniform1i(uniformPolyID, 0); + glUniform1f(uniformPolyAlpha, 1.0f); + glUniform2f(uniformTexScale, 1.0f, 1.0f); + glUniform1i(uniformHasTexture, GL_FALSE); + glUniform1i(uniformPolygonMode, 0); + glUniform1i(uniformToonShadingMode, 0); + glUniform1i(uniformWBuffer, 0); + glUniform1i(uniformEnableAlphaTest, GL_TRUE); + glUniform1f(uniformAlphaTestRef, 0.0f); } else { memset(color4fBuffer, 0, VERTLIST_SIZE * 4 * sizeof(GLfloat)); } - TexCache_Reset(); - if (currTexture) - delete currTexture; - currTexture = NULL; - - for (unsigned int i = 0; i < 2; i++) - { - memset(GPU_screen3D[i], 0, sizeof(GPU_screen3D[i])); - gpuScreen3DHasNewData[i] = false; - } + ENDGL(); memset(vertIndexBuffer, 0, VERT_INDEX_BUFFER_SIZE * sizeof(GLushort)); + currTexture = NULL; + + Default3D_Reset(); } //static class OGLTexCacheUser : public ITexCacheUser @@ -636,13 +645,25 @@ static bool OGLIsMinimumVersionSupported(const char *oglVersionString) static char OGLInit(void) { + char result = 0; + if(!oglrender_init) - return 0; + return result; if(!oglrender_init()) - return 0; + return result; + + result = Default3D_Init(); + if (result == 0) + { + return result; + } if(!BEGINGL()) - return 0; + { + INFO("OpenGL: Could not initialize -- BEGINGL() failed."); + result = 0; + return result; + } // Get OpenGL info const char *oglVendorString = (const char *)glGetString(GL_VENDOR); @@ -655,13 +676,10 @@ static char OGLInit(void) OGL_MINIMUM_GPU_VERSION_REQUIRED_MAJOR, OGL_MINIMUM_GPU_VERSION_REQUIRED_MINOR, OGL_MINIMUM_GPU_VERSION_REQUIRED_REVISION, oglVersionString, oglVendorString, oglRendererString); - return 0; + result = 0; + return result; } - glViewport(0, 0, 256, 192); - if (glGetError() != GL_NO_ERROR) - return 0; - const char *oglExtensionString = (const char *)glGetString(GL_EXTENSIONS); for (u8 i = 0; i < 255; i++) @@ -936,8 +954,6 @@ static char OGLInit(void) ENDGL(); // Multithreading Setup - isReadPixelsWorking = false; - if (CommonSettings.num_cores > 1) { #ifdef _WINDOWS @@ -947,7 +963,11 @@ static char OGLInit(void) enableMultithreading = false; #else enableMultithreading = true; - oglReadPixelsTask.start(false); + + for (unsigned int i = 0; i < 2; i++) + { + oglReadPixelsTask[i].start(false); + } #endif } else @@ -960,27 +980,28 @@ static char OGLInit(void) INFO("OpenGL: Initialized successfully.\n[GPU Info - Version: %s, Vendor: %s, Renderer: %s]\n", oglVersionString, oglVendorString, oglRendererString); - return 1; + return result; } static void OGLClose() { - if (enableMultithreading) - { - oglReadPixelsTask.finish(); - oglReadPixelsTask.shutdown(); - isReadPixelsWorking = false; - } - gpuScreen3DHasNewData[0] = false; gpuScreen3DHasNewData[1] = false; - delete [] vertIndexBuffer; - vertIndexBuffer = NULL; + if (enableMultithreading) + { + for (unsigned int i = 0; i < 2; i++) + { + oglReadPixelsTask[i].finish(); + oglReadPixelsTask[i].shutdown(); + } + } if(!BEGINGL()) return; - + + glFinish(); + if(isShaderSupported) { glUseProgram(0); @@ -992,6 +1013,8 @@ static void OGLClose() glDeleteShader(vertexShaderID); glDeleteShader(fragmentShaderID); + glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_ToonTable); + glBindTexture(GL_TEXTURE_1D, 0); glDeleteTextures(1, &texToonTableID); isShaderSupported = false; @@ -1004,27 +1027,20 @@ static void OGLClose() if (isVAOSupported) { + glBindVertexArray(0); glDeleteVertexArrays(1, &vaoMainStatesID); isVAOSupported = false; } - //kill the tex cache to free all the texture ids - TexCache_Reset(); - - while(!freeTextureIds.empty()) - { - GLuint temp = freeTextureIds.front(); - freeTextureIds.pop(); - glDeleteTextures(1,&temp); - } - if (isVBOSupported) { + glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); glDeleteBuffersARB(1, &vboVertexID); } if (isPBOSupported) { + glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0); glDeleteBuffersARB(2, pboRenderDataID); pboRenderBuffer[0] = NULL; pboRenderBuffer[1] = NULL; @@ -1033,14 +1049,36 @@ static void OGLClose() // FBO if (isFBOSupported) { + glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_ClearImage); + glBindTexture(GL_TEXTURE_2D, 0); glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0); glDeleteFramebuffersEXT(1, &fboClearImageID); glDeleteTextures(1, &texClearImageColorID); glDeleteTextures(1, &texClearImageDepthStencilID); } - + + //kill the tex cache to free all the texture ids + TexCache_Reset(); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, 0); + + while(!freeTextureIds.empty()) + { + GLuint temp = freeTextureIds.front(); + freeTextureIds.pop(); + glDeleteTextures(1,&temp); + } + + glFinish(); + ENDGL(); + + delete [] vertIndexBuffer; + vertIndexBuffer = NULL; + + Default3D_Close(); } static void texDeleteCallback(TexCacheItem* item) @@ -1318,15 +1356,14 @@ static void GL_ReadFramebuffer() { static unsigned int bufferIndex = 0; - if (enableMultithreading && isReadPixelsWorking) - { - oglReadPixelsTask.finish(); - isReadPixelsWorking = false; - } - bufferIndex = (bufferIndex + 1) & 0x01; gpuScreen3DBufferIndex = bufferIndex; + if (enableMultithreading) + { + oglReadPixelsTask[bufferIndex].finish(); + } + if (isPBOSupported) { if(!BEGINGL()) return; @@ -1353,14 +1390,12 @@ static void GL_ReadFramebuffer() // H-Blank, and let that logic determine whether a pixel read is needed or not. // This can save us some time in cases where games don't require the 3D layer // for this particular frame. + + gpuScreen3DHasNewData[bufferIndex] = true; + if (enableMultithreading) { - isReadPixelsWorking = true; - oglReadPixelsTask.execute(&execReadPixelsTask, &bufferIndex); - } - else - { - gpuScreen3DHasNewData[bufferIndex] = true; + oglReadPixelsTask[bufferIndex].execute(&execReadPixelsTask, &bufferIndex); } } @@ -1738,27 +1773,26 @@ static void OGLRender() static void OGLVramReconfigureSignal() { - TexCache_Invalidate(); + Default3D_VramReconfigureSignal(); } -static u8* OGLGetLineData(u8 lineNumber) +static void OGLRenderFinish() { // If OpenGL is still reading back pixels on a separate thread, wait for it to finish. - if (isReadPixelsWorking) - { - oglReadPixelsTask.finish(); - isReadPixelsWorking = false; - } - - // If we're doing a pixel read on this thread and we have new rendered data, - // then do the pixel read now. + // Otherwise, just do the pixel read now. if (gpuScreen3DHasNewData[gpuScreen3DBufferIndex]) { - execReadPixelsTask(&gpuScreen3DBufferIndex); + if (enableMultithreading) + { + oglReadPixelsTask[gpuScreen3DBufferIndex].finish(); + } + else + { + execReadPixelsTask(&gpuScreen3DBufferIndex); + } + gpuScreen3DHasNewData[gpuScreen3DBufferIndex] = false; } - - return ( gfx3d_convertedScreen + (lineNumber << (8+2)) ); } GPU3DInterface gpu3Dgl = { @@ -1767,6 +1801,6 @@ GPU3DInterface gpu3Dgl = { OGLReset, OGLClose, OGLRender, - OGLVramReconfigureSignal, - OGLGetLineData + OGLRenderFinish, + OGLVramReconfigureSignal }; diff --git a/desmume/src/gfx3d.cpp b/desmume/src/gfx3d.cpp index 67b88fe8b..40c82436b 100644 --- a/desmume/src/gfx3d.cpp +++ b/desmume/src/gfx3d.cpp @@ -489,6 +489,8 @@ void gfx3d_init() void gfx3d_reset() { + gpu3D->NDS_3D_RenderFinish(); + #ifdef _SHOW_VTX_COUNTERS max_polys = max_verts = 0; #endif @@ -2183,13 +2185,12 @@ void gfx3d_VBlankEndSignal(bool skipFrame) drawPending = FALSE; - //if the null 3d core is chosen, then we need to clear out the 3d buffers to keep old data from being rendered - if(gpu3D == &gpu3DNull || !CommonSettings.showGpu.main) + if(!CommonSettings.showGpu.main) { memset(gfx3d_convertedScreen,0,sizeof(gfx3d_convertedScreen)); return; } - + gpu3D->NDS_3D_Render(); } @@ -2301,14 +2302,8 @@ void gfx3d_glGetLightColor(unsigned int index, unsigned int* dest) void gfx3d_GetLineData(int line, u8** dst) { - if (gpu3D->NDS_3D_GetLineData == NULL) - { - *dst = gfx3d_convertedScreen+((line)<<(8+2)); - } - else - { - *dst = gpu3D->NDS_3D_GetLineData(line); - } + gpu3D->NDS_3D_RenderFinish(); + *dst = gfx3d_convertedScreen+((line)<<(8+2)); } void gfx3d_GetLineData15bpp(int line, u16** dst) diff --git a/desmume/src/rasterize.cpp b/desmume/src/rasterize.cpp index e4e5cc181..ac892152e 100644 --- a/desmume/src/rasterize.cpp +++ b/desmume/src/rasterize.cpp @@ -67,7 +67,7 @@ static u8 decal_table[32][64][64]; static u8 index_lookup_table[65]; static u8 index_start_table[8]; - +static bool softRastHasNewData = false; ////optimized float floor useful in limited cases ////from http://www.stereopsis.com/FPU.html#convert @@ -1073,7 +1073,7 @@ static SoftRasterizerEngine mainSoftRasterizer; static Task rasterizerUnitTask[_MAX_CORES]; static RasterizerUnit rasterizerUnit[_MAX_CORES]; static RasterizerUnit _HACK_viewer_rasterizerUnit; -static int rasterizerCores; +static unsigned int rasterizerCores; static bool rasterizerUnitTasksInited = false; static void* execRasterizerUnit(void* arg) @@ -1085,6 +1085,12 @@ static void* execRasterizerUnit(void* arg) static char SoftRastInit(void) { + char result = Default3D_Init(); + if (result == 0) + { + return result; + } + if(!rasterizerUnitTasksInited) { rasterizerUnitTasksInited = true; @@ -1095,7 +1101,7 @@ static char SoftRastInit(void) rasterizerCores = CommonSettings.num_cores; if (rasterizerCores > _MAX_CORES) rasterizerCores = _MAX_CORES; - if(CommonSettings.num_cores == 1) + if(CommonSettings.num_cores <= 1) { rasterizerCores = 1; rasterizerUnit[0].SLI_MASK = 0; @@ -1146,22 +1152,44 @@ static char SoftRastInit(void) TexCache_Reset(); printf("SoftRast Initialized with cores=%d\n",rasterizerCores); - return 1; + return result; } -static void SoftRastReset() { - TexCache_Reset(); +static void SoftRastReset() +{ + if (rasterizerCores > 1) + { + for(unsigned int i = 0; i < rasterizerCores; i++) + { + rasterizerUnitTask[i].finish(); + } + } + + softRastHasNewData = false; + + Default3D_Reset(); } static void SoftRastClose() { - for(int i=0; i<_MAX_CORES; i++) - rasterizerUnitTask[i].shutdown(); + if (rasterizerCores > 1) + { + for(unsigned int i = 0; i < rasterizerCores; i++) + { + rasterizerUnitTask[i].finish(); + rasterizerUnitTask[i].shutdown(); + } + } + rasterizerUnitTasksInited = false; + softRastHasNewData = false; + + Default3D_Close(); } -static void SoftRastVramReconfigureSignal() { - TexCache_Invalidate(); +static void SoftRastVramReconfigureSignal() +{ + Default3D_VramReconfigureSignal(); } static void SoftRastConvertFramebuffer() @@ -1622,24 +1650,44 @@ static void SoftRastRender() mainSoftRasterizer.performCoordAdjustment(true); mainSoftRasterizer.setupTextures(true); - - if(rasterizerCores==1) + softRastHasNewData = true; + + if (rasterizerCores > 1) { - rasterizerUnit[0].mainLoop(&mainSoftRasterizer); + for(unsigned int i = 0; i < rasterizerCores; i++) + { + rasterizerUnitTask[i].execute(&execRasterizerUnit, (void *)i); + } } else { - for(int i=0;i(&mainSoftRasterizer); } +} +static void SoftRastRenderFinish() +{ + if (!softRastHasNewData) + { + return; + } + + if (rasterizerCores > 1) + { + for(unsigned int i = 0; i < rasterizerCores; i++) + { + rasterizerUnitTask[i].finish(); + } + } + TexCache_EvictFrame(); - - + mainSoftRasterizer.framebufferProcess(); - + // printf("rendered %d of %d polys after backface culling\n",gfx3d.polylist->count-culled,gfx3d.polylist->count); SoftRastConvertFramebuffer(); + + softRastHasNewData = false; } GPU3DInterface gpu3DRasterize = { @@ -1648,7 +1696,7 @@ GPU3DInterface gpu3DRasterize = { SoftRastReset, SoftRastClose, SoftRastRender, - SoftRastVramReconfigureSignal, - NULL + SoftRastRenderFinish, + SoftRastVramReconfigureSignal }; diff --git a/desmume/src/render3D.cpp b/desmume/src/render3D.cpp index 180469162..21a314c02 100644 --- a/desmume/src/render3D.cpp +++ b/desmume/src/render3D.cpp @@ -17,23 +17,62 @@ */ #include "render3D.h" +#include "gfx3d.h" +#include "texcache.h" int cur3DCore = GPU3D_NULL; -static void NDS_nullFunc1 (void){} -static char NDS_nullFunc2 (void){ return 1; } - GPU3DInterface gpu3DNull = { "None", - NDS_nullFunc2, //NDS_3D_Init - NDS_nullFunc1, //NDS_3D_Reset - NDS_nullFunc1, //NDS_3D_Close - NDS_nullFunc1, //NDS_3D_Render - NDS_nullFunc1, //NDS_3D_VramReconfigureSignal - 0 + Default3D_Init, + Default3D_Reset, + Default3D_Close, + Default3D_Render, + Default3D_RenderFinish, + Default3D_VramReconfigureSignal }; GPU3DInterface *gpu3D = &gpu3DNull; +static bool default3DAlreadyClearedLayer = false; + +char Default3D_Init() +{ + default3DAlreadyClearedLayer = false; + + return 1; +} + +void Default3D_Reset() +{ + default3DAlreadyClearedLayer = false; + + TexCache_Reset(); +} + +void Default3D_Close() +{ + memset(gfx3d_convertedScreen, 0, sizeof(gfx3d_convertedScreen)); + default3DAlreadyClearedLayer = false; +} + +void Default3D_Render() +{ + if (!default3DAlreadyClearedLayer) + { + memset(gfx3d_convertedScreen, 0, sizeof(gfx3d_convertedScreen)); + default3DAlreadyClearedLayer = true; + } +} + +void Default3D_RenderFinish() +{ + // Do nothing +} + +void Default3D_VramReconfigureSignal() +{ + TexCache_Invalidate(); +} void NDS_3D_SetDriver (int core3DIndex) { diff --git a/desmume/src/render3D.h b/desmume/src/render3D.h index 6c64b7242..6923721a3 100644 --- a/desmume/src/render3D.h +++ b/desmume/src/render3D.h @@ -30,22 +30,24 @@ typedef struct Render3DInterface const char * name; //called once when the plugin starts up - char (CALL_CONVENTION* NDS_3D_Init) (void); + char (CALL_CONVENTION* NDS_3D_Init) (); //called when the emulator resets (is this necessary?) - void (CALL_CONVENTION* NDS_3D_Reset) (void); + void (CALL_CONVENTION* NDS_3D_Reset) (); //called when the plugin shuts down - void (CALL_CONVENTION* NDS_3D_Close) (void); + void (CALL_CONVENTION* NDS_3D_Close) (); //called when the renderer should do its job and render the current display lists - void (CALL_CONVENTION* NDS_3D_Render) (void); + void (CALL_CONVENTION* NDS_3D_Render) (); + + // Called whenever 3D rendering needs to finish. This function should block the calling thread + // and only release the block when 3D rendering is finished. (Before reading the 3D layer, be + // sure to always call this function.) + void (CALL_CONVENTION* NDS_3D_RenderFinish) (); //called when the emulator reconfigures its vram. you may need to invalidate your texture cache. void (CALL_CONVENTION* NDS_3D_VramReconfigureSignal) (); - - //called when the emulator requests rendered graphics data - u8* (CALL_CONVENTION* NDS_3D_GetLineData) (u8 lineNumber); } GPU3DInterface; @@ -61,6 +63,13 @@ extern GPU3DInterface gpu3DNull; // Extern pointer extern GPU3DInterface *gpu3D; +char Default3D_Init(); +void Default3D_Reset(); +void Default3D_Close(); +void Default3D_Render(); +void Default3D_RenderFinish(); +void Default3D_VramReconfigureSignal(); + void NDS_3D_SetDriver (int core3DIndex); bool NDS_3D_ChangeCore(int newCore); diff --git a/desmume/src/utils/task.h b/desmume/src/utils/task.h index 80cd4820b..30c59ed4b 100644 --- a/desmume/src/utils/task.h +++ b/desmume/src/utils/task.h @@ -16,6 +16,7 @@ */ #ifndef _TASK_H_ +#define _TASK_H_ //Sort of like a single-thread thread pool. //You hand it a worker function and then call finish() to synch with its completion