- Remove NDS_3D_GetLineData(), as it wasn't appropriately named or used for its intended purpose.
- Add NDS_3D_RenderFinish(), which is what should have been used in the first place. (Purpose: Blocks the thread until 3D rendering is finished.)
- Optimize gpu3DNull so that it doesn't have to clear the 3D layer every frame.

task.h:
- Fix a compile error that occurred when the header was #included more than once (add an include guard).

SoftRasterizer:
- Improve the performance of rendering in multithreaded mode.
- Improve the stability of reset and shutdown in multithreaded mode.
- Do some minor code cleanup.

OpenGL Renderer:
- Improve the stability of reset and shutdown.
- Do some minor code cleanup.
This commit is contained in:
rogerman 2013-01-15 09:36:17 +00:00
parent ad65e6abf7
commit a2f078ec81
6 changed files with 259 additions and 133 deletions

View File

@ -117,8 +117,7 @@ enum OGLTextureUnitID
// Multithreading States
static bool enableMultithreading = false;
static bool isReadPixelsWorking = false;
static Task oglReadPixelsTask;
static Task oglReadPixelsTask[2];
// Polygon Info
static GLfloat polyAlpha = 1.0f;
@ -498,12 +497,31 @@ static void OGLInitShaders(const char *oglExtensionString)
static void OGLReset()
{
gpuScreen3DHasNewData[0] = false;
gpuScreen3DHasNewData[1] = false;
if (enableMultithreading)
{
for (unsigned int i = 0; i < 2; i++)
{
oglReadPixelsTask[i].finish();
}
}
if(!BEGINGL())
return;
glFinish();
for (unsigned int i = 0; i < 2; i++)
{
memset(GPU_screen3D[i], 0, sizeof(GPU_screen3D[i]));
}
if(isShaderSupported)
{
hasTexture = false;
if(BEGINGL())
{
glUniform1i(uniformPolyID, 0);
glUniform1f(uniformPolyAlpha, 1.0f);
glUniform2f(uniformTexScale, 1.0f, 1.0f);
@ -513,27 +531,18 @@ static void OGLReset()
glUniform1i(uniformWBuffer, 0);
glUniform1i(uniformEnableAlphaTest, GL_TRUE);
glUniform1f(uniformAlphaTestRef, 0.0f);
ENDGL();
}
}
else
{
memset(color4fBuffer, 0, VERTLIST_SIZE * 4 * sizeof(GLfloat));
}
TexCache_Reset();
if (currTexture)
delete currTexture;
currTexture = NULL;
for (unsigned int i = 0; i < 2; i++)
{
memset(GPU_screen3D[i], 0, sizeof(GPU_screen3D[i]));
gpuScreen3DHasNewData[i] = false;
}
ENDGL();
memset(vertIndexBuffer, 0, VERT_INDEX_BUFFER_SIZE * sizeof(GLushort));
currTexture = NULL;
Default3D_Reset();
}
//static class OGLTexCacheUser : public ITexCacheUser
@ -636,13 +645,25 @@ static bool OGLIsMinimumVersionSupported(const char *oglVersionString)
static char OGLInit(void)
{
char result = 0;
if(!oglrender_init)
return 0;
return result;
if(!oglrender_init())
return 0;
return result;
result = Default3D_Init();
if (result == 0)
{
return result;
}
if(!BEGINGL())
return 0;
{
INFO("OpenGL: Could not initialize -- BEGINGL() failed.");
result = 0;
return result;
}
// Get OpenGL info
const char *oglVendorString = (const char *)glGetString(GL_VENDOR);
@ -655,13 +676,10 @@ static char OGLInit(void)
OGL_MINIMUM_GPU_VERSION_REQUIRED_MAJOR, OGL_MINIMUM_GPU_VERSION_REQUIRED_MINOR, OGL_MINIMUM_GPU_VERSION_REQUIRED_REVISION,
oglVersionString, oglVendorString, oglRendererString);
return 0;
result = 0;
return result;
}
glViewport(0, 0, 256, 192);
if (glGetError() != GL_NO_ERROR)
return 0;
const char *oglExtensionString = (const char *)glGetString(GL_EXTENSIONS);
for (u8 i = 0; i < 255; i++)
@ -936,8 +954,6 @@ static char OGLInit(void)
ENDGL();
// Multithreading Setup
isReadPixelsWorking = false;
if (CommonSettings.num_cores > 1)
{
#ifdef _WINDOWS
@ -947,7 +963,11 @@ static char OGLInit(void)
enableMultithreading = false;
#else
enableMultithreading = true;
oglReadPixelsTask.start(false);
for (unsigned int i = 0; i < 2; i++)
{
oglReadPixelsTask[i].start(false);
}
#endif
}
else
@ -960,27 +980,28 @@ static char OGLInit(void)
INFO("OpenGL: Initialized successfully.\n[GPU Info - Version: %s, Vendor: %s, Renderer: %s]\n",
oglVersionString, oglVendorString, oglRendererString);
return 1;
return result;
}
static void OGLClose()
{
if (enableMultithreading)
{
oglReadPixelsTask.finish();
oglReadPixelsTask.shutdown();
isReadPixelsWorking = false;
}
gpuScreen3DHasNewData[0] = false;
gpuScreen3DHasNewData[1] = false;
delete [] vertIndexBuffer;
vertIndexBuffer = NULL;
if (enableMultithreading)
{
for (unsigned int i = 0; i < 2; i++)
{
oglReadPixelsTask[i].finish();
oglReadPixelsTask[i].shutdown();
}
}
if(!BEGINGL())
return;
glFinish();
if(isShaderSupported)
{
glUseProgram(0);
@ -992,6 +1013,8 @@ static void OGLClose()
glDeleteShader(vertexShaderID);
glDeleteShader(fragmentShaderID);
glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_ToonTable);
glBindTexture(GL_TEXTURE_1D, 0);
glDeleteTextures(1, &texToonTableID);
isShaderSupported = false;
@ -1004,27 +1027,20 @@ static void OGLClose()
if (isVAOSupported)
{
glBindVertexArray(0);
glDeleteVertexArrays(1, &vaoMainStatesID);
isVAOSupported = false;
}
//kill the tex cache to free all the texture ids
TexCache_Reset();
while(!freeTextureIds.empty())
{
GLuint temp = freeTextureIds.front();
freeTextureIds.pop();
glDeleteTextures(1,&temp);
}
if (isVBOSupported)
{
glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
glDeleteBuffersARB(1, &vboVertexID);
}
if (isPBOSupported)
{
glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0);
glDeleteBuffersARB(2, pboRenderDataID);
pboRenderBuffer[0] = NULL;
pboRenderBuffer[1] = NULL;
@ -1033,6 +1049,8 @@ static void OGLClose()
// FBO
if (isFBOSupported)
{
glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_ClearImage);
glBindTexture(GL_TEXTURE_2D, 0);
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);
glDeleteFramebuffersEXT(1, &fboClearImageID);
@ -1040,7 +1058,27 @@ static void OGLClose()
glDeleteTextures(1, &texClearImageDepthStencilID);
}
//kill the tex cache to free all the texture ids
TexCache_Reset();
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, 0);
while(!freeTextureIds.empty())
{
GLuint temp = freeTextureIds.front();
freeTextureIds.pop();
glDeleteTextures(1,&temp);
}
glFinish();
ENDGL();
delete [] vertIndexBuffer;
vertIndexBuffer = NULL;
Default3D_Close();
}
static void texDeleteCallback(TexCacheItem* item)
@ -1318,15 +1356,14 @@ static void GL_ReadFramebuffer()
{
static unsigned int bufferIndex = 0;
if (enableMultithreading && isReadPixelsWorking)
{
oglReadPixelsTask.finish();
isReadPixelsWorking = false;
}
bufferIndex = (bufferIndex + 1) & 0x01;
gpuScreen3DBufferIndex = bufferIndex;
if (enableMultithreading)
{
oglReadPixelsTask[bufferIndex].finish();
}
if (isPBOSupported)
{
if(!BEGINGL()) return;
@ -1353,14 +1390,12 @@ static void GL_ReadFramebuffer()
// H-Blank, and let that logic determine whether a pixel read is needed or not.
// This can save us some time in cases where games don't require the 3D layer
// for this particular frame.
gpuScreen3DHasNewData[bufferIndex] = true;
if (enableMultithreading)
{
isReadPixelsWorking = true;
oglReadPixelsTask.execute(&execReadPixelsTask, &bufferIndex);
}
else
{
gpuScreen3DHasNewData[bufferIndex] = true;
oglReadPixelsTask[bufferIndex].execute(&execReadPixelsTask, &bufferIndex);
}
}
@ -1738,27 +1773,26 @@ static void OGLRender()
// Called when the emulator reconfigures its VRAM.
// Default3D_VramReconfigureSignal() already invalidates the texture cache,
// so a separate TexCache_Invalidate() call here would only repeat that work.
static void OGLVramReconfigureSignal()
{
	Default3D_VramReconfigureSignal();
}
static u8* OGLGetLineData(u8 lineNumber)
static void OGLRenderFinish()
{
// If OpenGL is still reading back pixels on a separate thread, wait for it to finish.
if (isReadPixelsWorking)
{
oglReadPixelsTask.finish();
isReadPixelsWorking = false;
}
// If we're doing a pixel read on this thread and we have new rendered data,
// then do the pixel read now.
// Otherwise, just do the pixel read now.
if (gpuScreen3DHasNewData[gpuScreen3DBufferIndex])
{
if (enableMultithreading)
{
oglReadPixelsTask[gpuScreen3DBufferIndex].finish();
}
else
{
execReadPixelsTask(&gpuScreen3DBufferIndex);
gpuScreen3DHasNewData[gpuScreen3DBufferIndex] = false;
}
return ( gfx3d_convertedScreen + (lineNumber << (8+2)) );
gpuScreen3DHasNewData[gpuScreen3DBufferIndex] = false;
}
}
GPU3DInterface gpu3Dgl = {
@ -1767,6 +1801,6 @@ GPU3DInterface gpu3Dgl = {
OGLReset,
OGLClose,
OGLRender,
OGLVramReconfigureSignal,
OGLGetLineData
OGLRenderFinish,
OGLVramReconfigureSignal
};

View File

@ -489,6 +489,8 @@ void gfx3d_init()
void gfx3d_reset()
{
gpu3D->NDS_3D_RenderFinish();
#ifdef _SHOW_VTX_COUNTERS
max_polys = max_verts = 0;
#endif
@ -2183,8 +2185,7 @@ void gfx3d_VBlankEndSignal(bool skipFrame)
drawPending = FALSE;
//if the null 3d core is chosen, then we need to clear out the 3d buffers to keep old data from being rendered
if(gpu3D == &gpu3DNull || !CommonSettings.showGpu.main)
if(!CommonSettings.showGpu.main)
{
memset(gfx3d_convertedScreen,0,sizeof(gfx3d_convertedScreen));
return;
@ -2301,15 +2302,9 @@ void gfx3d_glGetLightColor(unsigned int index, unsigned int* dest)
void gfx3d_GetLineData(int line, u8** dst)
{
if (gpu3D->NDS_3D_GetLineData == NULL)
{
gpu3D->NDS_3D_RenderFinish();
*dst = gfx3d_convertedScreen+((line)<<(8+2));
}
else
{
*dst = gpu3D->NDS_3D_GetLineData(line);
}
}
void gfx3d_GetLineData15bpp(int line, u16** dst)
{

View File

@ -67,7 +67,7 @@ static u8 decal_table[32][64][64];
static u8 index_lookup_table[65];
static u8 index_start_table[8];
static bool softRastHasNewData = false;
////optimized float floor useful in limited cases
////from http://www.stereopsis.com/FPU.html#convert
@ -1073,7 +1073,7 @@ static SoftRasterizerEngine mainSoftRasterizer;
static Task rasterizerUnitTask[_MAX_CORES];
static RasterizerUnit<true> rasterizerUnit[_MAX_CORES];
static RasterizerUnit<false> _HACK_viewer_rasterizerUnit;
static int rasterizerCores;
static unsigned int rasterizerCores;
static bool rasterizerUnitTasksInited = false;
static void* execRasterizerUnit(void* arg)
@ -1085,6 +1085,12 @@ static void* execRasterizerUnit(void* arg)
static char SoftRastInit(void)
{
char result = Default3D_Init();
if (result == 0)
{
return result;
}
if(!rasterizerUnitTasksInited)
{
rasterizerUnitTasksInited = true;
@ -1095,7 +1101,7 @@ static char SoftRastInit(void)
rasterizerCores = CommonSettings.num_cores;
if (rasterizerCores > _MAX_CORES)
rasterizerCores = _MAX_CORES;
if(CommonSettings.num_cores == 1)
if(CommonSettings.num_cores <= 1)
{
rasterizerCores = 1;
rasterizerUnit[0].SLI_MASK = 0;
@ -1146,22 +1152,44 @@ static char SoftRastInit(void)
TexCache_Reset();
printf("SoftRast Initialized with cores=%d\n",rasterizerCores);
return 1;
return result;
}
static void SoftRastReset() {
TexCache_Reset();
static void SoftRastReset()
{
if (rasterizerCores > 1)
{
for(unsigned int i = 0; i < rasterizerCores; i++)
{
rasterizerUnitTask[i].finish();
}
}
softRastHasNewData = false;
Default3D_Reset();
}
static void SoftRastClose()
{
for(int i=0; i<_MAX_CORES; i++)
if (rasterizerCores > 1)
{
for(unsigned int i = 0; i < rasterizerCores; i++)
{
rasterizerUnitTask[i].finish();
rasterizerUnitTask[i].shutdown();
rasterizerUnitTasksInited = false;
}
}
static void SoftRastVramReconfigureSignal() {
TexCache_Invalidate();
rasterizerUnitTasksInited = false;
softRastHasNewData = false;
Default3D_Close();
}
// VRAM-reconfiguration hook for the software rasterizer.
// There is no renderer-specific work to do, so this simply forwards to the
// default handler.
static void SoftRastVramReconfigureSignal()
{
	Default3D_VramReconfigureSignal();
}
static void SoftRastConvertFramebuffer()
@ -1622,24 +1650,44 @@ static void SoftRastRender()
mainSoftRasterizer.performCoordAdjustment(true);
mainSoftRasterizer.setupTextures(true);
softRastHasNewData = true;
if(rasterizerCores==1)
if (rasterizerCores > 1)
{
rasterizerUnit[0].mainLoop<false>(&mainSoftRasterizer);
for(unsigned int i = 0; i < rasterizerCores; i++)
{
rasterizerUnitTask[i].execute(&execRasterizerUnit, (void *)i);
}
}
else
{
for(int i=0;i<rasterizerCores;i++) rasterizerUnitTask[i].execute(execRasterizerUnit,(void*)i);
for(int i=0;i<rasterizerCores;i++) rasterizerUnitTask[i].finish();
rasterizerUnit[0].mainLoop<false>(&mainSoftRasterizer);
}
}
// Completes the frame that SoftRastRender() started: waits for the rasterizer
// tasks (when more than one core is in use), evicts stale texture cache
// entries, runs framebufferProcess() and converts the framebuffer.
// Does nothing when no newly rendered frame is pending.
static void SoftRastRenderFinish()
{
	if (softRastHasNewData)
	{
		// Rasterizer tasks are only dispatched when rendering on multiple cores;
		// with a single core there is nothing to wait for.
		const bool usesWorkerTasks = (rasterizerCores > 1);
		for (unsigned int unit = 0; usesWorkerTasks && unit < rasterizerCores; unit++)
		{
			rasterizerUnitTask[unit].finish();
		}

		TexCache_EvictFrame();
		mainSoftRasterizer.framebufferProcess();
		SoftRastConvertFramebuffer();

		// Mark the frame as consumed so repeated calls become no-ops.
		softRastHasNewData = false;
	}
}
GPU3DInterface gpu3DRasterize = {
@ -1648,7 +1696,7 @@ GPU3DInterface gpu3DRasterize = {
SoftRastReset,
SoftRastClose,
SoftRastRender,
SoftRastVramReconfigureSignal,
NULL
SoftRastRenderFinish,
SoftRastVramReconfigureSignal
};

View File

@ -17,23 +17,62 @@
*/
#include "render3D.h"
#include "gfx3d.h"
#include "texcache.h"
int cur3DCore = GPU3D_NULL;
static void NDS_nullFunc1 (void){}
static char NDS_nullFunc2 (void){ return 1; }
GPU3DInterface gpu3DNull = {
"None",
NDS_nullFunc2, //NDS_3D_Init
NDS_nullFunc1, //NDS_3D_Reset
NDS_nullFunc1, //NDS_3D_Close
NDS_nullFunc1, //NDS_3D_Render
NDS_nullFunc1, //NDS_3D_VramReconfigureSignal
0
Default3D_Init,
Default3D_Reset,
Default3D_Close,
Default3D_Render,
Default3D_RenderFinish,
Default3D_VramReconfigureSignal
};
GPU3DInterface *gpu3D = &gpu3DNull;
static bool default3DAlreadyClearedLayer = false;
// Default renderer initialization.
// Clears the "layer already cleared" flag so that the next Default3D_Render()
// call clears the 3D layer again.
// Returns 1 (callers in this file treat 0 as failure); this default
// implementation never fails.
char Default3D_Init()
{
	const char initSucceeded = 1;

	default3DAlreadyClearedLayer = false;
	return initSucceeded;
}
// Default renderer reset.
// Re-arms the one-time clear performed by Default3D_Render() and resets the
// texture cache.
void Default3D_Reset()
{
	default3DAlreadyClearedLayer = false;

	TexCache_Reset();
}
// Default renderer shutdown.
// Zeroes the converted 3D screen buffer and re-arms the one-time clear
// performed by Default3D_Render().
void Default3D_Close()
{
	memset(gfx3d_convertedScreen, 0, sizeof(gfx3d_convertedScreen));

	default3DAlreadyClearedLayer = false;
}
// Default ("null") render pass.
// Zeroes the converted 3D screen buffer the first time it is called after an
// init/reset/close; later calls return immediately so the buffer is not
// cleared again on every frame.
void Default3D_Render()
{
	if (default3DAlreadyClearedLayer)
	{
		return;
	}

	memset(gfx3d_convertedScreen, 0, sizeof(gfx3d_convertedScreen));
	default3DAlreadyClearedLayer = true;
}
// Default render-finish hook.
// Default3D_Render() completes its work before returning, so there is nothing
// to wait for here.
void Default3D_RenderFinish()
{
	// Intentionally empty.
}
// Default VRAM-reconfiguration hook.
// Invalidates the texture cache.
void Default3D_VramReconfigureSignal()
{
	TexCache_Invalidate();
}
void NDS_3D_SetDriver (int core3DIndex)
{

View File

@ -30,23 +30,25 @@ typedef struct Render3DInterface
const char * name;
//called once when the plugin starts up
char (CALL_CONVENTION* NDS_3D_Init) (void);
char (CALL_CONVENTION* NDS_3D_Init) ();
//called when the emulator resets (is this necessary?)
void (CALL_CONVENTION* NDS_3D_Reset) (void);
void (CALL_CONVENTION* NDS_3D_Reset) ();
//called when the plugin shuts down
void (CALL_CONVENTION* NDS_3D_Close) (void);
void (CALL_CONVENTION* NDS_3D_Close) ();
//called when the renderer should do its job and render the current display lists
void (CALL_CONVENTION* NDS_3D_Render) (void);
void (CALL_CONVENTION* NDS_3D_Render) ();
// Called whenever 3D rendering needs to finish. This function should block the calling thread
// and only release the block when 3D rendering is finished. (Before reading the 3D layer, be
// sure to always call this function.)
void (CALL_CONVENTION* NDS_3D_RenderFinish) ();
//called when the emulator reconfigures its vram. you may need to invalidate your texture cache.
void (CALL_CONVENTION* NDS_3D_VramReconfigureSignal) ();
//called when the emulator requests rendered graphics data
u8* (CALL_CONVENTION* NDS_3D_GetLineData) (u8 lineNumber);
} GPU3DInterface;
extern int cur3DCore;
@ -61,6 +63,13 @@ extern GPU3DInterface gpu3DNull;
// Extern pointer
extern GPU3DInterface *gpu3D;
char Default3D_Init();
void Default3D_Reset();
void Default3D_Close();
void Default3D_Render();
void Default3D_RenderFinish();
void Default3D_VramReconfigureSignal();
void NDS_3D_SetDriver (int core3DIndex);
bool NDS_3D_ChangeCore(int newCore);

View File

@ -16,6 +16,7 @@
*/
#ifndef _TASK_H_
#define _TASK_H_
//Sort of like a single-thread thread pool.
//You hand it a worker function and then call finish() to synch with its completion