win32: what's this..? is it..? could it be..? indeed, it is! a multicore optimization! now featuring all final display logic triple-buffered in another thread. offloads to another thread: rotation, color conversion, filter scaling, and directdraw buffer copy. this makes filter scaling effectively costless.

This commit is contained in:
zeromus 2009-07-21 01:55:16 +00:00
parent f95c64cc6a
commit 3f3e086d61
5 changed files with 102 additions and 9 deletions

View File

@ -400,6 +400,7 @@ public:
// Geometry accessors: each one simply forwards to the same-named query on m_rbuf.
unsigned width() const { return m_rbuf.width(); }
unsigned height() const { return m_rbuf.height(); }
// NOTE(review): the stride is returned as unsigned here - confirm m_rbuf never reports a negative stride.
unsigned stride() const { return m_rbuf.stride(); }
// Conversions
//-----------------------

View File

@ -36,6 +36,7 @@ CommandLine::CommandLine()
{
load_slot = 0;
arm9_gdb_port = arm7_gdb_port = 0;
single_core = 0;
start_paused = FALSE;
}
@ -63,6 +64,9 @@ void CommandLine::loadCommonOptions()
{ "start-paused", 0, 0, G_OPTION_ARG_NONE, &start_paused, "Indicates that emulation should start paused", "START_PAUSED"},
{ "cflash-image", 0, 0, G_OPTION_ARG_FILENAME, &_cflash_image, "Requests cflash in gbaslot with fat image at this path", "CFLASH_IMAGE"},
{ "cflash-path", 0, 0, G_OPTION_ARG_FILENAME, &_cflash_path, "Requests cflash in gbaslot with filesystem rooted at this path", "CFLASH_PATH"},
#ifdef _MSC_VER
{ "single-core", 0, 0, G_OPTION_ARG_NONE, &single_core, "Limit execution to use approximately only one core", "NUM_CORES"},
#endif
#ifdef GDB_STUB
{ "arm9gdb", 0, 0, G_OPTION_ARG_INT, &arm9_gdb_port, "Enable the ARM9 GDB stub on the given port", "PORT_NUM"},
{ "arm7gdb", 0, 0, G_OPTION_ARG_INT, &arm7_gdb_port, "Enable the ARM7 GDB stub on the given port", "PORT_NUM"},

View File

@ -41,6 +41,7 @@ public:
std::string play_movie_file;
std::string record_movie_file;
int arm9_gdb_port, arm7_gdb_port;
int single_core;
int start_paused;
std::string cflash_image;
std::string cflash_path;

View File

@ -715,7 +715,8 @@ template<typename T, int bpp> static void doRotate(void* dst)
}
}
void Display()
//the directdraw final presentation portion of display, including rotating
static void DD_DoDisplay()
{
int res;
memset(&ddsd, 0, sizeof(ddsd));
@ -785,6 +786,94 @@ void Display()
}
}
//triple buffering logic
//three staging buffers: Display() copies GPU_screen into one of them, displayProc() presents from one of them
//NOTE(review): the element type is u16 but the extent 256*192*4 equals the BYTE count that Display() copies,
//so each buffer looks twice as large as it needs to be - confirm before shrinking
u16 displayBuffers[3][256*192*4];
//index of the buffer the display thread last started presenting (negative until it has picked one up)
int currDisplayBuffer=-1;
//index of the buffer most recently filled by Display() (negative until the first frame is published)
int newestDisplayBuffer=-2;
//held by Display() while it picks and fills a buffer, and by displayProc() while it samples newestDisplayBuffer.
//created lazily by Display()
GMutex *display_mutex = NULL;
//handle of the display thread. created lazily by Display()
GThread *display_thread = NULL;
//does a single display work unit. only to be used from the display thread
//the order of the calls matters: osd and hud are updated/drawn first, then video.filter() runs,
//then DD_DoDisplay() does the directdraw presentation, and only afterwards is the osd cleared
static void DoDisplay()
{
osd->update();
DrawHUD();
video.filter();
DD_DoDisplay();
osd->clear();
}
//presents the most recently published buffer, if there is one that has not been shown yet.
//only to be called from the display thread; display_mutex must already exist.
void displayProc()
{
	g_mutex_lock(display_mutex);
	//find a buffer to display
	const int todo = newestDisplayBuffer;
	//the indices start out negative, so a negative value means nothing has been published yet.
	//without this check a display thread that wins the race against the first Display() call
	//would go on to index displayBuffers[-2]
	const bool nothingToDo = (todo < 0) || (todo == currDisplayBuffer);
	//claim the buffer while the lock is still held: Display() reads currDisplayBuffer under the
	//same lock to decide which buffer it may overwrite
	if(!nothingToDo)
		currDisplayBuffer = todo;
	g_mutex_unlock(display_mutex);

	//nothing to display. give up.
	if(nothingToDo) return;

	//start displaying the new buffer (use the local copy; the shared index is only touched under the lock)
	video.srcBuffer = (u8*)displayBuffers[todo];
	aggDraw.hud->attach(video.srcBuffer, 256, 384, 512);
	DoDisplay();
}
//entry point of the display thread: present whatever is new, nap, repeat. never returns.
void displayThread(void*)
{
	while(true)
	{
		displayProc();
		//don't be greedy and use a whole cpu core, but leave room for 60fps
		Sleep(10);
	}
}
//publishes the current contents of GPU_screen to the display thread.
//the first call also creates the mutex and (after the first frame is in place) the display thread.
void Display()
{
	//create the mutex exactly once, even if thread creation below fails and is retried on a later call
	if(display_mutex == NULL)
		display_mutex = g_mutex_new();

	g_mutex_lock(display_mutex);

	//choose a buffer that is neither being presented nor holding the newest frame.
	//same choice as the old if/else ladder: prefer the slot after the one being presented
	//(slot 0 when nothing valid is being presented yet), and skip one further if that
	//slot is the newest frame
	int target = 0;
	if(currDisplayBuffer == 0 || currDisplayBuffer == 1)
		target = currDisplayBuffer + 1;
	if(target == newestDisplayBuffer)
		target = (target + 1) % 3;

	newestDisplayBuffer = target;
	//256*192*4 is a byte count: 256 pixels * 384 lines * 2 bytes per pixel
	memcpy(displayBuffers[target],GPU_screen,256*192*4);

	g_mutex_unlock(display_mutex);

	//start the display thread only after a frame has been published, so that it can never
	//observe the negative initial value of newestDisplayBuffer and use it as an array index
	if(display_thread == NULL)
	{
		display_thread = g_thread_create( (GThreadFunc)displayThread,
			NULL,
			TRUE,
			NULL);
	}

	//the no-multithreading codepath
	//but based on my research, this runs just fine on a single core system due to the generous
	//sleep in the display loop
	//video.srcBuffer = (u8*)GPU_screen;
	//DoDisplay();
}
void CheckMessages()
{
MSG msg;
@ -887,11 +976,7 @@ DWORD WINAPI run()
Hud.fps = fps;
Hud.fps3d = fps3d;
osd->update();
DrawHUD();
video.filter();
Display();
osd->clear();
gfx3d.frameCtrRaw++;
if(gfx3d.frameCtrRaw == 60) {
@ -1411,6 +1496,9 @@ int _main()
return 1;
}
if(cmdline.single_core)
SetProcessAffinityMask(GetCurrentProcess(),1);
//sprintf(text, "%s", DESMUME_NAME_AND_VERSION);
MainWindow = new WINCLASS(CLASSNAME, hAppInst);
DWORD dwStyle = WS_CAPTION| WS_SYSMENU | WS_SIZEBOX | WS_MINIMIZEBOX | WS_CLIPCHILDREN | WS_CLIPSIBLINGS;
@ -2651,9 +2739,7 @@ LRESULT CALLBACK WindowProcedure (HWND hwnd, UINT message, WPARAM wParam, LPARAM
hdc = BeginPaint(hwnd, &ps);
osd->update();
Display();
osd->clear();
EndPaint(hwnd, &ps);
}

View File

@ -12,6 +12,7 @@ public:
int currentfilter;
u8* srcBuffer;
CACHE_ALIGN u8 filteredbuffer[4*256*192*4];
enum {
@ -53,7 +54,7 @@ public:
u16* finalBuffer() const
{
if(currentfilter == NONE)
return (u16*)GPU_screen;
return (u16*)srcBuffer;
else return (u16*)filteredbuffer;
}
@ -62,7 +63,7 @@ public:
src.Height = 384;
src.Width = 256;
src.Pitch = 512;
src.Surface = (u8*)GPU_screen;
src.Surface = (u8*)srcBuffer;
dst.Height = 768;
dst.Width = 512;