// Patch summary (review notes; text before the first "diff --git" header is not part of any hunk):
//  - agg2d.h: adds a stride() accessor that forwards to m_rbuf.stride().
//  - commandline.h / commandline.cpp: adds an int single_core flag (initialised to 0), exposed as --single-core when _MSC_VER is defined.
//  - windows/main.cpp: renames the old Display() to DD_DoDisplay(), adds a display thread (displayThread/displayProc/DoDisplay) that runs
//    osd->update(), DrawHUD(), video.filter(), DD_DoDisplay(), osd->clear(), and makes Display() copy GPU_screen into one of three
//    displayBuffers under display_mutex. Also calls SetProcessAffinityMask(GetCurrentProcess(),1) when cmdline.single_core is set.
//  - windows/video.h: adds srcBuffer and reads from it instead of GPU_screen in finalBuffer() and in the filter source surface.
// Review fixes applied to added lines of windows/main.cpp (the number of added lines per hunk is unchanged):
//  - displayProc(): currDisplayBuffer is now updated while display_mutex is held. Display() chooses its write target as "neither
//    currDisplayBuffer nor newestDisplayBuffer", so updating it after the unlock let Display() overwrite the buffer about to be shown.
//  - displayProc(): a negative newestDisplayBuffer (its initial value is -2) is treated as "nothing to display", because the thread is
//    created before Display() publishes the first frame and would otherwise index displayBuffers[-2].
//  - Comment corrections: "tripple" -> "triple"; the final else of the buffer selection handles currDisplayBuffer == 2 and the initial -1.
//  - NOTE(review): the display_thread == NULL lazy-init in Display() is unsynchronised; confirm Display() is only ever entered from one thread at a time.
diff --git a/desmume/src/agg2d.h b/desmume/src/agg2d.h index 758fc8535..24c89b6d5 100644 --- a/desmume/src/agg2d.h +++ b/desmume/src/agg2d.h @@ -400,6 +400,7 @@ public: unsigned width() const { return m_rbuf.width(); } unsigned height() const { return m_rbuf.height(); } + unsigned stride() const { return m_rbuf.stride(); } // Conversions //----------------------- diff --git a/desmume/src/commandline.cpp b/desmume/src/commandline.cpp index a89c8c638..ca734374a 100644 --- a/desmume/src/commandline.cpp +++ b/desmume/src/commandline.cpp @@ -36,6 +36,7 @@ CommandLine::CommandLine() { load_slot = 0; arm9_gdb_port = arm7_gdb_port = 0; + single_core = 0; start_paused = FALSE; } @@ -63,6 +64,9 @@ void CommandLine::loadCommonOptions() { "start-paused", 0, 0, G_OPTION_ARG_NONE, &start_paused, "Indicates that emulation should start paused", "START_PAUSED"}, { "cflash-image", 0, 0, G_OPTION_ARG_FILENAME, &_cflash_image, "Requests cflash in gbaslot with fat image at this path", "CFLASH_IMAGE"}, { "cflash-path", 0, 0, G_OPTION_ARG_FILENAME, &_cflash_path, "Requests cflash in gbaslot with filesystem rooted at this path", "CFLASH_PATH"}, +#ifdef _MSC_VER + { "single-core", 0, 0, G_OPTION_ARG_NONE, &single_core, "Limit execution to use approximately only one core", "NUM_CORES"}, +#endif #ifdef GDB_STUB { "arm9gdb", 0, 0, G_OPTION_ARG_INT, &arm9_gdb_port, "Enable the ARM9 GDB stub on the given port", "PORT_NUM"}, { "arm7gdb", 0, 0, G_OPTION_ARG_INT, &arm7_gdb_port, "Enable the ARM7 GDB stub on the given port", "PORT_NUM"}, diff --git a/desmume/src/commandline.h b/desmume/src/commandline.h index bb70dbfef..b550be083 100644 --- a/desmume/src/commandline.h +++ b/desmume/src/commandline.h @@ -41,6 +41,7 @@ public: std::string play_movie_file; std::string record_movie_file; int arm9_gdb_port, arm7_gdb_port; + int single_core; int start_paused; std::string cflash_image; std::string cflash_path; diff --git a/desmume/src/windows/main.cpp b/desmume/src/windows/main.cpp index 
c57750589..a2586f7f7 100644 --- a/desmume/src/windows/main.cpp +++ b/desmume/src/windows/main.cpp @@ -715,7 +715,8 @@ template static void doRotate(void* dst) } } -void Display() +//the directdraw final presentation portion of display, including rotating +static void DD_DoDisplay() { int res; memset(&ddsd, 0, sizeof(ddsd)); @@ -785,6 +786,94 @@ void Display() } } +//triple buffering logic +u16 displayBuffers[3][256*192*4]; +int currDisplayBuffer=-1; +int newestDisplayBuffer=-2; +GMutex *display_mutex = NULL; +GThread *display_thread = NULL; + +//does a single display work unit. only to be used from the display thread +static void DoDisplay() +{ + osd->update(); + DrawHUD(); + video.filter(); + DD_DoDisplay(); + osd->clear(); +} + +void displayProc() +{ + g_mutex_lock(display_mutex); + + //find a buffer to display + int todo = newestDisplayBuffer; + bool alreadyDisplayed = (todo < 0 || todo == currDisplayBuffer); + + //claim the buffer while the lock is still held so Display() cannot pick it as its next write target + if(!alreadyDisplayed) currDisplayBuffer = todo; + + g_mutex_unlock(display_mutex); + + //nothing new to display (or no frame published yet). give up. + if(alreadyDisplayed) return; + + video.srcBuffer = (u8*)displayBuffers[currDisplayBuffer]; + + aggDraw.hud->attach(video.srcBuffer, 256, 384, 512); + + DoDisplay(); +} + + +void displayThread(void*) +{ + for(;;) { + displayProc(); + Sleep(10); //don't be greedy and use a whole cpu core, but leave room for 60fps + } +} + +void Display() +{ + if(display_thread == NULL) + { + display_mutex = g_mutex_new(); + display_thread = g_thread_create( (GThreadFunc)displayThread, + NULL, + TRUE, + NULL); + } + + g_mutex_lock(display_mutex); + + //huh... 
i wonder if there is a less ugly way to do this + if(currDisplayBuffer == 0) + if(newestDisplayBuffer == 1) + newestDisplayBuffer = 2; + else newestDisplayBuffer = 1; + else if(currDisplayBuffer == 1) + if(newestDisplayBuffer == 2) + newestDisplayBuffer = 0; + else newestDisplayBuffer = 2; + else //currDisplayBuffer == 2, or -1 before the first frame has been displayed + if(newestDisplayBuffer == 0) + newestDisplayBuffer = 1; + else newestDisplayBuffer = 0; + + memcpy(displayBuffers[newestDisplayBuffer],GPU_screen,256*192*4); + + g_mutex_unlock(display_mutex); + + //the no-multithreading codepath + //but based on my research, this runs just fine on a single core system due to the generous + //sleep in the display loop + //video.srcBuffer = (u8*)GPU_screen; + //doDisplay(); +} + + void CheckMessages() { MSG msg; @@ -887,11 +976,7 @@ DWORD WINAPI run() Hud.fps = fps; Hud.fps3d = fps3d; - osd->update(); - DrawHUD(); - video.filter(); Display(); - osd->clear(); gfx3d.frameCtrRaw++; if(gfx3d.frameCtrRaw == 60) { @@ -1411,6 +1496,9 @@ int _main() return 1; } + if(cmdline.single_core) + SetProcessAffinityMask(GetCurrentProcess(),1); + //sprintf(text, "%s", DESMUME_NAME_AND_VERSION); MainWindow = new WINCLASS(CLASSNAME, hAppInst); DWORD dwStyle = WS_CAPTION| WS_SYSMENU | WS_SIZEBOX | WS_MINIMIZEBOX | WS_CLIPCHILDREN | WS_CLIPSIBLINGS; @@ -2651,9 +2739,7 @@ LRESULT CALLBACK WindowProcedure (HWND hwnd, UINT message, WPARAM wParam, LPARAM hdc = BeginPaint(hwnd, &ps); - osd->update(); Display(); - osd->clear(); EndPaint(hwnd, &ps); } diff --git a/desmume/src/windows/video.h b/desmume/src/windows/video.h index 04a250603..6ede5c105 100644 --- a/desmume/src/windows/video.h +++ b/desmume/src/windows/video.h @@ -12,6 +12,7 @@ public: int currentfilter; + u8* srcBuffer; CACHE_ALIGN u8 filteredbuffer[4*256*192*4]; enum { @@ -53,7 +54,7 @@ public: u16* finalBuffer() const { if(currentfilter == NONE) - return (u16*)GPU_screen; + return (u16*)srcBuffer; else return (u16*)filteredbuffer; } @@ -62,7 +63,7 @@ public: src.Height = 
384; src.Width = 256; src.Pitch = 512; - src.Surface = (u8*)GPU_screen; + src.Surface = (u8*)srcBuffer; dst.Height = 768; dst.Width = 512;