win32: what's this..? is it..? could it be..? indeed, it is! a multicore optimization! now featuring all final display logic triple-buffered in another thread. offloads to another thread: rotation, color conversion, filter scaling, and directdraw buffer copy. this makes filter scaling effectively costless.
This commit is contained in:
parent
f95c64cc6a
commit
3f3e086d61
|
@ -400,6 +400,7 @@ public:
|
|||
|
||||
// Width as reported by the underlying m_rbuf.
unsigned width() const { return m_rbuf.width(); }
|
||||
// Height as reported by the underlying m_rbuf.
unsigned height() const { return m_rbuf.height(); }
|
||||
// Stride as reported by the underlying m_rbuf.
unsigned stride() const { return m_rbuf.stride(); }
|
||||
|
||||
// Conversions
|
||||
//-----------------------
|
||||
|
|
|
@ -36,6 +36,7 @@ CommandLine::CommandLine()
|
|||
{
|
||||
load_slot = 0;
|
||||
arm9_gdb_port = arm7_gdb_port = 0;
|
||||
single_core = 0;
|
||||
start_paused = FALSE;
|
||||
}
|
||||
|
||||
|
@ -63,6 +64,9 @@ void CommandLine::loadCommonOptions()
|
|||
{ "start-paused", 0, 0, G_OPTION_ARG_NONE, &start_paused, "Indicates that emulation should start paused", "START_PAUSED"},
|
||||
{ "cflash-image", 0, 0, G_OPTION_ARG_FILENAME, &_cflash_image, "Requests cflash in gbaslot with fat image at this path", "CFLASH_IMAGE"},
|
||||
{ "cflash-path", 0, 0, G_OPTION_ARG_FILENAME, &_cflash_path, "Requests cflash in gbaslot with filesystem rooted at this path", "CFLASH_PATH"},
|
||||
#ifdef _MSC_VER
|
||||
{ "single-core", 0, 0, G_OPTION_ARG_NONE, &single_core, "Limit execution to use approximately only one core", "NUM_CORES"},
|
||||
#endif
|
||||
#ifdef GDB_STUB
|
||||
{ "arm9gdb", 0, 0, G_OPTION_ARG_INT, &arm9_gdb_port, "Enable the ARM9 GDB stub on the given port", "PORT_NUM"},
|
||||
{ "arm7gdb", 0, 0, G_OPTION_ARG_INT, &arm7_gdb_port, "Enable the ARM7 GDB stub on the given port", "PORT_NUM"},
|
||||
|
|
|
@ -41,6 +41,7 @@ public:
|
|||
std::string play_movie_file;
|
||||
std::string record_movie_file;
|
||||
int arm9_gdb_port, arm7_gdb_port;
|
||||
int single_core;
|
||||
int start_paused;
|
||||
std::string cflash_image;
|
||||
std::string cflash_path;
|
||||
|
|
|
@ -715,7 +715,8 @@ template<typename T, int bpp> static void doRotate(void* dst)
|
|||
}
|
||||
}
|
||||
|
||||
void Display()
|
||||
//the directdraw final presentation portion of display, including rotating
|
||||
static void DD_DoDisplay()
|
||||
{
|
||||
int res;
|
||||
memset(&ddsd, 0, sizeof(ddsd));
|
||||
|
@ -785,6 +786,94 @@ void Display()
|
|||
}
|
||||
}
|
||||
|
||||
//triple buffering logic
//Display() copies each frame into one of three buffers; displayProc() (run by the
//display thread) presents the most recently filled one.
|
||||
//the three frame buffers. Display() copies 256*192*4 bytes of GPU_screen into one of them per call
u16 displayBuffers[3][256*192*4];
|
||||
//index of the buffer most recently picked up by displayProc(); initially -1
int currDisplayBuffer=-1;
|
||||
//index of the buffer most recently filled by Display(); initially -2 (distinct from currDisplayBuffer's initial -1)
int newestDisplayBuffer=-2;
|
||||
//held by Display() and displayProc() while they read or update newestDisplayBuffer
GMutex *display_mutex = NULL;
|
||||
//the thread running displayThread(); created by the first call to Display()
GThread *display_thread = NULL;
|
||||
|
||||
//does a single display work unit. only to be used from the display thread.
//the steps run in this fixed order: update the OSD, draw the HUD, run the video
//filter, present the result through DD_DoDisplay(), then clear the OSD.
|
||||
static void DoDisplay()
|
||||
{
|
||||
osd->update();
|
||||
DrawHUD();
|
||||
video.filter();
|
||||
DD_DoDisplay();
|
||||
osd->clear();
|
||||
}
|
||||
|
||||
void displayProc()
|
||||
{
|
||||
g_mutex_lock(display_mutex);
|
||||
|
||||
//find a buffer to display
|
||||
int todo = newestDisplayBuffer;
|
||||
bool alreadyDisplayed = (todo == currDisplayBuffer);
|
||||
|
||||
g_mutex_unlock(display_mutex);
|
||||
|
||||
//nothing to display. give up.
|
||||
if(alreadyDisplayed) return;
|
||||
|
||||
//start displaying a new buffer
|
||||
currDisplayBuffer = todo;
|
||||
|
||||
video.srcBuffer = (u8*)displayBuffers[currDisplayBuffer];
|
||||
|
||||
aggDraw.hud->attach(video.srcBuffer, 256, 384, 512);
|
||||
|
||||
DoDisplay();
|
||||
}
|
||||
|
||||
|
||||
void displayThread(void*)
|
||||
{
|
||||
for(;;) {
|
||||
displayProc();
|
||||
Sleep(10); //don't be greedy and use a whole cpu core, but leave room for 60fps
|
||||
}
|
||||
}
|
||||
|
||||
void Display()
|
||||
{
|
||||
if(display_thread == NULL)
|
||||
{
|
||||
display_mutex = g_mutex_new();
|
||||
display_thread = g_thread_create( (GThreadFunc)displayThread,
|
||||
NULL,
|
||||
TRUE,
|
||||
NULL);
|
||||
}
|
||||
|
||||
g_mutex_lock(display_mutex);
|
||||
|
||||
//huh... i wonder if there is a less ugly way to do this
|
||||
if(currDisplayBuffer == 0)
|
||||
if(newestDisplayBuffer == 1)
|
||||
newestDisplayBuffer = 2;
|
||||
else newestDisplayBuffer = 1;
|
||||
else if(currDisplayBuffer == 1)
|
||||
if(newestDisplayBuffer == 2)
|
||||
newestDisplayBuffer = 0;
|
||||
else newestDisplayBuffer = 2;
|
||||
else //if(currDisplayBuffer == 1)
|
||||
if(newestDisplayBuffer == 0)
|
||||
newestDisplayBuffer = 1;
|
||||
else newestDisplayBuffer = 0;
|
||||
|
||||
memcpy(displayBuffers[newestDisplayBuffer],GPU_screen,256*192*4);
|
||||
|
||||
g_mutex_unlock(display_mutex);
|
||||
|
||||
//the no-multithreading codepath
|
||||
//but based on my research, this runs just fine on a single core system due to the generous
|
||||
//sleep in the display loop
|
||||
//video.srcBuffer = (u8*)GPU_screen;
|
||||
//doDisplay();
|
||||
}
|
||||
|
||||
|
||||
void CheckMessages()
|
||||
{
|
||||
MSG msg;
|
||||
|
@ -887,11 +976,7 @@ DWORD WINAPI run()
|
|||
Hud.fps = fps;
|
||||
Hud.fps3d = fps3d;
|
||||
|
||||
osd->update();
|
||||
DrawHUD();
|
||||
video.filter();
|
||||
Display();
|
||||
osd->clear();
|
||||
|
||||
gfx3d.frameCtrRaw++;
|
||||
if(gfx3d.frameCtrRaw == 60) {
|
||||
|
@ -1411,6 +1496,9 @@ int _main()
|
|||
return 1;
|
||||
}
|
||||
|
||||
if(cmdline.single_core)
|
||||
SetProcessAffinityMask(GetCurrentProcess(),1);
|
||||
|
||||
//sprintf(text, "%s", DESMUME_NAME_AND_VERSION);
|
||||
MainWindow = new WINCLASS(CLASSNAME, hAppInst);
|
||||
DWORD dwStyle = WS_CAPTION| WS_SYSMENU | WS_SIZEBOX | WS_MINIMIZEBOX | WS_CLIPCHILDREN | WS_CLIPSIBLINGS;
|
||||
|
@ -2651,9 +2739,7 @@ LRESULT CALLBACK WindowProcedure (HWND hwnd, UINT message, WPARAM wParam, LPARAM
|
|||
|
||||
hdc = BeginPaint(hwnd, &ps);
|
||||
|
||||
osd->update();
|
||||
Display();
|
||||
osd->clear();
|
||||
|
||||
EndPaint(hwnd, &ps);
|
||||
}
|
||||
|
|
|
@ -12,6 +12,7 @@ public:
|
|||
|
||||
int currentfilter;
|
||||
|
||||
u8* srcBuffer;
|
||||
CACHE_ALIGN u8 filteredbuffer[4*256*192*4];
|
||||
|
||||
enum {
|
||||
|
@ -53,7 +54,7 @@ public:
|
|||
u16* finalBuffer() const
|
||||
{
|
||||
if(currentfilter == NONE)
|
||||
return (u16*)GPU_screen;
|
||||
return (u16*)srcBuffer;
|
||||
else return (u16*)filteredbuffer;
|
||||
}
|
||||
|
||||
|
@ -62,7 +63,7 @@ public:
|
|||
src.Height = 384;
|
||||
src.Width = 256;
|
||||
src.Pitch = 512;
|
||||
src.Surface = (u8*)GPU_screen;
|
||||
src.Surface = (u8*)srcBuffer;
|
||||
|
||||
dst.Height = 768;
|
||||
dst.Width = 512;
|
||||
|
|
Loading…
Reference in New Issue