win32: what's this..? is it..? could it be..? indeed, it is! a multicore optimization! now featuring all final display logic triple-buffered in another thread. offloads to another thread: rotation, color conversion, filter scaling, and directdraw buffer copy. this makes filter scaling effectively costless.
This commit is contained in:
parent
f95c64cc6a
commit
3f3e086d61
|
@ -400,6 +400,7 @@ public:
|
||||||
|
|
||||||
unsigned width() const { return m_rbuf.width(); }
|
unsigned width() const { return m_rbuf.width(); }
|
||||||
unsigned height() const { return m_rbuf.height(); }
|
unsigned height() const { return m_rbuf.height(); }
|
||||||
|
unsigned stride() const { return m_rbuf.stride(); }
|
||||||
|
|
||||||
// Conversions
|
// Conversions
|
||||||
//-----------------------
|
//-----------------------
|
||||||
|
|
|
@ -36,6 +36,7 @@ CommandLine::CommandLine()
|
||||||
{
|
{
|
||||||
load_slot = 0;
|
load_slot = 0;
|
||||||
arm9_gdb_port = arm7_gdb_port = 0;
|
arm9_gdb_port = arm7_gdb_port = 0;
|
||||||
|
single_core = 0;
|
||||||
start_paused = FALSE;
|
start_paused = FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -63,6 +64,9 @@ void CommandLine::loadCommonOptions()
|
||||||
{ "start-paused", 0, 0, G_OPTION_ARG_NONE, &start_paused, "Indicates that emulation should start paused", "START_PAUSED"},
|
{ "start-paused", 0, 0, G_OPTION_ARG_NONE, &start_paused, "Indicates that emulation should start paused", "START_PAUSED"},
|
||||||
{ "cflash-image", 0, 0, G_OPTION_ARG_FILENAME, &_cflash_image, "Requests cflash in gbaslot with fat image at this path", "CFLASH_IMAGE"},
|
{ "cflash-image", 0, 0, G_OPTION_ARG_FILENAME, &_cflash_image, "Requests cflash in gbaslot with fat image at this path", "CFLASH_IMAGE"},
|
||||||
{ "cflash-path", 0, 0, G_OPTION_ARG_FILENAME, &_cflash_path, "Requests cflash in gbaslot with filesystem rooted at this path", "CFLASH_PATH"},
|
{ "cflash-path", 0, 0, G_OPTION_ARG_FILENAME, &_cflash_path, "Requests cflash in gbaslot with filesystem rooted at this path", "CFLASH_PATH"},
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
{ "single-core", 0, 0, G_OPTION_ARG_NONE, &single_core, "Limit execution to use approximately only one core", "NUM_CORES"},
|
||||||
|
#endif
|
||||||
#ifdef GDB_STUB
|
#ifdef GDB_STUB
|
||||||
{ "arm9gdb", 0, 0, G_OPTION_ARG_INT, &arm9_gdb_port, "Enable the ARM9 GDB stub on the given port", "PORT_NUM"},
|
{ "arm9gdb", 0, 0, G_OPTION_ARG_INT, &arm9_gdb_port, "Enable the ARM9 GDB stub on the given port", "PORT_NUM"},
|
||||||
{ "arm7gdb", 0, 0, G_OPTION_ARG_INT, &arm7_gdb_port, "Enable the ARM7 GDB stub on the given port", "PORT_NUM"},
|
{ "arm7gdb", 0, 0, G_OPTION_ARG_INT, &arm7_gdb_port, "Enable the ARM7 GDB stub on the given port", "PORT_NUM"},
|
||||||
|
|
|
@ -41,6 +41,7 @@ public:
|
||||||
std::string play_movie_file;
|
std::string play_movie_file;
|
||||||
std::string record_movie_file;
|
std::string record_movie_file;
|
||||||
int arm9_gdb_port, arm7_gdb_port;
|
int arm9_gdb_port, arm7_gdb_port;
|
||||||
|
int single_core;
|
||||||
int start_paused;
|
int start_paused;
|
||||||
std::string cflash_image;
|
std::string cflash_image;
|
||||||
std::string cflash_path;
|
std::string cflash_path;
|
||||||
|
|
|
@ -715,7 +715,8 @@ template<typename T, int bpp> static void doRotate(void* dst)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Display()
|
//the directdraw final presentation portion of display, including rotating
|
||||||
|
static void DD_DoDisplay()
|
||||||
{
|
{
|
||||||
int res;
|
int res;
|
||||||
memset(&ddsd, 0, sizeof(ddsd));
|
memset(&ddsd, 0, sizeof(ddsd));
|
||||||
|
@ -785,6 +786,94 @@ void Display()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//tripple buffering logic
|
||||||
|
u16 displayBuffers[3][256*192*4];
|
||||||
|
int currDisplayBuffer=-1;
|
||||||
|
int newestDisplayBuffer=-2;
|
||||||
|
GMutex *display_mutex = NULL;
|
||||||
|
GThread *display_thread = NULL;
|
||||||
|
|
||||||
|
//does a single display work unit. only to be used from the display thread
|
||||||
|
static void DoDisplay()
|
||||||
|
{
|
||||||
|
osd->update();
|
||||||
|
DrawHUD();
|
||||||
|
video.filter();
|
||||||
|
DD_DoDisplay();
|
||||||
|
osd->clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
void displayProc()
|
||||||
|
{
|
||||||
|
g_mutex_lock(display_mutex);
|
||||||
|
|
||||||
|
//find a buffer to display
|
||||||
|
int todo = newestDisplayBuffer;
|
||||||
|
bool alreadyDisplayed = (todo == currDisplayBuffer);
|
||||||
|
|
||||||
|
g_mutex_unlock(display_mutex);
|
||||||
|
|
||||||
|
//nothing to display. give up.
|
||||||
|
if(alreadyDisplayed) return;
|
||||||
|
|
||||||
|
//start displaying a new buffer
|
||||||
|
currDisplayBuffer = todo;
|
||||||
|
|
||||||
|
video.srcBuffer = (u8*)displayBuffers[currDisplayBuffer];
|
||||||
|
|
||||||
|
aggDraw.hud->attach(video.srcBuffer, 256, 384, 512);
|
||||||
|
|
||||||
|
DoDisplay();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void displayThread(void*)
|
||||||
|
{
|
||||||
|
for(;;) {
|
||||||
|
displayProc();
|
||||||
|
Sleep(10); //don't be greedy and use a whole cpu core, but leave room for 60fps
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Display()
|
||||||
|
{
|
||||||
|
if(display_thread == NULL)
|
||||||
|
{
|
||||||
|
display_mutex = g_mutex_new();
|
||||||
|
display_thread = g_thread_create( (GThreadFunc)displayThread,
|
||||||
|
NULL,
|
||||||
|
TRUE,
|
||||||
|
NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
g_mutex_lock(display_mutex);
|
||||||
|
|
||||||
|
//huh... i wonder if there is a less ugly way to do this
|
||||||
|
if(currDisplayBuffer == 0)
|
||||||
|
if(newestDisplayBuffer == 1)
|
||||||
|
newestDisplayBuffer = 2;
|
||||||
|
else newestDisplayBuffer = 1;
|
||||||
|
else if(currDisplayBuffer == 1)
|
||||||
|
if(newestDisplayBuffer == 2)
|
||||||
|
newestDisplayBuffer = 0;
|
||||||
|
else newestDisplayBuffer = 2;
|
||||||
|
else //if(currDisplayBuffer == 1)
|
||||||
|
if(newestDisplayBuffer == 0)
|
||||||
|
newestDisplayBuffer = 1;
|
||||||
|
else newestDisplayBuffer = 0;
|
||||||
|
|
||||||
|
memcpy(displayBuffers[newestDisplayBuffer],GPU_screen,256*192*4);
|
||||||
|
|
||||||
|
g_mutex_unlock(display_mutex);
|
||||||
|
|
||||||
|
//the no-multithreading codepath
|
||||||
|
//but based on my research, this runs just fine on a single core system due to the generous
|
||||||
|
//sleep in the display loop
|
||||||
|
//video.srcBuffer = (u8*)GPU_screen;
|
||||||
|
//doDisplay();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void CheckMessages()
|
void CheckMessages()
|
||||||
{
|
{
|
||||||
MSG msg;
|
MSG msg;
|
||||||
|
@ -887,11 +976,7 @@ DWORD WINAPI run()
|
||||||
Hud.fps = fps;
|
Hud.fps = fps;
|
||||||
Hud.fps3d = fps3d;
|
Hud.fps3d = fps3d;
|
||||||
|
|
||||||
osd->update();
|
|
||||||
DrawHUD();
|
|
||||||
video.filter();
|
|
||||||
Display();
|
Display();
|
||||||
osd->clear();
|
|
||||||
|
|
||||||
gfx3d.frameCtrRaw++;
|
gfx3d.frameCtrRaw++;
|
||||||
if(gfx3d.frameCtrRaw == 60) {
|
if(gfx3d.frameCtrRaw == 60) {
|
||||||
|
@ -1411,6 +1496,9 @@ int _main()
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(cmdline.single_core)
|
||||||
|
SetProcessAffinityMask(GetCurrentProcess(),1);
|
||||||
|
|
||||||
//sprintf(text, "%s", DESMUME_NAME_AND_VERSION);
|
//sprintf(text, "%s", DESMUME_NAME_AND_VERSION);
|
||||||
MainWindow = new WINCLASS(CLASSNAME, hAppInst);
|
MainWindow = new WINCLASS(CLASSNAME, hAppInst);
|
||||||
DWORD dwStyle = WS_CAPTION| WS_SYSMENU | WS_SIZEBOX | WS_MINIMIZEBOX | WS_CLIPCHILDREN | WS_CLIPSIBLINGS;
|
DWORD dwStyle = WS_CAPTION| WS_SYSMENU | WS_SIZEBOX | WS_MINIMIZEBOX | WS_CLIPCHILDREN | WS_CLIPSIBLINGS;
|
||||||
|
@ -2651,9 +2739,7 @@ LRESULT CALLBACK WindowProcedure (HWND hwnd, UINT message, WPARAM wParam, LPARAM
|
||||||
|
|
||||||
hdc = BeginPaint(hwnd, &ps);
|
hdc = BeginPaint(hwnd, &ps);
|
||||||
|
|
||||||
osd->update();
|
|
||||||
Display();
|
Display();
|
||||||
osd->clear();
|
|
||||||
|
|
||||||
EndPaint(hwnd, &ps);
|
EndPaint(hwnd, &ps);
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,6 +12,7 @@ public:
|
||||||
|
|
||||||
int currentfilter;
|
int currentfilter;
|
||||||
|
|
||||||
|
u8* srcBuffer;
|
||||||
CACHE_ALIGN u8 filteredbuffer[4*256*192*4];
|
CACHE_ALIGN u8 filteredbuffer[4*256*192*4];
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
|
@ -53,7 +54,7 @@ public:
|
||||||
u16* finalBuffer() const
|
u16* finalBuffer() const
|
||||||
{
|
{
|
||||||
if(currentfilter == NONE)
|
if(currentfilter == NONE)
|
||||||
return (u16*)GPU_screen;
|
return (u16*)srcBuffer;
|
||||||
else return (u16*)filteredbuffer;
|
else return (u16*)filteredbuffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -62,7 +63,7 @@ public:
|
||||||
src.Height = 384;
|
src.Height = 384;
|
||||||
src.Width = 256;
|
src.Width = 256;
|
||||||
src.Pitch = 512;
|
src.Pitch = 512;
|
||||||
src.Surface = (u8*)GPU_screen;
|
src.Surface = (u8*)srcBuffer;
|
||||||
|
|
||||||
dst.Height = 768;
|
dst.Height = 768;
|
||||||
dst.Width = 512;
|
dst.Width = 512;
|
||||||
|
|
Loading…
Reference in New Issue