diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp index 8da387839..f1ac7a239 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp +++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp @@ -129,6 +129,18 @@ struct { D3DCOLOR ColorKey; } g_OverlayProxy; +typedef struct { + // Arguments to D3DDevice_InsertCallback : + XTL::X_D3DCALLBACK pCallback; + XTL::X_D3DCALLBACKTYPE Type; + XTL::DWORD Context; +} s_Xbox_Callback; + +static std::queue<s_Xbox_Callback> g_Xbox_CallbackQueue; // FIFO of callbacks pushed by CxbxImpl_InsertCallback and drained by CxbxHandleXboxCallbacks +static bool g_bHack_DisableHostGPUQueries = false; // TODO : Make configurable +static IDirect3DQuery *g_pHostQueryWaitForIdle = nullptr; +static IDirect3DQuery *g_pHostQueryCallbackEvent = nullptr; + static std::condition_variable g_VBConditionVariable; // Used in BlockUntilVerticalBlank static std::mutex g_VBConditionMutex; // Used in BlockUntilVerticalBlank static DWORD g_VBLastSwap = 0; @@ -139,9 +151,6 @@ static XTL::X_D3DSWAPDATA g_Xbox_SwapData = {0}; // current swap information static XTL::X_D3DSWAPCALLBACK g_pXbox_SwapCallback = xbnullptr; // Swap/Present callback routine static XTL::X_D3DVBLANKDATA g_Xbox_VBlankData = {0}; // current vertical blank information static XTL::X_D3DVBLANKCALLBACK g_pXbox_VerticalBlankCallback = xbnullptr; // Vertical-Blank callback routine -static XTL::X_D3DCALLBACK g_pXbox_Callback = xbnullptr; // D3DDevice::InsertCallback routine -static XTL::X_D3DCALLBACKTYPE g_Xbox_Callback_Type; // Callback type -static XTL::DWORD g_Xbox_Callback_Context; // Callback param XTL::X_D3DSurface *g_pXbox_BackBufferSurface = xbnullptr; static XTL::X_D3DSurface *g_pXbox_DefaultDepthStencilSurface = xbnullptr; @@ -2295,6 +2304,22 @@ static DWORD WINAPI EmuCreateDeviceProxy(LPVOID) g_pD3DDevice->GetDepthStencilSurface(&g_pDefaultHostDepthBufferSurface); UpdateDepthStencilFlags(g_pDefaultHostDepthBufferSurface); + // Can host driver create event queries? 
+ if (SUCCEEDED(g_pD3DDevice->CreateQuery(D3DQUERYTYPE_EVENT, nullptr))) { + // Is host GPU query creation enabled? + if (!g_bHack_DisableHostGPUQueries) { + // Create a D3D event query to handle "wait-for-idle" with + hRet = g_pD3DDevice->CreateQuery(D3DQUERYTYPE_EVENT, &g_pHostQueryWaitForIdle); + DEBUG_D3DRESULT(hRet, "g_pD3DDevice->CreateQuery (wait for idle)"); + + // Create a D3D event query to handle "callback events" with + hRet = g_pD3DDevice->CreateQuery(D3DQUERYTYPE_EVENT, &g_pHostQueryCallbackEvent); + DEBUG_D3DRESULT(hRet, "g_pD3DDevice->CreateQuery (callback event)"); + } + } else { + LOG_TEST_CASE("Can't CreateQuery on host!"); + } + hRet = g_pD3DDevice->CreateVertexBuffer ( 1, 0, 0, D3DPOOL_MANAGED, @@ -6877,6 +6902,46 @@ void CxbxUpdateNativeD3DResources() */ } +// This function should be called in tight idle-wait loops. +// Its purpose is to lower CPU cost in such a way that the +// caller will still respond quickly, without actually waiting +// or giving up its time-slice. +// See https://docs.microsoft.com/en-us/windows/win32/api/winnt/nf-winnt-yieldprocessor +// and https://software.intel.com/en-us/cpp-compiler-developer-guide-and-reference-pause-intrinsic +inline void CxbxCPUIdleWait() // TODO : Apply wherever applicable +{ + YieldProcessor(); +} + +// This function indicates whether Cxbx can flush host GPU commands. +bool CxbxCanFlushHostGPU() +{ + return (g_pHostQueryWaitForIdle != nullptr); +} + +// Wait until host GPU finished processing its command queue (returns false when no wait-for-idle query is available) +bool CxbxFlushHostGPU() +{ + // The following can only work when host GPU queries are available + if (!CxbxCanFlushHostGPU()) { + // If we can't query host GPU, return failure + return false; + } + + // See https://docs.microsoft.com/en-us/windows/win32/direct3d9/queries + // Add an end marker to the command buffer queue. + // This ensures that the next GetData will always have at least one + // final query event to flush out, after which GPU will be done. 
+ g_pHostQueryWaitForIdle->Issue(D3DISSUE_END); + + // Empty the command buffer and wait until host GPU is idle (the wait ends on any result other than S_FALSE). + while (S_FALSE == g_pHostQueryWaitForIdle->GetData(nullptr, 0, D3DGETDATA_FLUSH)) + CxbxCPUIdleWait(); + + // Signal caller that host GPU has been flushed + return true; +} + // This function mimicks NV2A software callback events. // Normally, these would be handled by actual push-buffer // command handling at the point where they where inserted. @@ -6884,13 +6949,42 @@ void CxbxUpdateNativeD3DResources() // this function has to be called after 'pushing' functions. void CxbxHandleXboxCallbacks() { - // Execute callback procedure - if (g_CallbackType == XTL::X_D3DCALLBACK_WRITE) { - if (g_pCallback) { - g_pCallback(g_CallbackParam); - // TODO: Reset pointer? + // The following can only work when host GPU queries are available + if (g_pHostQueryCallbackEvent != nullptr) { + // Query whether host GPU encountered a callback event already + if (S_FALSE == g_pHostQueryCallbackEvent->GetData(nullptr, 0, 0)) { + // If not, don't handle callbacks + return; } } + + // Process inserted callbacks + while (!g_Xbox_CallbackQueue.empty()) { + // Fetch a callback from the FIFO callback queue + s_Xbox_Callback XboxCallback = g_Xbox_CallbackQueue.front(); + g_Xbox_CallbackQueue.pop(); + + // Differentiate between write and read callbacks + if (XboxCallback.Type == XTL::X_D3DCALLBACK_WRITE) { + // Write callbacks should wait until GPU is idle + if (!CxbxFlushHostGPU()) { + // Host GPU can't be flushed. As in the old behaviour, we fall through and make the callback anyway + // TODO : Should we keep doing that? + } + } else { + assert(XboxCallback.Type == XTL::X_D3DCALLBACK_READ); + // Should we mimic Read callback old behaviour? 
+ if (g_bHack_DisableHostGPUQueries) { + // Note : Previously, we only processed Write, and ignored Read callbacks + continue; + } else { + // New behaviour makes Read callbacks too + } + } + + // Make the callback + XboxCallback.pCallback(XboxCallback.Context); + } } // On Xbox, this function inserts push-buffer commands that @@ -6916,9 +7010,25 @@ void CxbxImpl_InsertCallback return; } - g_pCallback = pCallback; - g_CallbackType = Type; - g_CallbackParam = Context; + // Should we mimic old behaviour? + if (g_bHack_DisableHostGPUQueries) { + // Mimic old behaviour, in which only the final callback event + // was remembered, by emptying the callback queue entirely : + while (!g_Xbox_CallbackQueue.empty()) { + g_Xbox_CallbackQueue.pop(); + } + } + + // Push this callback's arguments into the callback queue : + s_Xbox_Callback XboxCallback = { pCallback, Type, Context }; + g_Xbox_CallbackQueue.push(XboxCallback); // Note : g_Xbox_CallbackQueue.emplace(pCallback, Type, Context); doesn't compile, presumably because aggregate s_Xbox_Callback has no matching constructor + + // Does host support GPU queries? + if (g_pHostQueryCallbackEvent != nullptr) { + // Insert a callback event on host GPU, + // which will be handled by CxbxHandleXboxCallbacks + g_pHostQueryCallbackEvent->Issue(D3DISSUE_END); + } } VOID __declspec(noinline) D3DDevice_SetPixelShaderCommon(DWORD Handle) diff --git a/src/core/hle/D3D8/XbD3D8Types.h b/src/core/hle/D3D8/XbD3D8Types.h index 34d763631..345cbe7d0 100644 --- a/src/core/hle/D3D8/XbD3D8Types.h +++ b/src/core/hle/D3D8/XbD3D8Types.h @@ -82,7 +82,7 @@ #define IDirect3DSurface IDirect3DSurface9 #define IDirect3DVolume IDirect3DVolume9 #define IDirect3DSwapChain IDirect3DSwapChain9 -#define IDirect3DQuery IDirect3DQuery9 // unused +#define IDirect3DQuery IDirect3DQuery9 namespace XTL {