diff --git a/src/common/Timer.cpp b/src/common/Timer.cpp
index e7acccc96..86b705d3d 100644
--- a/src/common/Timer.cpp
+++ b/src/common/Timer.cpp
@@ -38,6 +38,47 @@
 #include
 #endif
 
+// More precise sleep, but with increased CPU usage
+// Blocks the calling thread until steady_clock::now() reaches targetTime:
+// most of the wait is handed to Sleep(), the last stretch is busy-waited.
+void SleepPrecise(std::chrono::steady_clock::time_point targetTime)
+{
+    using namespace std::chrono;
+
+    // If we don't need to wait, return right away
+    if (steady_clock::now() >= targetTime) {
+        return;
+    }
+
+    // TODO use waitable timers?
+    // TODO fetch the timer resolution to determine the sleep threshold?
+    // TODO adaptive wait? https://blat-blatnik.github.io/computerBear/making-accurate-sleep-function/
+
+    // Try to sleep for as much of the wait as we can
+    // to save CPU usage / power
+    // We expect sleep to overshoot, so give ourselves some extra time
+    // Note currently we ask Windows to give us 1ms timer resolution
+    constexpr auto sleepThreshold = 2ms; // Minimum remaining time before we attempt to use sleep
+
+    // Sleep() takes a 32-bit millisecond count and treats 0xFFFFFFFF as INFINITE,
+    // so cap the request instead of letting the cast wrap around
+    constexpr milliseconds::rep maxSleepMs = 0x7FFFFFFF;
+
+    const auto sleepFor = (targetTime - sleepThreshold) - steady_clock::now();
+    const auto sleepMs = duration_cast<milliseconds>(sleepFor).count();
+
+    // Sleep if required
+    // duration_cast truncates toward zero, so 0 means less than a whole millisecond
+    // can be slept; skip Sleep(0) there, it would only give up the time slice
+    if (sleepMs > 0) {
+        Sleep(static_cast<DWORD>(sleepMs < maxSleepMs ? sleepMs : maxSleepMs));
+    }
+
+    // Spin wait
+    while (steady_clock::now() < targetTime) {
+        ;
+    }
+}
 
 // Virtual clocks will probably become useful once LLE CPU is implemented, but for now we don't need them.
 // See the QEMUClockType QEMU_CLOCK_VIRTUAL of XQEMU for more info.
diff --git a/src/common/Timer.h b/src/common/Timer.h index 21d742021..bc0f7494c 100644 --- a/src/common/Timer.h +++ b/src/common/Timer.h @@ -64,6 +64,8 @@ void Timer_ChangeExpireTime(TimerObject* Timer, uint64_t Expire_ms); uint64_t GetTime_NS(TimerObject* Timer); void Timer_Init(); +void SleepPrecise(std::chrono::steady_clock::time_point targetTime); + // A stateful replacement for QueryPerformanceCounter, ticking at an arbitrary frequency // Thread-safe and designed to avoid overflows at all cost class ScaledPerformanceCounter diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp index 1f3916580..adaa7eb70 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp +++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp @@ -61,6 +61,7 @@ #include "common\input\InputManager.h" #include "common/util/strConverter.hpp" // for utf8_to_utf16 #include "VertexShaderSource.h" +#include "Timer.h" #include #include @@ -2146,18 +2147,19 @@ static LRESULT WINAPI EmuMsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lPar return S_OK; // = Is not part of D3D8 handling. } -std::chrono::time_point> GetNextVBlankTime() +std::chrono::steady_clock::time_point GetNextVBlankTime() { + using namespace std::chrono; // TODO: Read display frequency from Xbox Display Adapter // This is accessed by calling CMiniport::GetRefreshRate(); // This reads from the structure located at CMinpPort::m_CurrentAvInfo // This will require at least Direct3D_CreateDevice being unpatched // otherwise, m_CurrentAvInfo will never be initialised! 
// 20ms should be used in the case of 50hz - return std::chrono::steady_clock::now() + 16.6666666667ms; + auto ms = 16.6666666667ms; + return steady_clock::now() + duration_cast(ms); } - // timing thread procedure static DWORD WINAPI EmuUpdateTickCount(LPVOID) { @@ -2168,51 +2170,45 @@ static DWORD WINAPI EmuUpdateTickCount(LPVOID) EmuLog(LOG_LEVEL::DEBUG, "Timing thread is running."); - // current vertical blank count - int curvb = 0; + // We check for LLE flag as NV2A handles it's own VBLANK if LLE is enabled! + if (bLLE_GPU) { + return 0; + } - // Calculate Next VBlank time auto nextVBlankTime = GetNextVBlankTime(); while(true) { - SwitchToThread(); - - // If VBlank Interval has passed, trigger VBlank callback + // Wait for VBlank // Note: This whole code block can be removed once NV2A interrupts are implemented // And Both Swap and Present can be ran unpatched // Once that is in place, MiniPort + Direct3D will handle this on it's own! - // We check for LLE flag as NV2A handles it's own VBLANK if LLE is enabled! - if (!(bLLE_GPU) && std::chrono::steady_clock::now() > nextVBlankTime) - { - nextVBlankTime = GetNextVBlankTime(); + SleepPrecise(nextVBlankTime); + nextVBlankTime = GetNextVBlankTime(); - // Increment the VBlank Counter and Wake all threads there were waiting for the VBlank to occur - std::unique_lock lk(g_VBConditionMutex); - g_Xbox_VBlankData.VBlank++; - g_VBConditionVariable.notify_all(); + // Increment the VBlank Counter and Wake all threads there were waiting for the VBlank to occur + std::unique_lock lk(g_VBConditionMutex); + g_Xbox_VBlankData.VBlank++; + g_VBConditionVariable.notify_all(); - // TODO: Fixme. This may not be right... - g_Xbox_SwapData.SwapVBlank = 1; + // TODO: Fixme. This may not be right... 
+ g_Xbox_SwapData.SwapVBlank = 1; - if(g_pXbox_VerticalBlankCallback != xbox::zeroptr) - { - - g_pXbox_VerticalBlankCallback(&g_Xbox_VBlankData); - - } + if(g_pXbox_VerticalBlankCallback != xbox::zeroptr) + { + g_pXbox_VerticalBlankCallback(&g_Xbox_VBlankData); + } - g_Xbox_VBlankData.Swap = 0; + g_Xbox_VBlankData.Swap = 0; - // TODO: This can't be accurate... - g_Xbox_SwapData.TimeUntilSwapVBlank = 0; + // TODO: This can't be accurate... + g_Xbox_SwapData.TimeUntilSwapVBlank = 0; - // TODO: Recalculate this for PAL version if necessary. - // Also, we should check the D3DPRESENT_INTERVAL value for accurracy. - // g_Xbox_SwapData.TimeBetweenSwapVBlanks = 1/60; - g_Xbox_SwapData.TimeBetweenSwapVBlanks = 0; - } - } + // TODO: Recalculate this for PAL version if necessary. + // Also, we should check the D3DPRESENT_INTERVAL value for accurracy. + // g_Xbox_SwapData.TimeBetweenSwapVBlanks = 1/60; + g_Xbox_SwapData.TimeBetweenSwapVBlanks = 0; + } } void UpdateDepthStencilFlags(IDirect3DSurface *pDepthStencilSurface) @@ -5171,7 +5167,7 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_Present) EMUPATCH(D3DDevice_Swap)(CXBX_SWAP_PRESENT_FORWARD); // Xbox present ignores } -std::chrono::time_point frameStartTime; +std::chrono::steady_clock::time_point frameStartTime; // LTCG specific swap function... 
// This uses a custom calling convention where parameter is passed in EAX @@ -5478,24 +5474,11 @@ xbox::dword_xt WINAPI xbox::EMUPATCH(D3DDevice_Swap) break; } - auto targetDuration = std::chrono::duration(((1000.0f / targetRefreshRate) * multiplier)); + // Wait until it's time for the next frame + auto frameMs = (1000.0 / targetRefreshRate) * multiplier; + auto targetDuration = std::chrono::duration_cast(std::chrono::duration(frameMs)); auto targetTimestamp = frameStartTime + targetDuration; - auto actualDuration = std::chrono::duration(std::chrono::steady_clock::now() - frameStartTime); - auto startTimeAjustment = actualDuration - targetDuration; - - // Only enter the wait loop if the frame took too long - if (actualDuration < targetDuration) { - // If we need to wait for a larger amount of time (>= 1 frame at 60FPS), we can just sleep - if ((targetTimestamp - std::chrono::steady_clock::now()) > std::chrono::duration(16.0)) { - std::this_thread::sleep_until(targetTimestamp); - } else { - // Otherwise, we fall-through and just keep polling - // This prevents large waits from hogging CPU power, but allows small waits/ to remain precice. - while (std::chrono::steady_clock::now() < targetTimestamp) { - ; - } - } - } + SleepPrecise(targetTimestamp); } frameStartTime = std::chrono::steady_clock::now(); diff --git a/src/devices/video/nv2a.cpp b/src/devices/video/nv2a.cpp index 9ca04ef02..cc7baeeaf 100644 --- a/src/devices/video/nv2a.cpp +++ b/src/devices/video/nv2a.cpp @@ -322,7 +322,7 @@ const NV2ABlockInfo* EmuNV2A_Block(xbox::addr_xt addr) // HACK: Until we implement VGA/proper interrupt generation // we simulate VBLANK by calling the interrupt at 60Hz std::thread vblank_thread; -extern std::chrono::time_point> GetNextVBlankTime(); +extern std::chrono::steady_clock::time_point GetNextVBlankTime(); void _check_gl_reset() {