SleepPrecise for more accurate sleeps

We can use it to reduce the amount of spin waiting, which burns CPU.
It sleeps for as much of the wait as it can get away with without overshooting, and then spin waits for the remainder.
Use it for:
- VBlank timing
- Frame limiter timing
This commit is contained in:
Anthony 2021-06-02 21:54:02 +12:00
parent 8087b157e5
commit b66f04d811
2 changed files with 63 additions and 53 deletions

View File

@ -2146,18 +2146,49 @@ static LRESULT WINAPI EmuMsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lPar
return S_OK; // = Is not part of D3D8 handling.
}
std::chrono::time_point<std::chrono::steady_clock, std::chrono::duration<double, std::nano>> GetNextVBlankTime()
// More precise sleep, but with increased CPU usage
void SleepPrecise(std::chrono::steady_clock::time_point targetTime)
{
using namespace std::chrono;
// If we don't need to wait, return right away
// TODO use waitable timers?
// TODO fetch the timer resolution to determine the sleep threshold?
// TODO adaptive wait? https://blat-blatnik.github.io/computerBear/making-accurate-sleep-function/
// Try to sleep for as much of the wait as we can
// to save CPU usage / power
// We expect sleep to overshoot, so give ourselves some extra time
// Note currently we ask Windows to give us 1ms timer resolution
constexpr auto sleepThreshold = 2ms; // Minimum remaining time before we attempt to use sleep
auto sleepFor = (targetTime - sleepThreshold) - steady_clock::now();
auto sleepMs = duration_cast<milliseconds>(sleepFor).count();
// Sleep if required
if (sleepMs >= 0) {
Sleep((DWORD)sleepMs);
}
// Spin wait
while (steady_clock::now() < targetTime) {
;
}
}
// Returns the steady_clock time point at which the next emulated vertical blank is due:
// the current time plus one frame period (16.6666666667ms, i.e. 60Hz).
std::chrono::steady_clock::time_point GetNextVBlankTime()
{
    using namespace std::chrono;

    // TODO: Read display frequency from Xbox Display Adapter
    // This is accessed by calling CMiniport::GetRefreshRate();
    // This reads from the structure located at CMinpPort::m_CurrentAvInfo
    // This will require at least Direct3D_CreateDevice being unpatched
    // otherwise, m_CurrentAvInfo will never be initialised!
    // 20ms should be used in the case of 50hz
    constexpr auto vblankPeriod = 16.6666666667ms;

    // The period is a floating point duration, which does not convert implicitly to the
    // clock's own tick type, so it is cast (truncating) before being added to now()
    return steady_clock::now() + duration_cast<steady_clock::duration>(vblankPeriod);
}
// timing thread procedure
static DWORD WINAPI EmuUpdateTickCount(LPVOID)
{
@ -2168,51 +2199,43 @@ static DWORD WINAPI EmuUpdateTickCount(LPVOID)
EmuLog(LOG_LEVEL::DEBUG, "Timing thread is running.");
// current vertical blank count
int curvb = 0;
// We check for LLE flag as NV2A handles its own VBLANK if LLE is enabled!
if (bLLE_GPU) return 0;
// Calculate Next VBlank time
auto nextVBlankTime = GetNextVBlankTime();
while(true)
{
SwitchToThread();
// If VBlank Interval has passed, trigger VBlank callback
// Wait for VBlank
// Note: This whole code block can be removed once NV2A interrupts are implemented
// And both Swap and Present can be run unpatched
// Once that is in place, MiniPort + Direct3D will handle this on its own!
// We check for LLE flag as NV2A handles its own VBLANK if LLE is enabled!
if (!(bLLE_GPU) && std::chrono::steady_clock::now() > nextVBlankTime)
{
nextVBlankTime = GetNextVBlankTime();
SleepPrecise(nextVBlankTime);
nextVBlankTime = GetNextVBlankTime();
// Increment the VBlank Counter and Wake all threads there were waiting for the VBlank to occur
std::unique_lock<std::mutex> lk(g_VBConditionMutex);
g_Xbox_VBlankData.VBlank++;
g_VBConditionVariable.notify_all();
// Increment the VBlank Counter and Wake all threads there were waiting for the VBlank to occur
std::unique_lock<std::mutex> lk(g_VBConditionMutex);
g_Xbox_VBlankData.VBlank++;
g_VBConditionVariable.notify_all();
// TODO: Fixme. This may not be right...
g_Xbox_SwapData.SwapVBlank = 1;
// TODO: Fixme. This may not be right...
g_Xbox_SwapData.SwapVBlank = 1;
if(g_pXbox_VerticalBlankCallback != xbox::zeroptr)
{
g_pXbox_VerticalBlankCallback(&g_Xbox_VBlankData);
}
if(g_pXbox_VerticalBlankCallback != xbox::zeroptr)
{
g_pXbox_VerticalBlankCallback(&g_Xbox_VBlankData);
}
g_Xbox_VBlankData.Swap = 0;
g_Xbox_VBlankData.Swap = 0;
// TODO: This can't be accurate...
g_Xbox_SwapData.TimeUntilSwapVBlank = 0;
// TODO: This can't be accurate...
g_Xbox_SwapData.TimeUntilSwapVBlank = 0;
// TODO: Recalculate this for PAL version if necessary.
// Also, we should check the D3DPRESENT_INTERVAL value for accurracy.
// g_Xbox_SwapData.TimeBetweenSwapVBlanks = 1/60;
g_Xbox_SwapData.TimeBetweenSwapVBlanks = 0;
}
}
// TODO: Recalculate this for PAL version if necessary.
// Also, we should check the D3DPRESENT_INTERVAL value for accurracy.
// g_Xbox_SwapData.TimeBetweenSwapVBlanks = 1/60;
g_Xbox_SwapData.TimeBetweenSwapVBlanks = 0;
}
}
void UpdateDepthStencilFlags(IDirect3DSurface *pDepthStencilSurface)
@ -5171,7 +5194,7 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_Present)
EMUPATCH(D3DDevice_Swap)(CXBX_SWAP_PRESENT_FORWARD); // Xbox present ignores
}
std::chrono::time_point<std::chrono::steady_clock> frameStartTime;
std::chrono::steady_clock::time_point frameStartTime;
// LTCG specific swap function...
// This uses a custom calling convention where parameter is passed in EAX
@ -5478,24 +5501,11 @@ xbox::dword_xt WINAPI xbox::EMUPATCH(D3DDevice_Swap)
break;
}
auto targetDuration = std::chrono::duration<double, std::milli>(((1000.0f / targetRefreshRate) * multiplier));
// Wait until it's time for the next frame
auto frameMs = (1000.0 / targetRefreshRate) * multiplier;
auto targetDuration = std::chrono::duration_cast<std::chrono::steady_clock::duration>(std::chrono::duration<double, std::milli>(frameMs));
auto targetTimestamp = frameStartTime + targetDuration;
auto actualDuration = std::chrono::duration<double, std::milli>(std::chrono::steady_clock::now() - frameStartTime);
auto startTimeAjustment = actualDuration - targetDuration;
// Only enter the wait loop if the frame took too long
if (actualDuration < targetDuration) {
// If we need to wait for a larger amount of time (>= 1 frame at 60FPS), we can just sleep
if ((targetTimestamp - std::chrono::steady_clock::now()) > std::chrono::duration<double, std::milli>(16.0)) {
std::this_thread::sleep_until(targetTimestamp);
} else {
// Otherwise, we fall-through and just keep polling
// This prevents large waits from hogging CPU power, but allows small waits/ to remain precice.
while (std::chrono::steady_clock::now() < targetTimestamp) {
;
}
}
}
SleepPrecise(targetTimestamp);
}
frameStartTime = std::chrono::steady_clock::now();

View File

@ -322,7 +322,7 @@ const NV2ABlockInfo* EmuNV2A_Block(xbox::addr_xt addr)
// HACK: Until we implement VGA/proper interrupt generation
// we simulate VBLANK by calling the interrupt at 60Hz
std::thread vblank_thread;
extern std::chrono::time_point<std::chrono::steady_clock, std::chrono::duration<double, std::nano>> GetNextVBlankTime();
extern std::chrono::steady_clock::time_point GetNextVBlankTime();
void _check_gl_reset()
{