forked from ShuriZma/suyu
1
0
Fork 0

x64: cpu_wait: Make use of MWAITX in MicroSleep

MWAITX is equivalent to UMWAIT on Intel's Alder Lake CPUs.
We can emulate TPAUSE by using MONITORX in conjunction with MWAITX to wait for 100K cycles.
This commit is contained in:
Morph 2023-06-28 01:07:59 -04:00
parent 4303ed614d
commit 3d868baaa4
1 changed files with 21 additions and 12 deletions

View File

@ -13,24 +13,30 @@
namespace Common::X64 {
namespace {
// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
// For reference:
// At 1 GHz, 100K cycles is 100us
// At 2 GHz, 100K cycles is 50us
// At 4 GHz, 100K cycles is 25us
constexpr auto PauseCycles = 100'000U;
} // Anonymous namespace
#ifdef _MSC_VER
__forceinline static void TPAUSE() {
// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
// For reference:
// At 1 GHz, 100K cycles is 100us
// At 2 GHz, 100K cycles is 50us
// At 4 GHz, 100K cycles is 25us
static constexpr auto PauseCycles = 100'000;
_tpause(0, FencedRDTSC() + PauseCycles);
}
__forceinline static void MWAITX() {
// monitor_var should be aligned to a cache line.
alignas(64) u64 monitor_var{};
_mm_monitorx(&monitor_var, 0, 0);
_mm_mwaitx(/* extensions*/ 2, /* hints */ 0, /* cycles */ PauseCycles);
}
#else
static void TPAUSE() {
// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
// For reference:
// At 1 GHz, 100K cycles is 100us
// At 2 GHz, 100K cycles is 50us
// At 4 GHz, 100K cycles is 25us
static constexpr auto PauseCycles = 100'000;
const auto tsc = FencedRDTSC() + PauseCycles;
const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF);
const auto edx = static_cast<u32>(tsc >> 32);
@ -40,9 +46,12 @@ static void TPAUSE() {
void MicroSleep() {
static const bool has_waitpkg = GetCPUCaps().waitpkg;
static const bool has_monitorx = GetCPUCaps().monitorx;
if (has_waitpkg) {
TPAUSE();
} else if (has_monitorx) {
MWAITX();
} else {
std::this_thread::yield();
}