Use Win10 CPU Sets API where supported

Using CPU Sets instead of thread affinity makes it possible to ensure
that the entire physical core is reserved ONLY for the game code,
so even the worker threads and third party libraries inside the process
cannot run code on it.

For Windows 7 and 8.1, the existing thread affinity approach is retained.
This commit is contained in:
Silent 2021-03-23 21:26:56 +01:00
parent 187f4a8162
commit 39c938f990
No known key found for this signature in database
GPG Key ID: AE53149BB0C45AF1
18 changed files with 246 additions and 64 deletions

View File

@ -102,9 +102,13 @@ void ClockThread(TimerObject* Timer)
if (!Timer->Name.empty()) {
CxbxSetThreadName(Timer->Name.c_str());
}
if (Timer->CpuAffinity != nullptr) {
InitXboxThread(*Timer->CpuAffinity);
if (Timer->IsXboxTimer) {
InitXboxThread();
g_AffinityPolicy->SetAffinityXbox();
} else {
g_AffinityPolicy->SetAffinityOther();
}
NewExpireTime = GetNextExpireTime(Timer);
while (true) {
@ -133,7 +137,7 @@ void Timer_Exit(TimerObject* Timer)
}
// Allocates the memory for the timer object
TimerObject* Timer_Create(TimerCB Callback, void* Arg, std::string Name, unsigned long* Affinity)
TimerObject* Timer_Create(TimerCB Callback, void* Arg, std::string Name, bool IsXboxTimer)
{
std::lock_guard<std::mutex>lock(TimerMtx);
TimerObject* pTimer = new TimerObject;
@ -142,8 +146,8 @@ TimerObject* Timer_Create(TimerCB Callback, void* Arg, std::string Name, unsigne
pTimer->ExpireTime_MS.store(0);
pTimer->Exit.store(false);
pTimer->Opaque = Arg;
Name.empty() ? pTimer->Name = "Unnamed thread" : pTimer->Name = Name;
pTimer->CpuAffinity = Affinity;
pTimer->Name = Name.empty() ? "Unnamed thread" : std::move(Name);
pTimer->IsXboxTimer = IsXboxTimer;
TimerList.emplace_back(pTimer);
return pTimer;

View File

@ -50,14 +50,14 @@ typedef struct _TimerObject
TimerCB Callback; // function to call when the timer expires
void* Opaque; // opaque argument to pass to the callback
std::string Name; // the name of the timer thread (if any)
unsigned long* CpuAffinity; // the cpu affinity of the timer thread (if any)
bool IsXboxTimer; // indicates that the timer should run on the Xbox CPU
}
TimerObject;
extern uint64_t HostClockFrequency;
/* Timer exported functions */
TimerObject* Timer_Create(TimerCB Callback, void* Arg, std::string Name, unsigned long* Affinity);
TimerObject* Timer_Create(TimerCB Callback, void* Arg, std::string Name, bool IsXboxTimer);
void Timer_Start(TimerObject* Timer, uint64_t Expire_MS);
void Timer_Exit(TimerObject* Timer);
void Timer_ChangeExpireTime(TimerObject* Timer, uint64_t Expire_ms);

View File

@ -80,6 +80,12 @@ void InputDeviceManager::Initialize(bool is_gui, HWND hwnd)
m_hwnd = hwnd;
m_PollingThread = std::thread([this, is_gui]() {
// This code can run in both cxbx.exe and cxbxr-ldr.exe, but will not have
// the affinity policy when running in the former.
if (g_AffinityPolicy) {
g_AffinityPolicy->SetAffinityOther();
}
XInput::Init(m_Mtx);
Sdl::Init(m_Mtx, m_Cv, is_gui);
});

View File

@ -84,8 +84,6 @@ namespace Sdl
UpdateInputEvent_t = CustomEvent_t + 2;
DeviceRemoveAck_t = CustomEvent_t + 3;
SetThreadAffinityMask(GetCurrentThread(), g_CPUOthers);
// Drain all joystick add/remove events to avoid creating duplicated
// devices when we call PopulateDevices
while (SDL_PollEvent(&Event))

View File

@ -23,9 +23,22 @@
// *
// ******************************************************************
// Override _WIN32_WINNT for this .cpp file to gain access to the CPU Sets API
#undef _WIN32_WINNT
#define _WIN32_WINNT _WIN32_WINNT_WIN10
#include <windows.h>
#include "Threads.h"
#include "core/kernel/support/Emu.h"
#include "core/kernel/init/CxbxKrnl.h"
#include <processthreadsapi.h>
#include <vector>
#include <set>
std::unique_ptr<AffinityPolicy> g_AffinityPolicy;
// Exception structure and method from:
// https://msdn.microsoft.com/en-us/library/xcb2z8hs.aspx
@ -74,3 +87,176 @@ void SetCurrentThreadName(const char* szThreadName)
{
SetThreadName(GetCurrentThreadId(), szThreadName);
}
// Windows 10 affinity policy - uses CPU sets to pin threads accordingly
class Win10Policy final : public AffinityPolicy
{
public:
bool Initialize() {
HMODULE kernel32 = GetModuleHandleW(L"kernel32");
auto getSystemCpuSetInformation = reinterpret_cast<decltype(GetSystemCpuSetInformation)*>(GetProcAddress(kernel32, "GetSystemCpuSetInformation"));
auto setProcessDefaultCpuSets = reinterpret_cast<decltype(SetProcessDefaultCpuSets)*>(GetProcAddress(kernel32, "SetProcessDefaultCpuSets"));
m_setThreadSelectedCpuSets = reinterpret_cast<decltype(SetThreadSelectedCpuSets)*>(GetProcAddress(kernel32, "SetThreadSelectedCpuSets"));
// Those functions are available only in Windows 10, so if bail out if they don't exist
if (getSystemCpuSetInformation == nullptr || setProcessDefaultCpuSets == nullptr || m_setThreadSelectedCpuSets == nullptr) {
return false;
}
const HANDLE process = GetCurrentProcess();
ULONG bufSize;
getSystemCpuSetInformation(nullptr, 0, &bufSize, process, 0);
auto buffer = std::make_unique<uint8_t[]>(bufSize);
if (!getSystemCpuSetInformation(reinterpret_cast<PSYSTEM_CPU_SET_INFORMATION>(buffer.get()), bufSize, &bufSize, process, 0)) {
return false;
}
// SYSTEM_CPU_SET_INFORMATION is a variable length structure and may be expanded in the future,
// so "real" pointers to elements need to be calculated and filtered
std::vector<const decltype(SYSTEM_CPU_SET_INFORMATION::CpuSet)*> cpuSets;
const uint8_t* ptr = buffer.get();
for (ULONG size = 0; size < bufSize; ) {
auto info = reinterpret_cast<const SYSTEM_CPU_SET_INFORMATION*>(ptr);
if (info->Type == CpuSetInformation) {
cpuSets.push_back(&info->CpuSet);
}
ptr += info->Size;
size += info->Size;
}
// Count logical and physical CPU cores
size_t numLogicalCores, numPhysicalCores;
{
std::set<BYTE> logicalCores, physicalCores;
for (const auto& info : cpuSets) {
logicalCores.insert(info->LogicalProcessorIndex);
physicalCores.insert(info->CoreIndex);
}
numLogicalCores = logicalCores.size();
numPhysicalCores = physicalCores.size();
}
// Case #1: Single core machines
// Don't change affinity at all, report failure
if (numLogicalCores == 1) {
return false;
}
// Case #2: Single physical core, multiple logical cores
// Assign the first logical core to Xbox, leave the rest to other threads
if (numPhysicalCores == 1 && numLogicalCores != 1) {
m_xboxCPUSet = cpuSets[0]->Id;
cpuSets.erase(cpuSets.begin());
}
// Otherwise: Multiple physical cores
// Assign the first logical and physical core to Xbox, leave the rest of that
// physical core unassigned (if hyperthreading is active), the remaining
// physical cores to other threads
else {
const BYTE physicalCore = cpuSets[0]->CoreIndex;
m_xboxCPUSet = cpuSets[0]->Id;
for (auto it = cpuSets.begin(); it != cpuSets.end(); ) {
if ((*it)->CoreIndex == physicalCore) {
it = cpuSets.erase(it);
} else {
++it;
}
}
}
// Finally, extract the CPU IDs and assign them as a default process group
std::vector<DWORD> cpuIds;
cpuIds.reserve(cpuSets.size());
for (const auto& info : cpuSets) {
cpuIds.push_back(info->Id);
}
return setProcessDefaultCpuSets(process, cpuIds.data(), cpuIds.size()) != FALSE;
}
virtual void SetAffinityXbox(HANDLE thread) const override {
m_setThreadSelectedCpuSets(thread, &m_xboxCPUSet, 1);
}
virtual void SetAffinityOther(HANDLE /*thread*/) const override {
// CPU sets for the process have already been set, so do nothing.
}
private:
ULONG m_xboxCPUSet = 0;
decltype(SetThreadSelectedCpuSets)* m_setThreadSelectedCpuSets = nullptr;
};
// Windows 7/8.1 affinity policy - uses thread affinity to pin threads accordingly
class Win7Policy final : public AffinityPolicy
{
public:
bool Initialize() {
if (!GetProcessAffinityMask(g_CurrentProcessHandle, &CPUXbox, &CPUOthers))
CxbxKrnlCleanupEx(CXBXR_MODULE::INIT, "GetProcessAffinityMask failed.");
// For the other threads, remove one bit from the processor mask:
CPUOthers = ((CPUXbox - 1) & CPUXbox);
// Test if there are any other cores available:
if (CPUOthers == 0) {
// If not, fail the policy
return false;
}
CPUXbox = CPUXbox & (~CPUOthers);
return true;
}
virtual void SetAffinityXbox(HANDLE thread) const override {
SetThreadAffinityMask(thread, CPUXbox);
}
virtual void SetAffinityOther(HANDLE thread) const override {
SetThreadAffinityMask(thread, CPUOthers);
}
private:
DWORD_PTR CPUXbox = 0;
DWORD_PTR CPUOthers = 0;
};
// Empty affinity policy - used on single core host machines and if "All Cores Hack" is enabled
class EmptyPolicy final : public AffinityPolicy
{
public:
virtual void SetAffinityXbox(HANDLE /*thread*/) const override {
}
virtual void SetAffinityOther(HANDLE /*thread*/) const override {
}
};
std::unique_ptr<AffinityPolicy> AffinityPolicy::InitPolicy()
{
std::unique_ptr<AffinityPolicy> result;
if (!g_UseAllCores) {
if (auto win10Policy = std::make_unique<Win10Policy>(); win10Policy->Initialize()) {
result = std::move(win10Policy);
} else if (auto win7Policy = std::make_unique<Win7Policy>(); win7Policy->Initialize()) {
result = std::move(win7Policy);
}
}
if (!result) {
result = std::make_unique<EmptyPolicy>();
}
return result;
}
void AffinityPolicy::SetAffinityXbox() const {
SetAffinityXbox(GetCurrentThread());
}
void AffinityPolicy::SetAffinityOther() const {
SetAffinityOther(GetCurrentThread());
}

View File

@ -25,4 +25,25 @@
#pragma once
#include <memory>
void SetCurrentThreadName(const char* szThreadName);
// A helper class to pin game/other threads to specific CPU cores
// Implemented differently depending on the host OS, so it is exposed as an interface
// If "All Cores Hack" is enabled (or the host system is single core), an empty implementation is used
class AffinityPolicy
{
public:
    // Must be virtual: concrete policies are owned and destroyed through
    // std::unique_ptr<AffinityPolicy>, i.e. via a pointer to this base class.
    virtual ~AffinityPolicy() = default;

    // Apply the policy to an explicit thread handle (a Win32 HANDLE, declared as
    // void* here so this header does not need <windows.h>).
    virtual void SetAffinityXbox(void* thread) const = 0;
    virtual void SetAffinityOther(void* thread) const = 0;

    // Overloads without a handle; defined out of line.
    void SetAffinityXbox() const;
    void SetAffinityOther() const;

    // Creates the policy instance to be used by the process.
    static std::unique_ptr<AffinityPolicy> InitPolicy();
};
extern std::unique_ptr<AffinityPolicy> g_AffinityPolicy;

View File

@ -599,6 +599,7 @@ void CxbxInitWindow(bool bFullInit)
HANDLE hThread = CreateThread(nullptr, 0, EmuUpdateTickCount, nullptr, 0, nullptr);
// We set the priority of this thread a bit higher, to assure reliable timing :
SetThreadPriority(hThread, THREAD_PRIORITY_ABOVE_NORMAL);
g_AffinityPolicy->SetAffinityOther(hThread);
CxbxKrnlRegisterThread(hThread);
CloseHandle(hThread); // CxbxKrnlRegisterThread duplicates the handle so we can close this one
@ -623,7 +624,7 @@ void CxbxInitWindow(bool bFullInit)
EmuShared::Cleanup();
ExitProcess(0);
}
SetThreadAffinityMask(hRenderWindowThread, g_CPUOthers);
g_AffinityPolicy->SetAffinityOther(hRenderWindowThread);
// Wait for the window to create
WaitForSingleObject(hStartEvent, INFINITE);
@ -2092,7 +2093,7 @@ static DWORD WINAPI EmuUpdateTickCount(LPVOID)
CxbxSetThreadName("Cxbx Timing Thread");
// since callbacks come from here
InitXboxThread(g_CPUOthers); // avoid Xbox1 core for lowest possible latency
InitXboxThread();
EmuLog(LOG_LEVEL::DEBUG, "Timing thread is running.");

View File

@ -11,10 +11,6 @@ VertexShaderSource g_VertexShaderSource = VertexShaderSource();
// (And the ResetD3DDevice method should be removed)
ID3DBlob* AsyncCreateVertexShader(IntermediateVertexShader intermediateShader, ShaderKey key) {
// HACK set thread affinity every call to reduce interference with Xbox main thread
// TODO use a thread pool library for better control over workers
SetThreadAffinityMask(GetCurrentThread(), g_CPUOthers);
ID3DBlob* pCompiledShader;
auto hRet = EmuCompileShader(

View File

@ -376,7 +376,7 @@ xbox::void_xt WINAPI xbox::EMUPATCH(DirectSoundDoWork)()
// For Async process purpose only
static void dsound_thread_worker(LPVOID nullPtr)
{
SetThreadAffinityMask(GetCurrentThread(), g_CPUOthers);
g_AffinityPolicy->SetAffinityOther();
while (true) {
// Testcase: Gauntlet Dark Legacy, if Sleep(1) then intro videos start to starved often

View File

@ -142,7 +142,8 @@ xbox::KPCR* WINAPI KeGetPcr()
if (Pcr == nullptr) {
EmuLog(LOG_LEVEL::WARNING, "KeGetPCR returned nullptr: Was this called from a non-xbox thread?");
// Attempt to salvage the situation by calling InitXboxThread to setup KPCR in place
InitXboxThread(g_CPUXbox);
InitXboxThread();
g_AffinityPolicy->SetAffinityXbox();
Pcr = (xbox::PKPCR)__readfsdword(TIB_ArbitraryDataSlot);
}

View File

@ -86,16 +86,6 @@ void InitXboxThread()
_controlfp(_RC_NEAR, _MCW_RC); // Set Rounding control to near (unsure about this)
}
void InitXboxThread(DWORD_PTR cores)
{
InitXboxThread();
if (!g_UseAllCores) {
// Run this thread solely on the indicated core(s) :
SetThreadAffinityMask(GetCurrentThread(), cores);
}
}
// PsCreateSystemThread proxy procedure
// Dxbx Note : The signature of PCSTProxy should conform to System.TThreadFunc !
static unsigned int WINAPI PCSTProxy
@ -272,11 +262,7 @@ XBSYSAPI EXPORTNUM(255) xbox::ntstatus_xt NTAPI xbox::PsCreateSystemThreadEx
if (ThreadId != NULL)
*ThreadId = dwThreadId;
if (!g_UseAllCores) {
// Run this thread solely on the indicated core(s) :
SetThreadAffinityMask(handle, g_CPUXbox);
}
g_AffinityPolicy->SetAffinityXbox(handle);
CxbxKrnlRegisterThread(handle);
// Now that ThreadId is populated and affinity is changed, resume the thread (unless the guest passed CREATE_SUSPENDED)

View File

@ -97,8 +97,6 @@ Xbe* CxbxKrnl_Xbe = NULL;
bool g_bIsChihiro = false;
bool g_bIsDebug = false;
bool g_bIsRetail = false;
DWORD_PTR g_CPUXbox = 0;
DWORD_PTR g_CPUOthers = 0;
// Indicates to disable/enable all interrupts when cli and sti instructions are executed
std::atomic_bool g_bEnableAllInterrupts = true;
@ -377,7 +375,8 @@ static unsigned int WINAPI CxbxKrnlInterruptThread(PVOID param)
CxbxSetThreadName("CxbxKrnl Interrupts");
// Make sure Xbox1 code runs on one core :
InitXboxThread(g_CPUXbox);
InitXboxThread();
g_AffinityPolicy->SetAffinityXbox();
#if 0
InitSoftwareInterrupts();
@ -1437,22 +1436,7 @@ __declspec(noreturn) void CxbxKrnlInit
// Make sure the Xbox1 code runs on one core (as the box itself has only 1 CPU,
// this will better approximate the environment with regard to multi-threading) :
EmuLogInit(LOG_LEVEL::DEBUG, "Determining CPU affinity.");
{
if (!GetProcessAffinityMask(g_CurrentProcessHandle, &g_CPUXbox, &g_CPUOthers))
CxbxKrnlCleanupEx(LOG_PREFIX_INIT, "GetProcessAffinityMask failed.");
// For the other threads, remove one bit from the processor mask:
g_CPUOthers = ((g_CPUXbox - 1) & g_CPUXbox);
// Test if there are any other cores available :
if (g_CPUOthers > 0) {
// If so, make sure the Xbox threads run on the core NOT running Xbox code :
g_CPUXbox = g_CPUXbox & (~g_CPUOthers);
} else {
// Else the other threads must run on the same core as the Xbox code :
g_CPUOthers = g_CPUXbox;
}
}
g_AffinityPolicy = AffinityPolicy::InitPolicy();
// initialize graphics
EmuLogInit(LOG_LEVEL::DEBUG, "Initializing render window.");
@ -1534,7 +1518,8 @@ __declspec(noreturn) void CxbxKrnlInit
EmuInitFS();
InitXboxThread(g_CPUXbox);
InitXboxThread();
g_AffinityPolicy->SetAffinityXbox();
xbox::ObInitSystem();
xbox::KiInitSystem();
@ -1543,7 +1528,7 @@ __declspec(noreturn) void CxbxKrnlInit
DWORD dwThreadId;
HANDLE hThread = (HANDLE)_beginthreadex(NULL, NULL, CxbxKrnlInterruptThread, NULL, NULL, (unsigned int*)&dwThreadId);
// Start the kernel clock thread
TimerObject* KernelClockThr = Timer_Create(CxbxKrnlClockThread, nullptr, "Kernel clock thread", &g_CPUOthers);
TimerObject* KernelClockThr = Timer_Create(CxbxKrnlClockThread, nullptr, "Kernel clock thread", false);
Timer_Start(KernelClockThr, SCALE_MS_IN_NS);
EmuLogInit(LOG_LEVEL::DEBUG, "Calling XBE entry point...");

View File

@ -204,7 +204,7 @@ extern ULONG g_CxbxFatalErrorCode;
extern size_t g_SystemMaxMemory;
void InitXboxThread(DWORD_PTR cores);
void InitXboxThread();
/*! thread local storage structure */
extern Xbe::TLS *CxbxKrnl_TLS;

View File

@ -74,9 +74,6 @@ extern HWND g_hEmuWindow;
extern PVOID g_pfnThreadNotification[16];
extern int g_iThreadNotificationCount;
extern DWORD_PTR g_CPUXbox;
extern DWORD_PTR g_CPUOthers;
extern HANDLE g_CurrentProcessHandle; // Set in CxbxKrnlMain
// Delta added to host SystemTime, used in KiClockIsr and KeSetSystemTime

View File

@ -478,7 +478,7 @@ void EmuNVNet_Write(xbox::addr_xt addr, uint32_t value, int size)
std::thread NVNetRecvThread;
static void NVNetRecvThreadProc(NvNetState_t *s)
{
SetThreadAffinityMask(GetCurrentThread(), g_CPUOthers);
g_AffinityPolicy->SetAffinityOther();
uint8_t packet[65536];
while (true) {
int size = g_NVNet->PCAPReceive(packet, 65536);

View File

@ -877,7 +877,7 @@ void OHCI::OHCI_StateReset()
void OHCI::OHCI_BusStart()
{
// Create the EOF timer.
m_pEOFtimer = Timer_Create(OHCI_FrameBoundaryWrapper, this, "", nullptr);
m_pEOFtimer = Timer_Create(OHCI_FrameBoundaryWrapper, this, "", false);
EmuLog(LOG_LEVEL::DEBUG, "Operational event");

View File

@ -218,7 +218,7 @@ static void pfifo_run_puller(NV2AState *d)
int pfifo_puller_thread(NV2AState *d)
{
SetThreadAffinityMask(GetCurrentThread(), g_CPUOthers);
g_AffinityPolicy->SetAffinityOther();
CxbxSetThreadName("Cxbx NV2A FIFO puller");
glo_set_current(d->pgraph.gl_context);
@ -453,7 +453,7 @@ static void pfifo_run_pusher(NV2AState *d)
int pfifo_pusher_thread(NV2AState *d)
{
SetThreadAffinityMask(GetCurrentThread(), g_CPUOthers);
g_AffinityPolicy->SetAffinityOther();
CxbxSetThreadName("Cxbx NV2A FIFO pusher");
qemu_mutex_lock(&d->pfifo.pfifo_lock);

View File

@ -54,6 +54,7 @@
#include "core\kernel\support\Emu.h"
#include "core\kernel\exports\EmuKrnl.h"
#include "core\hle\Intercept.hpp"
#include "common/win32/Threads.h"
#include "Logging.h"
#include "vga.h"
@ -1099,7 +1100,7 @@ void NV2ADevice::UpdateHostDisplay(NV2AState *d)
// TODO: Fix this properly
static void nv2a_vblank_thread(NV2AState *d)
{
SetThreadAffinityMask(GetCurrentThread(), g_CPUOthers);
g_AffinityPolicy->SetAffinityOther();
CxbxSetThreadName("Cxbx NV2A VBLANK");
auto nextVBlankTime = GetNextVBlankTime();