From 76b4318b88cb952750da158809e68274adbd9493 Mon Sep 17 00:00:00 2001 From: Shawn Hoffman Date: Mon, 18 Jul 2022 21:45:27 -0700 Subject: [PATCH] CPUDetect: improve win/arm64 support read brand_string on macos/arm64 remove unused flags report family/model info instead of vendor name --- Source/Core/Common/ArmCPUDetect.cpp | 314 ++++++++++++++---- Source/Core/Common/CPUDetect.h | 38 +-- Source/Core/Common/StringUtil.cpp | 7 + Source/Core/Common/StringUtil.h | 2 + Source/Core/Common/x64CPUDetect.cpp | 252 +++++++------- .../PowerPC/Jit64Common/Jit64AsmCommon.cpp | 2 +- Source/Core/VideoCommon/VertexLoaderX64.cpp | 6 +- Source/Core/VideoCommon/VideoConfig.cpp | 4 +- Source/UnitTests/Common/x64EmitterTest.cpp | 26 +- 9 files changed, 422 insertions(+), 229 deletions(-) diff --git a/Source/Core/Common/ArmCPUDetect.cpp b/Source/Core/Common/ArmCPUDetect.cpp index a12deac71a..7fe2d14220 100644 --- a/Source/Core/Common/ArmCPUDetect.cpp +++ b/Source/Core/Common/ArmCPUDetect.cpp @@ -9,45 +9,209 @@ #include #include -#if !defined(_WIN32) && !defined(__APPLE__) +#ifdef __APPLE__ +#include +#elif defined(_WIN32) +#include +#include +#else #ifndef __FreeBSD__ #include #endif #include -#include #endif #include #include "Common/CommonTypes.h" #include "Common/FileUtil.h" +#include "Common/StringUtil.h" -#ifndef WIN32 +#if defined(__APPLE__) || defined(__FreeBSD__) -const char procfile[] = "/proc/cpuinfo"; - -static std::string GetCPUString() +static bool SysctlByName(std::string* value, const std::string& name) { - const std::string marker = "Hardware\t: "; - std::string cpu_string = "Unknown"; + size_t value_len = 0; + if (sysctlbyname(name.c_str(), nullptr, &value_len, nullptr, 0)) + return false; + value->resize(value_len); + if (sysctlbyname(name.c_str(), value->data(), &value_len, nullptr, 0)) + return false; + + TruncateToCString(value); + return true; +} + +#endif + +#if defined(_WIN32) + +static constexpr char SUBKEY_CORE0[] = 
R"(HARDWARE\DESCRIPTION\System\CentralProcessor\0)"; + +// Identifier: human-readable version of CPUID +// ProcessorNameString: marketing name of the processor +// VendorIdentifier: vendor company name +// There are some other maybe-interesting values nearby, BIOS info etc. +static bool ReadProcessorString(std::string* value, const std::string& name) +{ + const DWORD flags = RRF_RT_REG_SZ | RRF_NOEXPAND; + DWORD value_len = 0; + auto status = RegGetValueA(HKEY_LOCAL_MACHINE, SUBKEY_CORE0, name.c_str(), flags, nullptr, + nullptr, &value_len); + if (status != ERROR_SUCCESS && status != ERROR_MORE_DATA) + return false; + + value->resize(value_len); + status = RegGetValueA(HKEY_LOCAL_MACHINE, SUBKEY_CORE0, name.c_str(), flags, nullptr, + value->data(), &value_len); + if (status != ERROR_SUCCESS) + { + value->clear(); + return false; + } + + TruncateToCString(value); + return true; +} + +// Read cached register values from the registry +static bool ReadPrivilegedCPReg(u64* value, u32 reg) +{ + DWORD value_len = sizeof(*value); + // Not sure if the value name is padded or not + return RegGetValueA(HKEY_LOCAL_MACHINE, SUBKEY_CORE0, fmt::format("CP {:x}", reg).c_str(), + RRF_RT_REG_QWORD, nullptr, value, &value_len) == ERROR_SUCCESS; +} + +static bool Read_MIDR_EL1(u64* value) +{ + return ReadPrivilegedCPReg(value, ARM64_SYSREG(0b11, 0, 0, 0b0000, 0)); +} + +static bool Read_ID_AA64ISAR0_EL1(u64* value) +{ + return ReadPrivilegedCPReg(value, ARM64_SYSREG(0b11, 0, 0, 0b0110, 0)); +} + +static bool Read_ID_AA64MMFR1_EL1(u64* value) +{ + return ReadPrivilegedCPReg(value, ARM64_SYSREG(0b11, 0, 0, 0b0111, 1)); +} + +#endif + +#if defined(__linux__) + +static bool ReadDeviceTree(std::string* value, const std::string& name) +{ + const std::string path = std::string("/proc/device-tree/") + name; + std::ifstream file; + File::OpenFStream(file, path.c_str(), std::ios_base::in); + if (!file) + return false; + + file >> *value; + return true; +} + +static std::string 
ReadCpuinfoField(const std::string& field) +{ std::string line; std::ifstream file; - File::OpenFStream(file, procfile, std::ios_base::in); - + File::OpenFStream(file, "/proc/cpuinfo", std::ios_base::in); if (!file) - return cpu_string; + return {}; while (std::getline(file, line)) { - if (line.find(marker) != std::string::npos) - { - cpu_string = line.substr(marker.length()); - break; - } + if (!StringBeginsWith(line, field)) + continue; + auto non_tab = line.find_first_not_of("\t", field.length()); + if (non_tab == line.npos) + continue; + if (line[non_tab] != ':') + continue; + auto value_start = line.find_first_not_of(" ", non_tab + 1); + if (value_start == line.npos) + continue; + return line.substr(value_start); } + return {}; +} - return cpu_string; +static bool Read_MIDR_EL1_Sysfs(u64* value) +{ + std::ifstream file; + File::OpenFStream(file, "/sys/devices/system/cpu/cpu0/regs/identification/midr_el1", + std::ios_base::in); + if (!file) + return false; + + file >> std::hex >> *value; + return true; +} + +#endif + +#if defined(__linux__) || defined(__FreeBSD__) + +static u32 ReadHwCap(u32 type) +{ +#if defined(__linux__) + return getauxval(type); +#elif defined(__FreeBSD__) + u_long hwcap = 0; + elf_aux_info(type, &hwcap, sizeof(hwcap)); + return hwcap; +#endif +} + +// For "Direct" reads, value gets filled via emulation, hence: +// "there is no guarantee that the value reflects the processor that it is currently executing on" +// On big.LITTLE systems, the value may be unrelated to the core this is invoked on, and unless +// other measures are taken, executing the instruction may cause the caller to be switched onto a +// different core when it resumes (and of course, caller could be preempted at any other time as +// well). 
+static inline u64 Read_MIDR_EL1_Direct() +{ + u64 value; + __asm__ __volatile__("mrs %0, MIDR_EL1" : "=r"(value)); + return value; +} + +static bool Read_MIDR_EL1(u64* value) +{ +#ifdef __linux__ + if (Read_MIDR_EL1_Sysfs(value)) + return true; +#endif + + bool id_reg_user_access = ReadHwCap(AT_HWCAP) & HWCAP_CPUID; +#ifdef __FreeBSD__ + // FreeBSD kernel has support but doesn't seem to indicate it? + // see user_mrs_handler + id_reg_user_access = true; +#endif + if (!id_reg_user_access) + return false; + *value = Read_MIDR_EL1_Direct(); + return true; +} + +#endif + +#ifndef __APPLE__ + +static std::string MIDRToString(u64 midr) +{ + u8 implementer = (midr >> 24) & 0xff; + u8 variant = (midr >> 20) & 0xf; + u8 arch = (midr >> 16) & 0xf; + u16 part_num = (midr >> 4) & 0xfff; + u8 revision = midr & 0xf; + return fmt::format("{:02X}:{:X}:{:04b}:{:03X}:{:X}", implementer, variant, arch, part_num, + revision); } #endif @@ -59,82 +223,100 @@ CPUInfo::CPUInfo() Detect(); } -// Detects the various CPU features void CPUInfo::Detect() { - // Set some defaults here - HTT = false; - OS64bit = true; - CPU64bit = true; - Mode64bit = true; vendor = CPUVendor::ARM; bFMA = true; bFlushToZero = true; - bAFP = false; + + num_cores = std::max(static_cast(std::thread::hardware_concurrency()), 1); #ifdef __APPLE__ - num_cores = std::thread::hardware_concurrency(); + SysctlByName(&model_name, "machdep.cpu.brand_string"); // M-series CPUs have all of these - bFP = true; - bASIMD = true; + // Apparently the world has accepted that these can be assumed supported "for all time". + // see https://github.com/golang/go/issues/42747 bAES = true; bSHA1 = true; bSHA2 = true; bCRC32 = true; #elif defined(_WIN32) - num_cores = std::thread::hardware_concurrency(); + // NOTE All this info is from cpu core 0 only. - // Windows does not provide any mechanism for querying the system registers on ARMv8, unlike Linux - // which traps the register reads and emulates them in the kernel. 
There are environment variables - // containing some of the CPU-specific values, which we could use for a lookup table in the - // future. For now, assume all features are present as all known devices which are Windows-on-ARM - // compatible also support these extensions. - bFP = true; - bASIMD = true; - bAES = true; - bCRC32 = true; - bSHA1 = true; - bSHA2 = true; -#else - // Get the information about the CPU - num_cores = sysconf(_SC_NPROCESSORS_CONF); - strncpy(cpu_string, GetCPUString().c_str(), sizeof(cpu_string)); + ReadProcessorString(&model_name, "ProcessorNameString"); -#ifdef __FreeBSD__ - u_long hwcaps = 0; - elf_aux_info(AT_HWCAP, &hwcaps, sizeof(u_long)); + u64 reg = 0; + // Attempt to be forward-compatible: perform inverted check against disabled feature states. + if (Read_ID_AA64ISAR0_EL1(&reg)) + { + bAES = ((reg >> 4) & 0xf) != 0; + bSHA1 = ((reg >> 8) & 0xf) != 0; + bSHA2 = ((reg >> 12) & 0xf) != 0; + bCRC32 = ((reg >> 16) & 0xf) != 0; + } + if (Read_ID_AA64MMFR1_EL1(&reg)) + { + // Introduced in Armv8.7, where AFP must be supported if AdvSIMD and FP both are. + bAFP = ((reg >> 44) & 0xf) != 0; + } + // Pre-decoded MIDR_EL1 could be read with ReadProcessorString(.., "Identifier"), + // but we want format to match across all platforms where possible. 
+ if (Read_MIDR_EL1(&reg)) + { + cpu_id = MIDRToString(reg); + } #else - unsigned long hwcaps = getauxval(AT_HWCAP); + // Linux, Android, and FreeBSD + +#if defined(__FreeBSD__) + SysctlByName(&model_name, "hw.model"); +#elif defined(__linux__) + if (!ReadDeviceTree(&model_name, "model")) + { + // This doesn't seem to work on modern arm64 kernels + model_name = ReadCpuinfoField("Hardware"); + } #endif - bFP = hwcaps & HWCAP_FP; - bASIMD = hwcaps & HWCAP_ASIMD; - bAES = hwcaps & HWCAP_AES; - bCRC32 = hwcaps & HWCAP_CRC32; - bSHA1 = hwcaps & HWCAP_SHA1; - bSHA2 = hwcaps & HWCAP_SHA2; + + const u32 hwcap = ReadHwCap(AT_HWCAP); + bAES = hwcap & HWCAP_AES; + bCRC32 = hwcap & HWCAP_CRC32; + bSHA1 = hwcap & HWCAP_SHA1; + bSHA2 = hwcap & HWCAP_SHA2; + +#if defined(AT_HWCAP2) && defined(HWCAP2_AFP) + const u32 hwcap2 = ReadHwCap(AT_HWCAP2); + bAFP = hwcap2 & HWCAP2_AFP; #endif + + u64 midr = 0; + if (Read_MIDR_EL1(&midr)) + { + cpu_id = MIDRToString(midr); + } +#endif + + model_name = ReplaceAll(model_name, ",", "_"); + cpu_id = ReplaceAll(cpu_id, ",", "_"); } -// Turn the CPU info into a string we can show std::string CPUInfo::Summarize() { - std::string sum; - if (num_cores == 1) - sum = fmt::format("{}, 1 core", cpu_string); - else - sum = fmt::format("{}, {} cores", cpu_string, num_cores); + std::vector sum; + sum.push_back(model_name); + sum.push_back(cpu_id); + if (bAFP) + sum.push_back("AFP"); if (bAES) - sum += ", AES"; + sum.push_back("AES"); if (bCRC32) - sum += ", CRC32"; + sum.push_back("CRC32"); if (bSHA1) - sum += ", SHA1"; + sum.push_back("SHA1"); if (bSHA2) - sum += ", SHA2"; - if (CPU64bit) - sum += ", 64-bit"; + sum.push_back("SHA2"); - return sum; + return JoinStrings(sum, ","); } diff --git a/Source/Core/Common/CPUDetect.h b/Source/Core/Common/CPUDetect.h index db377a2006..5a4fac2364 100644 --- a/Source/Core/Common/CPUDetect.h +++ b/Source/Core/Common/CPUDetect.h @@ -16,62 +16,52 @@ enum class CPUVendor struct CPUInfo { - CPUVendor vendor = 
CPUVendor::Intel; + CPUVendor vendor = CPUVendor::Other; - char cpu_string[0x41] = {}; - char brand_string[0x21] = {}; - bool OS64bit = false; - bool CPU64bit = false; - bool Mode64bit = false; + std::string cpu_id; + std::string model_name; bool HTT = false; int num_cores = 0; - bool bSSE = false; - bool bSSE2 = false; bool bSSE3 = false; bool bSSSE3 = false; - bool bPOPCNT = false; bool bSSE4_1 = false; bool bSSE4_2 = false; bool bLZCNT = false; - bool bSSE4A = false; bool bAVX = false; - bool bAVX2 = false; bool bBMI1 = false; bool bBMI2 = false; - // PDEP and PEXT are ridiculously slow on AMD Zen1, Zen1+ and Zen2 (Family 23) - bool bFastBMI2 = false; + // PDEP and PEXT are ridiculously slow on AMD Zen1, Zen1+ and Zen2 (Family 17h) + bool bBMI2FastParallelBitOps = false; bool bFMA = false; bool bFMA4 = false; bool bAES = false; - // FXSAVE/FXRSTOR - bool bFXSR = false; bool bMOVBE = false; // This flag indicates that the hardware supports some mode // in which denormal inputs _and_ outputs are automatically set to (signed) zero. 
bool bFlushToZero = false; - bool bLAHFSAHF64 = false; - bool bLongMode = false; bool bAtom = false; - bool bZen1p2 = false; - - // ARMv8 specific - bool bFP = false; - bool bASIMD = false; bool bCRC32 = false; bool bSHA1 = false; bool bSHA2 = false; + + // ARMv8 specific bool bAFP = false; // Alternate floating-point behavior // Call Detect() explicit CPUInfo(); - // Turn the CPU info into a string we can show + // The returned string consists of "<model_name>,<cpu_id>,<flags>" + // Where: + // model_name and cpu_id may be zero-length + // model_name is human-readable marketing name + // cpu_id is ':'-delimited string of id info + // flags are optionally included if the related feature is supported and reporting its enablement + // seems useful std::string Summarize(); private: - // Detects the various CPU features void Detect(); }; diff --git a/Source/Core/Common/StringUtil.cpp b/Source/Core/Common/StringUtil.cpp index c498e261fd..2ab4601439 100644 --- a/Source/Core/Common/StringUtil.cpp +++ b/Source/Core/Common/StringUtil.cpp @@ -248,6 +248,13 @@ void ReplaceBreaksWithSpaces(std::string& str) std::replace(str.begin(), str.end(), '\n', ' '); } +void TruncateToCString(std::string* s) +{ + const size_t terminator = s->find_first_of('\0'); + if (terminator != s->npos) + s->resize(terminator); +} + bool TryParse(const std::string& str, bool* const output) { float value; diff --git a/Source/Core/Common/StringUtil.h b/Source/Core/Common/StringUtil.h index 5db7cbfa37..e99e36dab1 100644 --- a/Source/Core/Common/StringUtil.h +++ b/Source/Core/Common/StringUtil.h @@ -53,6 +53,8 @@ std::string ReplaceAll(std::string result, std::string_view src, std::string_vie void ReplaceBreaksWithSpaces(std::string& str); +void TruncateToCString(std::string* s); + bool TryParse(const std::string& str, bool* output); template || std::is_enum_v>* = nullptr> diff --git a/Source/Core/Common/x64CPUDetect.cpp b/Source/Core/Common/x64CPUDetect.cpp index 5f90246302..dedf02c05e 100644 --- 
a/Source/Core/Common/x64CPUDetect.cpp +++ b/Source/Core/Common/x64CPUDetect.cpp @@ -11,9 +11,12 @@ #include #include +#include + #include "Common/CommonTypes.h" #include "Common/Intrinsics.h" #include "Common/MsgHandler.h" +#include "Common/StringUtil.h" #ifndef _WIN32 @@ -38,20 +41,23 @@ static inline void __cpuidex(int info[4], int function_id, int subfunction_id) #endif } -static inline void __cpuid(int info[4], int function_id) +constexpr u32 XCR_XFEATURE_ENABLED_MASK = 0; + +static u64 xgetbv(u32 index) { - return __cpuidex(info, function_id, 0); + u32 eax, edx; + __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index)); + return ((u64)edx << 32) | eax; } -#endif // ifndef _WIN32 +#else -#ifdef _WIN32 +constexpr u32 XCR_XFEATURE_ENABLED_MASK = _XCR_XFEATURE_ENABLED_MASK; static u64 xgetbv(u32 index) { return _xgetbv(index); } -constexpr u32 XCR_XFEATURE_ENABLED_MASK = _XCR_XFEATURE_ENABLED_MASK; static void WarnIfRunningUnderEmulation() { @@ -76,17 +82,21 @@ static void WarnIfRunningUnderEmulation() "Please run the ARM64 build of Dolphin for a better experience."); } -#else - -static u64 xgetbv(u32 index) -{ - u32 eax, edx; - __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index)); - return ((u64)edx << 32) | eax; -} -constexpr u32 XCR_XFEATURE_ENABLED_MASK = 0; #endif // ifdef _WIN32 +struct CPUIDResult +{ + u32 eax{}, ebx{}, ecx{}, edx{}; +}; +static_assert(sizeof(CPUIDResult) == sizeof(u32) * 4); + +static inline CPUIDResult cpuid(int function_id, int subfunction_id = 0) +{ + CPUIDResult info; + __cpuidex((int*)&info, function_id, subfunction_id); + return info; +} + CPUInfo cpu_info; CPUInfo::CPUInfo() @@ -94,196 +104,174 @@ CPUInfo::CPUInfo() Detect(); } -// Detects the various CPU features void CPUInfo::Detect() { #ifdef _WIN32 WarnIfRunningUnderEmulation(); #endif -#ifdef _M_X86_64 - Mode64bit = true; - OS64bit = true; -#endif - num_cores = 1; - - // Set obvious defaults, for extra safety - if (Mode64bit) - { - bSSE = true; - bSSE2 
= true; - bLongMode = true; - } + // This should be much more reliable and easier than trying to get the number of cores out of the + // CPUID data ourselves. + num_cores = std::max(static_cast(std::thread::hardware_concurrency()), 1); // Assume CPU supports the CPUID instruction. Those that don't can barely - // boot modern OS:es anyway. - int cpu_id[4]; + // boot modern OS anyway. - // Detect CPU's CPUID capabilities, and grab CPU string - __cpuid(cpu_id, 0x00000000); - u32 max_std_fn = cpu_id[0]; // EAX - std::memcpy(&brand_string[0], &cpu_id[1], sizeof(int)); - std::memcpy(&brand_string[4], &cpu_id[3], sizeof(int)); - std::memcpy(&brand_string[8], &cpu_id[2], sizeof(int)); - __cpuid(cpu_id, 0x80000000); - u32 max_ex_fn = cpu_id[0]; - if (!strcmp(brand_string, "GenuineIntel")) + // Detect CPU's CPUID capabilities and grab vendor string. + auto info = cpuid(0); + const u32 func_id_max = info.eax; + + std::string vendor_id; + vendor_id.resize(sizeof(u32) * 3); + std::memcpy(&vendor_id[0], &info.ebx, sizeof(u32)); + std::memcpy(&vendor_id[4], &info.edx, sizeof(u32)); + std::memcpy(&vendor_id[8], &info.ecx, sizeof(u32)); + TruncateToCString(&vendor_id); + if (vendor_id == "GenuineIntel") vendor = CPUVendor::Intel; - else if (!strcmp(brand_string, "AuthenticAMD")) + else if (vendor_id == "AuthenticAMD") vendor = CPUVendor::AMD; else vendor = CPUVendor::Other; - // Set reasonable default brand string even if brand string not available. - strcpy(cpu_string, brand_string); - // Detect family and other misc stuff. 
- bool ht = false; - HTT = ht; - if (max_std_fn >= 1) + bool is_amd_family_17 = false; + bool has_sse = false; + if (func_id_max >= 1) { - __cpuid(cpu_id, 0x00000001); - int family = ((cpu_id[0] >> 8) & 0xf) + ((cpu_id[0] >> 20) & 0xff); - int model = ((cpu_id[0] >> 4) & 0xf) + ((cpu_id[0] >> 12) & 0xf0); + info = cpuid(1); + const u32 version = info.eax; + const u32 family = ((version >> 8) & 0xf) + ((version >> 20) & 0xff); + const u32 model = ((version >> 4) & 0xf) + ((version >> 12) & 0xf0); + const u32 stepping = version & 0xf; + + cpu_id = fmt::format("{:02X}:{:02X}:{:X}", family, model, stepping); + // Detect people unfortunate enough to be running Dolphin on an Atom - if (family == 6 && + if (vendor == CPUVendor::Intel && family == 6 && (model == 0x1C || model == 0x26 || model == 0x27 || model == 0x35 || model == 0x36 || model == 0x37 || model == 0x4A || model == 0x4D || model == 0x5A || model == 0x5D)) bAtom = true; + // Detect AMD Zen1, Zen1+ and Zen2 - if (family == 23) - bZen1p2 = true; - ht = (cpu_id[3] >> 28) & 1; + if (vendor == CPUVendor::AMD && family == 0x17) + is_amd_family_17 = true; // AMD CPUs before Zen faked this flag and didn't actually // implement simultaneous multithreading (SMT; Intel calls it HTT) // but rather some weird middle-ground between 1-2 cores - HTT = ht && (vendor == CPUVendor::Intel || family >= 23); + const bool ht = (info.edx >> 28) & 1; + HTT = ht && (vendor == CPUVendor::Intel || (vendor == CPUVendor::AMD && family >= 0x17)); - if ((cpu_id[3] >> 25) & 1) - bSSE = true; - if ((cpu_id[3] >> 26) & 1) - bSSE2 = true; - if ((cpu_id[2]) & 1) + if ((info.edx >> 25) & 1) + has_sse = true; + if (info.ecx & 1) bSSE3 = true; - if ((cpu_id[2] >> 9) & 1) + if ((info.ecx >> 9) & 1) bSSSE3 = true; - if ((cpu_id[2] >> 19) & 1) + if ((info.ecx >> 19) & 1) bSSE4_1 = true; - if ((cpu_id[2] >> 20) & 1) + if ((info.ecx >> 20) & 1) bSSE4_2 = true; - if ((cpu_id[2] >> 22) & 1) + if ((info.ecx >> 22) & 1) bMOVBE = true; - if ((cpu_id[2] >> 25) 
& 1) + if ((info.ecx >> 25) & 1) bAES = true; - if ((cpu_id[3] >> 24) & 1) - { - // We can use FXSAVE. - bFXSR = true; - } - // AVX support requires 3 separate checks: // - Is the AVX bit set in CPUID? // - Is the XSAVE bit set in CPUID? // - XGETBV result has the XCR bit set. - if (((cpu_id[2] >> 28) & 1) && ((cpu_id[2] >> 27) & 1)) + if (((info.ecx >> 28) & 1) && ((info.ecx >> 27) & 1)) { - if ((xgetbv(XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6) + // Check that XSAVE can be used for SSE and AVX + if ((xgetbv(XCR_XFEATURE_ENABLED_MASK) & 0b110) == 0b110) { bAVX = true; - if ((cpu_id[2] >> 12) & 1) + if ((info.ecx >> 12) & 1) bFMA = true; } } - if (max_std_fn >= 7) + if (func_id_max >= 7) { - __cpuidex(cpu_id, 0x00000007, 0x00000000); - // careful; we can't enable AVX2 unless the XSAVE/XGETBV checks above passed - if ((cpu_id[1] >> 5) & 1) - bAVX2 = bAVX; - if ((cpu_id[1] >> 3) & 1) + info = cpuid(7); + if ((info.ebx >> 3) & 1) bBMI1 = true; - if ((cpu_id[1] >> 8) & 1) + if ((info.ebx >> 8) & 1) bBMI2 = true; + if ((info.ebx >> 29) & 1) + bSHA1 = bSHA2 = true; } } - bFlushToZero = bSSE; - bFastBMI2 = bBMI2 && !bZen1p2; - - if (max_ex_fn >= 0x80000004) + info = cpuid(0x80000000); + const u32 ext_func_id_max = info.eax; + if (ext_func_id_max >= 0x80000004) { // Extract CPU model string - __cpuid(cpu_id, 0x80000002); - memcpy(cpu_string, cpu_id, sizeof(cpu_id)); - __cpuid(cpu_id, 0x80000003); - memcpy(cpu_string + 16, cpu_id, sizeof(cpu_id)); - __cpuid(cpu_id, 0x80000004); - memcpy(cpu_string + 32, cpu_id, sizeof(cpu_id)); + model_name.resize(sizeof(info) * 3); + for (u32 i = 0; i < 3; i++) + { + info = cpuid(0x80000002 + i); + memcpy(&model_name[sizeof(info) * i], &info, sizeof(info)); + } + TruncateToCString(&model_name); + model_name = StripSpaces(model_name); } - if (max_ex_fn >= 0x80000001) + if (ext_func_id_max >= 0x80000001) { // Check for more features. 
- __cpuid(cpu_id, 0x80000001); - if (cpu_id[2] & 1) - bLAHFSAHF64 = true; - if ((cpu_id[2] >> 5) & 1) + info = cpuid(0x80000001); + if ((info.ecx >> 5) & 1) bLZCNT = true; - if ((cpu_id[2] >> 16) & 1) + if ((info.ecx >> 16) & 1) bFMA4 = true; - if ((cpu_id[3] >> 29) & 1) - bLongMode = true; } - // this should be much more reliable and easier - // than trying to get the number of cores out of the CPUID data - // ourselves - num_cores = std::max(std::thread::hardware_concurrency(), 1u); + // Computed flags + bFlushToZero = has_sse; + bBMI2FastParallelBitOps = bBMI2 && !is_amd_family_17; + bCRC32 = bSSE4_2; + + model_name = ReplaceAll(model_name, ",", "_"); + cpu_id = ReplaceAll(cpu_id, ",", "_"); } -// Turn the CPU info into a string we can show std::string CPUInfo::Summarize() { - std::string sum(cpu_string); - sum += " ("; - sum += brand_string; - sum += ")"; + std::vector sum; + sum.push_back(model_name); + sum.push_back(cpu_id); - if (bSSE) - sum += ", SSE"; - if (bSSE2) - { - sum += ", SSE2"; - if (!bFlushToZero) - sum += " (but not DAZ!)"; - } if (bSSE3) - sum += ", SSE3"; + sum.push_back("SSE3"); if (bSSSE3) - sum += ", SSSE3"; + sum.push_back("SSSE3"); if (bSSE4_1) - sum += ", SSE4.1"; + sum.push_back("SSE4.1"); if (bSSE4_2) - sum += ", SSE4.2"; + sum.push_back("SSE4.2"); if (HTT) - sum += ", HTT"; + sum.push_back("HTT"); if (bAVX) - sum += ", AVX"; - if (bAVX2) - sum += ", AVX2"; + sum.push_back("AVX"); if (bBMI1) - sum += ", BMI1"; + sum.push_back("BMI1"); if (bBMI2) - sum += ", BMI2"; + sum.push_back("BMI2"); if (bFMA) - sum += ", FMA"; - if (bAES) - sum += ", AES"; + sum.push_back("FMA"); if (bMOVBE) - sum += ", MOVBE"; - if (bLongMode) - sum += ", 64-bit support"; - return sum; + sum.push_back("MOVBE"); + if (bAES) + sum.push_back("AES"); + if (bCRC32) + sum.push_back("CRC32"); + if (bSHA1) + sum.push_back("SHA1"); + if (bSHA2) + sum.push_back("SHA2"); + + return JoinStrings(sum, ","); } diff --git 
a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp index 124218fb7f..1269b0ec23 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp @@ -65,7 +65,7 @@ void CommonAsmRoutines::GenConvertDoubleToSingle() // Don't Denormalize - if (cpu_info.bFastBMI2) + if (cpu_info.bBMI2FastParallelBitOps) { // Extract bits 0-1 and 5-34 MOV(64, R(RSCRATCH), Imm64(0xc7ffffffe0000000)); diff --git a/Source/Core/VideoCommon/VertexLoaderX64.cpp b/Source/Core/VideoCommon/VertexLoaderX64.cpp index 63752429aa..27e24e6f28 100644 --- a/Source/Core/VideoCommon/VertexLoaderX64.cpp +++ b/Source/Core/VideoCommon/VertexLoaderX64.cpp @@ -299,7 +299,7 @@ void VertexLoaderX64::ReadColor(OpArg data, VertexComponentFormat attribute, Col // RRRRRGGG GGGBBBBB // AAAAAAAA BBBBBBBB GGGGGGGG RRRRRRRR LoadAndSwap(16, scratch1, data); - if (cpu_info.bBMI1 && cpu_info.bFastBMI2) + if (cpu_info.bBMI1 && cpu_info.bBMI2FastParallelBitOps) { MOV(32, R(scratch2), Imm32(0x07C3F7C0)); PDEP(32, scratch3, scratch1, R(scratch2)); @@ -339,7 +339,7 @@ void VertexLoaderX64::ReadColor(OpArg data, VertexComponentFormat attribute, Col // RRRRGGGG BBBBAAAA // AAAAAAAA BBBBBBBB GGGGGGGG RRRRRRRR LoadAndSwap(16, scratch1, data); - if (cpu_info.bFastBMI2) + if (cpu_info.bBMI2FastParallelBitOps) { MOV(32, R(scratch2), Imm32(0x0F0F0F0F)); PDEP(32, scratch1, scratch1, R(scratch2)); @@ -368,7 +368,7 @@ void VertexLoaderX64::ReadColor(OpArg data, VertexComponentFormat attribute, Col // AAAAAAAA BBBBBBBB GGGGGGGG RRRRRRRR data.AddMemOffset(-1); // subtract one from address so we can use a 32bit load and bswap LoadAndSwap(32, scratch1, data); - if (cpu_info.bFastBMI2) + if (cpu_info.bBMI2FastParallelBitOps) { MOV(32, R(scratch2), Imm32(0xFCFCFCFC)); PDEP(32, scratch1, scratch1, R(scratch2)); diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp index 
c1ff163f9c..e13e8d96ac 100644 --- a/Source/Core/VideoCommon/VideoConfig.cpp +++ b/Source/Core/VideoCommon/VideoConfig.cpp @@ -179,8 +179,8 @@ bool VideoConfig::UsingUberShaders() const static u32 GetNumAutoShaderCompilerThreads() { - // Automatic number. We use clamp(cpus - 3, 1, 4). - return static_cast(std::min(std::max(cpu_info.num_cores - 3, 1), 4)); + // Automatic number. + return static_cast(std::clamp(cpu_info.num_cores - 3, 1, 4)); } static u32 GetNumAutoShaderPreCompilerThreads() diff --git a/Source/UnitTests/Common/x64EmitterTest.cpp b/Source/UnitTests/Common/x64EmitterTest.cpp index 439ac5e9d6..79e5eea8a2 100644 --- a/Source/UnitTests/Common/x64EmitterTest.cpp +++ b/Source/UnitTests/Common/x64EmitterTest.cpp @@ -92,7 +92,31 @@ class x64EmitterTest : public testing::Test protected: void SetUp() override { - memset(&cpu_info, 0x01, sizeof(cpu_info)); + // Ensure settings are constant no matter on which actual hardware the test runs. + // Attempt to maximize complex code coverage. Note that this will miss some paths. + cpu_info.vendor = CPUVendor::Intel; + cpu_info.cpu_id = "GenuineIntel"; + cpu_info.model_name = "Unknown"; + cpu_info.HTT = true; + cpu_info.num_cores = 8; + cpu_info.bSSE3 = true; + cpu_info.bSSSE3 = true; + cpu_info.bSSE4_1 = true; + cpu_info.bSSE4_2 = true; + cpu_info.bLZCNT = true; + cpu_info.bAVX = true; + cpu_info.bBMI1 = true; + cpu_info.bBMI2 = true; + cpu_info.bBMI2FastParallelBitOps = true; + cpu_info.bFMA = true; + cpu_info.bFMA4 = true; + cpu_info.bAES = true; + cpu_info.bMOVBE = true; + cpu_info.bFlushToZero = true; + cpu_info.bAtom = false; + cpu_info.bCRC32 = true; + cpu_info.bSHA1 = true; + cpu_info.bSHA2 = true; emitter.reset(new X64CodeBlock()); emitter->AllocCodeSpace(4096);