x86emitter: Purge x86caps

We can use cpuinfo for querying AVX/AVX2.
This commit is contained in:
Stenzek 2023-12-26 18:13:37 +10:00 committed by Connor McLaughlin
parent b121e5af25
commit f461bc9176
13 changed files with 33 additions and 464 deletions

View File

@ -37,7 +37,6 @@ target_sources(common PRIVATE
WindowInfo.cpp WindowInfo.cpp
emitter/avx.cpp emitter/avx.cpp
emitter/bmi.cpp emitter/bmi.cpp
emitter/cpudetect.cpp
emitter/fpu.cpp emitter/fpu.cpp
emitter/groups.cpp emitter/groups.cpp
emitter/jmp.cpp emitter/jmp.cpp
@ -45,8 +44,6 @@ target_sources(common PRIVATE
emitter/legacy_sse.cpp emitter/legacy_sse.cpp
emitter/movs.cpp emitter/movs.cpp
emitter/simd.cpp emitter/simd.cpp
emitter/LnxCpuDetect.cpp
emitter/WinCpuDetect.cpp
emitter/x86emitter.cpp emitter/x86emitter.cpp
Darwin/DarwinThreads.cpp Darwin/DarwinThreads.cpp
Darwin/DarwinMisc.cpp Darwin/DarwinMisc.cpp

View File

@ -88,7 +88,6 @@
<ClCompile Include="Semaphore.cpp" /> <ClCompile Include="Semaphore.cpp" />
<ClCompile Include="emitter\avx.cpp" /> <ClCompile Include="emitter\avx.cpp" />
<ClCompile Include="emitter\bmi.cpp" /> <ClCompile Include="emitter\bmi.cpp" />
<ClCompile Include="emitter\cpudetect.cpp" />
<ClCompile Include="emitter\fpu.cpp" /> <ClCompile Include="emitter\fpu.cpp" />
<ClCompile Include="emitter\groups.cpp" /> <ClCompile Include="emitter\groups.cpp" />
<ClCompile Include="emitter\jmp.cpp" /> <ClCompile Include="emitter\jmp.cpp" />
@ -97,8 +96,6 @@
<ClCompile Include="emitter\movs.cpp" /> <ClCompile Include="emitter\movs.cpp" />
<ClCompile Include="emitter\simd.cpp" /> <ClCompile Include="emitter\simd.cpp" />
<ClCompile Include="emitter\x86emitter.cpp" /> <ClCompile Include="emitter\x86emitter.cpp" />
<ClCompile Include="emitter\LnxCpuDetect.cpp" />
<ClCompile Include="emitter\WinCpuDetect.cpp" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<MASM Include="FastJmp.asm" /> <MASM Include="FastJmp.asm" />

View File

@ -10,9 +10,6 @@
<ClCompile Include="Console.cpp"> <ClCompile Include="Console.cpp">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="emitter\cpudetect.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="emitter\fpu.cpp"> <ClCompile Include="emitter\fpu.cpp">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
@ -28,9 +25,6 @@
<ClCompile Include="emitter\legacy_sse.cpp"> <ClCompile Include="emitter\legacy_sse.cpp">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="emitter\LnxCpuDetect.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="emitter\avx.cpp"> <ClCompile Include="emitter\avx.cpp">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
@ -64,9 +58,6 @@
<ClCompile Include="emitter\simd.cpp"> <ClCompile Include="emitter\simd.cpp">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="emitter\WinCpuDetect.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="Windows\WinHostSys.cpp"> <ClCompile Include="Windows\WinHostSys.cpp">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>

View File

@ -1,23 +0,0 @@
// SPDX-FileCopyrightText: 2002-2023 PCSX2 Dev Team
// SPDX-License-Identifier: LGPL-3.0+
#ifndef _WIN32
#include "common/emitter/tools.h"
#include <unistd.h>
// Note: Apparently this solution is Linux/Solaris only.
// FreeBSD/OsX need something far more complicated (apparently)
// Stores the number of online logical processors in LogicalCores.
// On Linux the value comes from sysconf(); every other non-Windows platform
// reports a single core. The stored value is never less than 1.
void x86capabilities::CountLogicalCores()
{
#ifdef __linux__
	// sysconf() signals failure by returning -1. Writing that straight into the
	// u32 LogicalCores field would turn it into a huge bogus core count, so any
	// non-positive result is treated as "unknown" and replaced by one core.
	const long online = sysconf(_SC_NPROCESSORS_ONLN);
	LogicalCores = (online > 0) ? static_cast<unsigned int>(online) : 1u;
#else
	// No detection is implemented here for other platforms.
	LogicalCores = 1;
#endif
}
#endif

View File

@ -1,28 +0,0 @@
// SPDX-FileCopyrightText: 2002-2023 PCSX2 Dev Team
// SPDX-License-Identifier: LGPL-3.0+
#if defined(_WIN32)
#include "common/Console.h"
#include "common/emitter/tools.h"
#include "common/RedtapeWindows.h"
void x86capabilities::CountLogicalCores()
{
DWORD_PTR vProcessCPUs;
DWORD_PTR vSystemCPUs;
LogicalCores = 1;
if (!GetProcessAffinityMask(GetCurrentProcess(), &vProcessCPUs, &vSystemCPUs))
return;
uint CPUs = 0;
for (DWORD_PTR bit = 1; bit != 0; bit <<= 1)
if (vSystemCPUs & bit)
CPUs++;
LogicalCores = CPUs;
}
#endif

View File

@ -1,217 +0,0 @@
// SPDX-FileCopyrightText: 2002-2023 PCSX2 Dev Team
// SPDX-License-Identifier: LGPL-3.0+
#include "common/General.h"
#include "common/emitter/tools.h"
#include "common/emitter/internal.h"
#include "common/VectorIntrin.h"
#include <atomic>
// CPU information support
#if defined(_WIN32)
#define cpuid __cpuid
#define cpuidex __cpuidex
#else
#include <cpuid.h>
// Executes CPUID for leaf `InfoType` / sub-leaf `count` and writes the four
// result registers to CPUInfo[0..3] in the order the __cpuid_count macro
// yields them (eax, ebx, ecx, edx). Mirrors the MSVC __cpuidex signature.
static __inline__ __attribute__((always_inline)) void cpuidex(int CPUInfo[], const int InfoType, const int count)
{
	int eax = 0;
	int ebx = 0;
	int ecx = 0;
	int edx = 0;
	__cpuid_count(InfoType, count, eax, ebx, ecx, edx);

	CPUInfo[0] = eax;
	CPUInfo[1] = ebx;
	CPUInfo[2] = ecx;
	CPUInfo[3] = edx;
}
// Executes CPUID for leaf `InfoType` and writes the four result registers to
// CPUInfo[0..3] in the order the __cpuid macro yields them (eax, ebx, ecx,
// edx). Mirrors the MSVC __cpuid signature.
static __inline__ __attribute__((always_inline)) void cpuid(int CPUInfo[], const int InfoType)
{
	int eax = 0;
	int ebx = 0;
	int ecx = 0;
	int edx = 0;
	__cpuid(InfoType, eax, ebx, ecx, edx);

	CPUInfo[0] = eax;
	CPUInfo[1] = ebx;
	CPUInfo[2] = ecx;
	CPUInfo[3] = edx;
}
#endif
using namespace x86Emitter;
// Process-wide CPU capability record. It starts out unidentified (see the
// constructor) and is filled in by calling Identify() / CountCores() on it.
alignas(16) x86capabilities x86caps;
#if defined(_MSC_VER)
// Optimizations are disabled for this constructor on MSVC: x86capabilities is
// what detects AVX support, but with optimizations on the compiler may inline
// the zero-fill using AVX instructions, i.e. before support has been checked.
#pragma optimize("", off)
#endif
// Builds an "unidentified" record: isIdentified is false, the vendor is
// x86Vendor_Unknown, and every ID field, raw flag word, capability bit (via
// AllCapabilities) and core count is zero. Nothing is queried from the CPU
// here; that happens in Identify().
x86capabilities::x86capabilities()
: isIdentified(false)
, VendorID(x86Vendor_Unknown)
, FamilyID(0)
, Model(0)
, TypeID(0)
, StepID(0)
, Flags(0)
, Flags2(0)
, EFlags(0)
, EFlags2(0)
, SEFlag(0)
, AllCapabilities(0)
, PhysicalCores(0)
, LogicalCores(0)
{
}
#if defined(_MSC_VER)
// Restore the optimization settings that were in effect before the constructor.
#pragma optimize("", on)
#endif
// Returns a human-readable label for the processor type held in TypeID.
// Values 0 through 3 map to fixed names; anything else yields "Unknown".
const char* x86capabilities::GetTypeName() const
{
	static const char* const kTypeNames[] = {
		"Standard OEM",
		"Overdrive",
		"Dual",
		"Reserved",
	};

	const auto knownCount = sizeof(kTypeNames) / sizeof(kTypeNames[0]);
	if (TypeID >= knownCount)
		return "Unknown";

	return kTypeNames[TypeID];
}
void x86capabilities::CountCores()
{
Identify();
// This will assign values into LogicalCores and PhysicalCores
CountLogicalCores();
}
// CPUID vendor strings, indexed by x86VendorType. Identify() compares the
// first 12 bytes of the reported vendor name against the entries that come
// before x86Vendor_Unknown; the last entry is only a placeholder for that slot.
static const char* tbl_x86vendors[] =
{
"GenuineIntel",
"AuthenticAMD",
"Unknown ",
};
// Performs all _cpuid-related activity. This fills *most* of the x86caps structure, except for
// the cpuSpeed and the mxcsr masks. Those must be completed manually.
void x86capabilities::Identify()
{
if (isIdentified)
return;
isIdentified = true;
s32 regs[4];
u32 cmds;
cpuid(regs, 0);
cmds = regs[0];
memcpy(&VendorName[0], &regs[1], 4);
memcpy(&VendorName[4], &regs[3], 4);
memcpy(&VendorName[8], &regs[2], 4);
// Determine Vendor Specifics!
// It's really not recommended that we base much (if anything) on CPU vendor names,
// however it's currently necessary in order to gain a (pseudo)reliable count of cores
// and threads used by the CPU (AMD and Intel can't agree on how to make this info available).
int vid;
for (vid = 0; vid < x86Vendor_Unknown; ++vid)
{
if (memcmp(VendorName, tbl_x86vendors[vid], 12) == 0)
break;
}
VendorID = static_cast<x86VendorType>(vid);
if (cmds >= 0x00000001)
{
cpuid(regs, 0x00000001);
StepID = regs[0] & 0xf;
Model = (regs[0] >> 4) & 0xf;
FamilyID = (regs[0] >> 8) & 0xf;
TypeID = (regs[0] >> 12) & 0x3;
//u32 x86_64_8BITBRANDID = regs[1] & 0xff;
Flags = regs[3];
Flags2 = regs[2];
}
if (cmds >= 0x00000007)
{
// Note: ECX must be 0 for AVX2 detection.
cpuidex(regs, 0x00000007, 0);
SEFlag = regs[1];
}
cpuid(regs, 0x80000000);
cmds = regs[0];
if (cmds >= 0x80000001)
{
cpuid(regs, 0x80000001);
//u32 x86_64_12BITBRANDID = regs[1] & 0xfff;
EFlags2 = regs[2];
EFlags = regs[3];
}
cpuid((int*)FamilyName, 0x80000002);
cpuid((int*)(FamilyName + 16), 0x80000003);
cpuid((int*)(FamilyName + 32), 0x80000004);
hasFloatingPointUnit = (Flags >> 0) & 1;
hasVirtual8086ModeEnhancements = (Flags >> 1) & 1;
hasDebuggingExtensions = (Flags >> 2) & 1;
hasPageSizeExtensions = (Flags >> 3) & 1;
hasTimeStampCounter = (Flags >> 4) & 1;
hasModelSpecificRegisters = (Flags >> 5) & 1;
hasPhysicalAddressExtension = (Flags >> 6) & 1;
hasMachineCheckArchitecture = (Flags >> 7) & 1;
hasCOMPXCHG8BInstruction = (Flags >> 8) & 1;
hasAdvancedProgrammableInterruptController = (Flags >> 9) & 1;
hasSEPFastSystemCall = (Flags >> 11) & 1;
hasMemoryTypeRangeRegisters = (Flags >> 12) & 1;
hasPTEGlobalFlag = (Flags >> 13) & 1;
hasMachineCheckArchitecture = (Flags >> 14) & 1;
hasConditionalMoveAndCompareInstructions = (Flags >> 15) & 1;
hasFGPageAttributeTable = (Flags >> 16) & 1;
has36bitPageSizeExtension = (Flags >> 17) & 1;
hasProcessorSerialNumber = (Flags >> 18) & 1;
hasCFLUSHInstruction = (Flags >> 19) & 1;
hasDebugStore = (Flags >> 21) & 1;
hasACPIThermalMonitorAndClockControl = (Flags >> 22) & 1;
hasFastStreamingSIMDExtensionsSaveRestore = (Flags >> 24) & 1;
hasStreamingSIMDExtensions = (Flags >> 25) & 1; //sse
hasStreamingSIMD2Extensions = (Flags >> 26) & 1; //sse2
hasSelfSnoop = (Flags >> 27) & 1;
hasMultiThreading = (Flags >> 28) & 1;
hasThermalMonitor = (Flags >> 29) & 1;
hasIntel64BitArchitecture = (Flags >> 30) & 1;
// -------------------------------------------------
// --> SSE3 / SSSE3 / SSE4.1 / SSE 4.2 detection <--
// -------------------------------------------------
hasStreamingSIMD3Extensions = (Flags2 >> 0) & 1; //sse3
hasSupplementalStreamingSIMD3Extensions = (Flags2 >> 9) & 1; //ssse3
hasStreamingSIMD4Extensions = (Flags2 >> 19) & 1; //sse4.1
hasStreamingSIMD4Extensions2 = (Flags2 >> 20) & 1; //sse4.2
if ((Flags2 >> 27) & 1) // OSXSAVE
{
// Note: In theory, we should use xgetbv to check OS support
// but all OSes we officially run under support it
// and its intrinsic requires extra compiler flags
hasAVX = (Flags2 >> 28) & 1; //avx
hasFMA = (Flags2 >> 12) & 1; //fma
hasAVX2 = (SEFlag >> 5) & 1; //avx2
}
hasBMI1 = (SEFlag >> 3) & 1;
hasBMI2 = (SEFlag >> 8) & 1;
// Ones only for AMDs:
hasAMD64BitArchitecture = (EFlags >> 29) & 1; //64bit cpu
hasStreamingSIMD4ExtensionsA = (EFlags2 >> 6) & 1; //INSERTQ / EXTRQ / MOVNT
isIdentified = true;
}

View File

@ -5,109 +5,6 @@
#include "common/Pcsx2Defs.h" #include "common/Pcsx2Defs.h"
// CPU vendor as derived from the CPUID vendor string.
// The numeric values double as indices into the vendor-name table used by
// x86capabilities::Identify(), and x86Vendor_Unknown also serves as the loop
// bound there, so the values must stay dense and Unknown must stay last.
enum x86VendorType
{
x86Vendor_Intel = 0,
x86Vendor_AMD = 1,
x86Vendor_Unknown = 2
};
// --------------------------------------------------------------------------------------
// x86capabilities
// --------------------------------------------------------------------------------------
// Snapshot of what CPUID reports about the host processor: vendor, family/model
// identifiers, the raw feature words, one decoded bit per feature, and core
// counts. A default-constructed object is empty; Identify() fills in the CPUID
// data and CountCores() additionally fills in the core counts.
class x86capabilities
{
public:
// Set once Identify() has run; further Identify() calls return immediately.
bool isIdentified;
public:
x86VendorType VendorID;
uint FamilyID; // Processor Family (base 4-bit field of CPUID leaf 1)
uint Model; // Processor Model (base 4-bit field of CPUID leaf 1)
uint TypeID; // Processor Type (see GetTypeName())
uint StepID; // Stepping ID
u32 Flags; // Feature Flags (CPUID leaf 1, fourth result register)
u32 Flags2; // More Feature Flags (CPUID leaf 1, third result register)
u32 EFlags; // Extended Feature Flags (CPUID leaf 0x80000001, fourth result register)
u32 EFlags2; // Extended Feature Flags pg2 (CPUID leaf 0x80000001, third result register)
u32 SEFlag; // Structured Extended Feature Flags Enumeration (CPUID leaf 7, sub-leaf 0, second result register)
char VendorName[16] = {}; // Vendor/Creator ID (12 bytes are written; the rest stays zero)
char FamilyName[50] = {}; // the original cpu name (48 bytes are written; the rest stays zero)
// ----------------------------------------------------------------------------
// x86 CPU Capabilities Section (all boolean flags!)
// ----------------------------------------------------------------------------
// AllCapabilities overlays the bitfield struct so that every flag can be
// initialised in one store (the constructor sets it to 0).
union
{
u64 AllCapabilities = 0;
struct
{
// Decoded from Flags.
u32 hasFloatingPointUnit : 1;
u32 hasVirtual8086ModeEnhancements : 1;
u32 hasDebuggingExtensions : 1;
u32 hasPageSizeExtensions : 1;
u32 hasTimeStampCounter : 1;
u32 hasModelSpecificRegisters : 1;
u32 hasPhysicalAddressExtension : 1;
u32 hasCOMPXCHG8BInstruction : 1;
u32 hasAdvancedProgrammableInterruptController : 1;
u32 hasSEPFastSystemCall : 1;
u32 hasMemoryTypeRangeRegisters : 1;
u32 hasPTEGlobalFlag : 1;
u32 hasMachineCheckArchitecture : 1;
u32 hasConditionalMoveAndCompareInstructions : 1;
u32 hasFGPageAttributeTable : 1;
u32 has36bitPageSizeExtension : 1;
u32 hasProcessorSerialNumber : 1;
u32 hasCFLUSHInstruction : 1;
u32 hasDebugStore : 1;
u32 hasACPIThermalMonitorAndClockControl : 1;
u32 hasFastStreamingSIMDExtensionsSaveRestore : 1;
u32 hasStreamingSIMDExtensions : 1;
u32 hasStreamingSIMD2Extensions : 1;
u32 hasSelfSnoop : 1;
// is TRUE for both multi-core and Hyperthreaded CPUs.
u32 hasMultiThreading : 1;
u32 hasThermalMonitor : 1;
u32 hasIntel64BitArchitecture : 1;
// Decoded from Flags2 (SSE3 / SSSE3 / SSE4.1 / SSE4.2, then AVX and FMA).
u32 hasStreamingSIMD3Extensions : 1;
u32 hasSupplementalStreamingSIMD3Extensions : 1;
u32 hasStreamingSIMD4Extensions : 1;
u32 hasStreamingSIMD4Extensions2 : 1;
// hasAVX, hasFMA and hasAVX2 are only set when the OSXSAVE bit of Flags2 is set.
u32 hasAVX : 1;
u32 hasAVX2 : 1;
// Decoded from SEFlag.
u32 hasBMI1 : 1;
u32 hasBMI2 : 1;
u32 hasFMA : 1;
// AMD-specific CPU Features
u32 hasAMD64BitArchitecture : 1;
u32 hasStreamingSIMD4ExtensionsA : 1;
};
};
// Core Counts!
// NOTE(review): no code visible alongside this class assigns PhysicalCores
// after construction — confirm whether it is ever meaningful.
u32 PhysicalCores = 0;
u32 LogicalCores = 0;
public:
// Creates an empty, unidentified record.
x86capabilities();
// Queries CPUID and fills in the fields above; does nothing after the first call.
void Identify();
// Runs Identify() and then the platform-specific logical core count.
void CountCores();
// Returns a display name for TypeID.
const char* GetTypeName() const;
protected:
// Platform-specific: stores the logical processor count in LogicalCores.
void CountLogicalCores();
};
enum SSE_RoundMode enum SSE_RoundMode
{ {
SSE_RoundMode_FIRST = 0, SSE_RoundMode_FIRST = 0,
@ -185,5 +82,3 @@ union SSE_MXCSR
operator x86Emitter::xIndirect32() const; operator x86Emitter::xIndirect32() const;
}; };
alignas(16) extern x86capabilities x86caps;

View File

@ -563,7 +563,7 @@ const xRegister32
// Core2/i7 CPUs prefer unaligned addresses. Checking for SSSE3 is a decent filter. // Core2/i7 CPUs prefer unaligned addresses. Checking for SSSE3 is a decent filter.
// (also align in debug modes for disasm convenience) // (also align in debug modes for disasm convenience)
if (IsDebugBuild || !x86caps.hasSupplementalStreamingSIMD3Extensions) if constexpr (IsDebugBuild)
{ {
// - P4's and earlier prefer 16 byte alignment. // - P4's and earlier prefer 16 byte alignment.
// - AMD Athlons and Phenoms prefer 8 byte alignment, but I don't have an easy // - AMD Athlons and Phenoms prefer 8 byte alignment, but I don't have an easy

View File

@ -106,7 +106,6 @@ void SysLogMachineCaps()
GetOSVersionString().c_str(), GetOSVersionString().c_str(),
(u32)(GetPhysicalMemory() / _1mb)); (u32)(GetPhysicalMemory() / _1mb));
cpuinfo_initialize();
Console.Indent().WriteLn("Processor = %s", cpuinfo_get_package(0)->name); Console.Indent().WriteLn("Processor = %s", cpuinfo_get_package(0)->name);
Console.Indent().WriteLn("Core Count = %u cores", cpuinfo_get_cores_count()); Console.Indent().WriteLn("Core Count = %u cores", cpuinfo_get_cores_count());
Console.Indent().WriteLn("Thread Count = %u threads", cpuinfo_get_processors_count()); Console.Indent().WriteLn("Thread Count = %u threads", cpuinfo_get_processors_count());
@ -115,9 +114,9 @@ void SysLogMachineCaps()
std::string features; std::string features;
if (x86caps.hasAVX) if (cpuinfo_has_x86_avx())
features += "AVX "; features += "AVX ";
if (x86caps.hasAVX2) if (cpuinfo_has_x86_avx2())
features += "AVX2 "; features += "AVX2 ";
StringUtil::StripWhitespace(&features); StringUtil::StripWhitespace(&features);

View File

@ -51,6 +51,7 @@
#include "common/Timer.h" #include "common/Timer.h"
#include "IconsFontAwesome5.h" #include "IconsFontAwesome5.h"
#include "cpuinfo.h"
#include "discord_rpc.h" #include "discord_rpc.h"
#include "fmt/core.h" #include "fmt/core.h"
@ -186,10 +187,9 @@ bool VMManager::PerformEarlyHardwareChecks(const char** error)
#if defined(_M_X86) #if defined(_M_X86)
// On Windows, this gets called as a global object constructor, before any of our objects are constructed. // On Windows, this gets called as a global object constructor, before any of our objects are constructed.
// So, we have to put it on the stack instead. // So, we have to put it on the stack instead.
x86capabilities temp_x86_caps; cpuinfo_initialize();
temp_x86_caps.Identify();
if (!temp_x86_caps.hasStreamingSIMD4Extensions) if (!cpuinfo_has_x86_sse4_1())
{ {
*error = *error =
"PCSX2 requires the Streaming SIMD 4.1 Extensions instruction set, which your CPU does not support.\n\n" "PCSX2 requires the Streaming SIMD 4.1 Extensions instruction set, which your CPU does not support.\n\n"
@ -199,7 +199,7 @@ bool VMManager::PerformEarlyHardwareChecks(const char** error)
} }
#if _M_SSE >= 0x0501 #if _M_SSE >= 0x0501
if (!temp_x86_caps.hasAVX || !temp_x86_caps.hasAVX2) if (!cpuinfo_has_x86_avx2())
{ {
*error = "This build of PCSX2 requires the Advanced Vector Extensions 2 instruction set, which your CPU does " *error = "This build of PCSX2 requires the Advanced Vector Extensions 2 instruction set, which your CPU does "
"not support.\n\n" "not support.\n\n"
@ -342,8 +342,9 @@ bool VMManager::Internal::CPUThreadInitialize()
} }
#endif #endif
x86caps.Identify(); if (!cpuinfo_initialize())
x86caps.CountCores(); Console.Error("cpuinfo_initialize() failed.");
SysLogMachineCaps(); SysLogMachineCaps();
if (!SysMemory::Allocate()) if (!SysMemory::Allocate())
@ -2803,8 +2804,6 @@ static std::once_flag s_processor_list_initialized;
#if defined(__linux__) || defined(_WIN32) #if defined(__linux__) || defined(_WIN32)
#include "cpuinfo.h"
static u32 GetProcessorIdForProcessor(const cpuinfo_processor* proc) static u32 GetProcessorIdForProcessor(const cpuinfo_processor* proc)
{ {
#if defined(__linux__) #if defined(__linux__)
@ -2816,14 +2815,8 @@ static u32 GetProcessorIdForProcessor(const cpuinfo_processor* proc)
#endif #endif
} }
static void InitializeCPUInfo() static void InitializeProcessorList()
{ {
if (!cpuinfo_initialize())
{
Console.Error("Failed to initialize cpuinfo");
return;
}
const u32 cluster_count = cpuinfo_get_clusters_count(); const u32 cluster_count = cpuinfo_get_clusters_count();
if (cluster_count == 0) if (cluster_count == 0)
{ {
@ -2922,7 +2915,7 @@ static void SetMTVUAndAffinityControlDefault(SettingsInterface& si)
static u32 s_big_cores; static u32 s_big_cores;
static u32 s_small_cores; static u32 s_small_cores;
static void InitializeCPUInfo() static void InitializeProcessorList()
{ {
s_big_cores = 0; s_big_cores = 0;
s_small_cores = 0; s_small_cores = 0;
@ -2957,7 +2950,7 @@ static void SetMTVUAndAffinityControlDefault(SettingsInterface& si)
#else #else
static void InitializeCPUInfo() static void InitializeProcessorList()
{ {
DevCon.WriteLn("(VMManager) InitializeCPUInfo() not implemented."); DevCon.WriteLn("(VMManager) InitializeCPUInfo() not implemented.");
} }
@ -2970,7 +2963,7 @@ static void SetMTVUAndAffinityControlDefault(SettingsInterface& si)
void VMManager::EnsureCPUInfoInitialized() void VMManager::EnsureCPUInfoInitialized()
{ {
std::call_once(s_processor_list_initialized, InitializeCPUInfo); std::call_once(s_processor_list_initialized, InitializeProcessorList);
} }
void VMManager::SetEmuThreadAffinities() void VMManager::SetEmuThreadAffinities()

View File

@ -2618,18 +2618,8 @@ void recPSRAVW()
xPSRA.D(xRegisterSSE(t1reg), xRegisterSSE(t0reg)); xPSRA.D(xRegisterSSE(t1reg), xRegisterSSE(t0reg));
// merge & sign extend // merge & sign extend
if (x86caps.hasStreamingSIMD4Extensions)
{
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg));
xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
}
else
{
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg));
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D));
xPSRA.D(xRegisterSSE(t0reg), 31); // get the signs
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
}
_freeXMMreg(t0reg); _freeXMMreg(t0reg);
_freeXMMreg(t1reg); _freeXMMreg(t1reg);
@ -2739,26 +2729,11 @@ void recPMULTUW()
} }
// interleave & sign extend // interleave & sign extend
if (x86caps.hasStreamingSIMD4Extensions)
{
xPSHUF.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_HI), 0x88); xPSHUF.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_HI), 0x88);
xPSHUF.D(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI), 0xdd); xPSHUF.D(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI), 0xdd);
xPMOVSX.DQ(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO)); xPMOVSX.DQ(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO));
xPMOVSX.DQ(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI)); xPMOVSX.DQ(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI));
} }
else
{
int t0reg = _allocTempXMMreg(XMMT_INT);
xPSHUF.D(xRegisterSSE(t0reg), xRegisterSSE(EEREC_HI), 0xd8);
xMOVDQA(xRegisterSSE(EEREC_LO), xRegisterSSE(t0reg));
xMOVDQA(xRegisterSSE(EEREC_HI), xRegisterSSE(t0reg));
xPSRA.D(xRegisterSSE(t0reg), 31); // get the signs
xPUNPCK.LDQ(xRegisterSSE(EEREC_LO), xRegisterSSE(t0reg));
xPUNPCK.HDQ(xRegisterSSE(EEREC_HI), xRegisterSSE(t0reg));
_freeXMMreg(t0reg);
}
}
_clearNeededXMMregs(); _clearNeededXMMregs();
} }
@ -2805,25 +2780,11 @@ void recPMADDUW()
xPADD.Q(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_LO)); xPADD.Q(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_LO));
// interleave & sign extend // interleave & sign extend
if (x86caps.hasStreamingSIMD4Extensions)
{
xPSHUF.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_HI), 0x88); xPSHUF.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_HI), 0x88);
xPSHUF.D(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI), 0xdd); xPSHUF.D(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI), 0xdd);
xPMOVSX.DQ(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO)); xPMOVSX.DQ(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO));
xPMOVSX.DQ(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI)); xPMOVSX.DQ(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI));
}
else
{
int t0reg = _allocTempXMMreg(XMMT_INT);
xPSHUF.D(xRegisterSSE(t0reg), xRegisterSSE(EEREC_HI), 0xd8);
xMOVDQA(xRegisterSSE(EEREC_LO), xRegisterSSE(t0reg));
xMOVDQA(xRegisterSSE(EEREC_HI), xRegisterSSE(t0reg));
xPSRA.D(xRegisterSSE(t0reg), 31); // get the signs
xPUNPCK.LDQ(xRegisterSSE(EEREC_LO), xRegisterSSE(t0reg));
xPUNPCK.HDQ(xRegisterSSE(EEREC_HI), xRegisterSSE(t0reg));
_freeXMMreg(t0reg);
}
_clearNeededXMMregs(); _clearNeededXMMregs();
} }

View File

@ -3,6 +3,8 @@
#pragma once #pragma once
#include "cpuinfo.h"
//------------------------------------------------------------------ //------------------------------------------------------------------
// Dispatcher Functions // Dispatcher Functions
//------------------------------------------------------------------ //------------------------------------------------------------------
@ -204,7 +206,7 @@ static void mVUGenerateCopyPipelineState(mV)
{ {
mVU.copyPLState = xGetAlignedCallTarget(); mVU.copyPLState = xGetAlignedCallTarget();
if (x86caps.hasAVX2) if (cpuinfo_has_x86_avx())
{ {
xVMOVAPS(ymm0, ptr[rax]); xVMOVAPS(ymm0, ptr[rax]);
xVMOVAPS(ymm1, ptr[rax + 32u]); xVMOVAPS(ymm1, ptr[rax + 32u]);
@ -249,7 +251,7 @@ static void mVUGenerateCompareState(mV)
{ {
mVU.compareStateF = xGetAlignedCallTarget(); mVU.compareStateF = xGetAlignedCallTarget();
if (!x86caps.hasAVX2) if (!cpuinfo_has_x86_avx2())
{ {
xMOVAPS (xmm0, ptr32[arg1reg]); xMOVAPS (xmm0, ptr32[arg1reg]);
xPCMP.EQD(xmm0, ptr32[arg2reg]); xPCMP.EQD(xmm0, ptr32[arg2reg]);

View File

@ -8,6 +8,8 @@
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <string.h> #include <string.h>
#include "cpuinfo.h"
#ifdef MULTI_ISA_UNSHARED_COMPILATION #ifdef MULTI_ISA_UNSHARED_COMPILATION
#include "common/emitter/tools.h" #include "common/emitter/tools.h"
@ -22,10 +24,10 @@ enum class TestISA
static bool CheckCapabilities(TestISA required_caps) static bool CheckCapabilities(TestISA required_caps)
{ {
x86caps.Identify(); cpuinfo_initialize();
if (required_caps == TestISA::isa_avx && !x86caps.hasAVX) if (required_caps == TestISA::isa_avx && !cpuinfo_has_x86_avx())
return false; return false;
if (required_caps == TestISA::isa_avx2 && !x86caps.hasAVX2) if (required_caps == TestISA::isa_avx2 && !cpuinfo_has_x86_avx2())
return false; return false;
return true; return true;