From 233ed107fe3bb4532e85b8e558a51c67c4becc64 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Mon, 10 Jan 2022 16:18:55 -0800 Subject: [PATCH] [CPU] Remove `use_haswell_instructions` in favor of `x64_extension_mask` Rather than having a single bool to conditionally detect Haswell-level instruction features, the granularity is increased with a new `x64_extension_mask` where individual features within the x64 backend can be turned on or off in a bit-mask manner. Since we have an ARM backend on the horizon, I've added this to the new `x64` configuration-group rather than `CPU`. This new pattern will hopefully allow for testing to be more targeted to certain processor features and allow the user to determine if they want certain features to be enabled or disabled (such as avoiding BMI2 on certain AMD processors due to pdep/pext being incredibly slow). The default configuration is to detect and utilize all available features. --- src/xenia/cpu/backend/x64/x64_backend.cc | 23 +++++++++++++++---- src/xenia/cpu/backend/x64/x64_backend.h | 2 +- src/xenia/cpu/backend/x64/x64_emitter.cc | 13 +++++++++-- src/xenia/cpu/backend/x64/x64_emitter.h | 22 +++++++++--------- src/xenia/cpu/ppc/testing/ppc_testing_main.cc | 3 +-- 5 files changed, 42 insertions(+), 21 deletions(-) diff --git a/src/xenia/cpu/backend/x64/x64_backend.cc b/src/xenia/cpu/backend/x64/x64_backend.cc index 130d84456..fe1326920 100644 --- a/src/xenia/cpu/backend/x64/x64_backend.cc +++ b/src/xenia/cpu/backend/x64/x64_backend.cc @@ -26,10 +26,23 @@ #include "xenia/cpu/processor.h" #include "xenia/cpu/stack_walker.h" -DEFINE_bool( - use_haswell_instructions, true, - "Uses the AVX2/FMA/etc instructions on Haswell processors when available.", - "CPU"); +DEFINE_int32(x64_extension_mask, -1, + "Allow the detection and utilization of specific instruction set " + "features.\n" + " 0 = x86_64 + AVX1\n" + " 1 = AVX2\n" + " 2 = FMA\n" + " 4 = LZCNT\n" + " 8 = BMI1\n" + " 16 = BMI2\n" + " 32 = F16C\n" + " 64 = Movbe\n" + " 128
= AVX512F\n" + " 256 = AVX512VL\n" + " 512 = AVX512BW\n" + " 1024 = AVX512DQ\n" + " -1 = Detect and utilize all possible processor features\n", + "x64"); namespace xe { namespace cpu { @@ -84,7 +97,7 @@ bool X64Backend::Initialize(Processor* processor) { } // Need movbe to do advanced LOAD/STORE tricks. - if (cvars::use_haswell_instructions) { + if (cvars::x64_extension_mask & kX64EmitMovbe) { machine_info_.supports_extended_load_store = cpu.has(Xbyak::util::Cpu::tMOVBE); } else { diff --git a/src/xenia/cpu/backend/x64/x64_backend.h b/src/xenia/cpu/backend/x64/x64_backend.h index d3036e875..4cb69e040 100644 --- a/src/xenia/cpu/backend/x64/x64_backend.h +++ b/src/xenia/cpu/backend/x64/x64_backend.h @@ -15,7 +15,7 @@ #include "xenia/base/cvar.h" #include "xenia/cpu/backend/backend.h" -DECLARE_bool(use_haswell_instructions); +DECLARE_int32(x64_extension_mask); namespace xe { class Exception; diff --git a/src/xenia/cpu/backend/x64/x64_emitter.cc b/src/xenia/cpu/backend/x64/x64_emitter.cc index ae9ba1eed..d555ff7eb 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.cc +++ b/src/xenia/cpu/backend/x64/x64_emitter.cc @@ -74,23 +74,32 @@ X64Emitter::X64Emitter(X64Backend* backend, XbyakAllocator* allocator) backend_(backend), code_cache_(backend->code_cache()), allocator_(allocator) { - if (cvars::use_haswell_instructions) { + if (cvars::x64_extension_mask & kX64EmitAVX2) feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tAVX2) ? kX64EmitAVX2 : 0; + if (cvars::x64_extension_mask & kX64EmitFMA) feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tFMA) ? kX64EmitFMA : 0; + if (cvars::x64_extension_mask & kX64EmitLZCNT) feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tLZCNT) ? kX64EmitLZCNT : 0; + if (cvars::x64_extension_mask & kX64EmitBMI1) feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tBMI1) ? kX64EmitBMI1 : 0; + if (cvars::x64_extension_mask & kX64EmitBMI2) feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tBMI2) ? 
kX64EmitBMI2 : 0; + if (cvars::x64_extension_mask & kX64EmitF16C) feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tF16C) ? kX64EmitF16C : 0; + if (cvars::x64_extension_mask & kX64EmitMovbe) feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tMOVBE) ? kX64EmitMovbe : 0; + if (cvars::x64_extension_mask & kX64EmitAVX512F) feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tAVX512F) ? kX64EmitAVX512F : 0; + if (cvars::x64_extension_mask & kX64EmitAVX512VL) feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tAVX512VL) ? kX64EmitAVX512VL : 0; + if (cvars::x64_extension_mask & kX64EmitAVX512BW) feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tAVX512BW) ? kX64EmitAVX512BW : 0; + if (cvars::x64_extension_mask & kX64EmitAVX512DQ) feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tAVX512DQ) ? kX64EmitAVX512DQ : 0; - } if (!cpu_.has(Xbyak::util::Cpu::tAVX)) { xe::FatalError( diff --git a/src/xenia/cpu/backend/x64/x64_emitter.h b/src/xenia/cpu/backend/x64/x64_emitter.h index b84162d34..be8cd0b1a 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.h +++ b/src/xenia/cpu/backend/x64/x64_emitter.h @@ -125,19 +125,19 @@ class XbyakAllocator : public Xbyak::Allocator { }; enum X64EmitterFeatureFlags { - kX64EmitAVX2 = 1 << 1, - kX64EmitFMA = 1 << 2, - kX64EmitLZCNT = 1 << 3, - kX64EmitBMI1 = 1 << 4, - kX64EmitBMI2 = 1 << 5, - kX64EmitF16C = 1 << 6, - kX64EmitMovbe = 1 << 7, + kX64EmitAVX2 = 1 << 0, + kX64EmitFMA = 1 << 1, + kX64EmitLZCNT = 1 << 2, + kX64EmitBMI1 = 1 << 3, + kX64EmitBMI2 = 1 << 4, + kX64EmitF16C = 1 << 5, + kX64EmitMovbe = 1 << 6, - kX64EmitAVX512F = 1 << 8, - kX64EmitAVX512VL = 1 << 9, + kX64EmitAVX512F = 1 << 7, + kX64EmitAVX512VL = 1 << 8, - kX64EmitAVX512BW = 1 << 10, - kX64EmitAVX512DQ = 1 << 11, + kX64EmitAVX512BW = 1 << 9, + kX64EmitAVX512DQ = 1 << 10, kX64EmitAVX512Ortho = kX64EmitAVX512F | kX64EmitAVX512VL, kX64EmitAVX512Ortho64 = kX64EmitAVX512Ortho | kX64EmitAVX512DQ diff --git a/src/xenia/cpu/ppc/testing/ppc_testing_main.cc b/src/xenia/cpu/ppc/testing/ppc_testing_main.cc index 
dbd184327..a39c41bd1 100644 --- a/src/xenia/cpu/ppc/testing/ppc_testing_main.cc +++ b/src/xenia/cpu/ppc/testing/ppc_testing_main.cc @@ -422,8 +422,7 @@ bool RunTests(const std::string_view test_name) { int failed_count = 0; int passed_count = 0; - XELOGI("Haswell instruction usage {}.", - cvars::use_haswell_instructions ? "enabled" : "disabled"); + XELOGI("Instruction feature mask {}.", cvars::x64_extension_mask); auto test_path_root = cvars::test_path; std::vector test_files;