[CPU] Remove `use_haswell_instructions` in favor of `x64_extension_mask`
Rather than having a single bool to conditionally detect Haswell-level instruction features, the granularity is increased with a new `x64_extension_mask`, where individual features within the x64 backend can be turned on or off as bits in a mask. Since we have an ARM backend on the horizon, I've added this to the new `x64` configuration group rather than `CPU`. This new pattern will hopefully allow testing to be more targeted at certain processor features, and allows the user to decide whether they want certain features enabled or disabled (such as avoiding BMI2 on certain AMD processors due to pdep/pext being incredibly slow). The default configuration is to detect and utilize all available features.
This commit is contained in:
parent
37aa3d129c
commit
233ed107fe
|
@ -26,10 +26,23 @@
|
|||
#include "xenia/cpu/processor.h"
|
||||
#include "xenia/cpu/stack_walker.h"
|
||||
|
||||
DEFINE_bool(
|
||||
use_haswell_instructions, true,
|
||||
"Uses the AVX2/FMA/etc instructions on Haswell processors when available.",
|
||||
"CPU");
|
||||
DEFINE_int32(x64_extension_mask, -1,
|
||||
"Allow the detection and utilization of specific instruction set "
|
||||
"features.\n"
|
||||
" 0 = x86_64 + AVX1\n"
|
||||
" 1 = AVX2\n"
|
||||
" 2 = FMA\n"
|
||||
" 4 = LZCNT\n"
|
||||
" 8 = BMI1\n"
|
||||
" 16 = BMI2\n"
|
||||
" 32 = F16C\n"
|
||||
" 64 = Movbe\n"
|
||||
" 128 = AVX512F\n"
|
||||
" 256 = AVX512VL\n"
|
||||
" 512 = AVX512BW\n"
|
||||
" 1024 = AVX512DQ\n"
|
||||
" -1 = Detect and utilize all possible processor features\n",
|
||||
"x64");
|
||||
|
||||
namespace xe {
|
||||
namespace cpu {
|
||||
|
@ -84,7 +97,7 @@ bool X64Backend::Initialize(Processor* processor) {
|
|||
}
|
||||
|
||||
// Need movbe to do advanced LOAD/STORE tricks.
|
||||
if (cvars::use_haswell_instructions) {
|
||||
if (cvars::x64_extension_mask & kX64EmitMovbe) {
|
||||
machine_info_.supports_extended_load_store =
|
||||
cpu.has(Xbyak::util::Cpu::tMOVBE);
|
||||
} else {
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
#include "xenia/base/cvar.h"
|
||||
#include "xenia/cpu/backend/backend.h"
|
||||
|
||||
DECLARE_bool(use_haswell_instructions);
|
||||
DECLARE_int32(x64_extension_mask);
|
||||
|
||||
namespace xe {
|
||||
class Exception;
|
||||
|
|
|
@ -74,23 +74,32 @@ X64Emitter::X64Emitter(X64Backend* backend, XbyakAllocator* allocator)
|
|||
backend_(backend),
|
||||
code_cache_(backend->code_cache()),
|
||||
allocator_(allocator) {
|
||||
if (cvars::use_haswell_instructions) {
|
||||
if (cvars::x64_extension_mask & kX64EmitAVX2)
|
||||
feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tAVX2) ? kX64EmitAVX2 : 0;
|
||||
if (cvars::x64_extension_mask & kX64EmitFMA)
|
||||
feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tFMA) ? kX64EmitFMA : 0;
|
||||
if (cvars::x64_extension_mask & kX64EmitLZCNT)
|
||||
feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tLZCNT) ? kX64EmitLZCNT : 0;
|
||||
if (cvars::x64_extension_mask & kX64EmitBMI1)
|
||||
feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tBMI1) ? kX64EmitBMI1 : 0;
|
||||
if (cvars::x64_extension_mask & kX64EmitBMI2)
|
||||
feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tBMI2) ? kX64EmitBMI2 : 0;
|
||||
if (cvars::x64_extension_mask & kX64EmitF16C)
|
||||
feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tF16C) ? kX64EmitF16C : 0;
|
||||
if (cvars::x64_extension_mask & kX64EmitMovbe)
|
||||
feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tMOVBE) ? kX64EmitMovbe : 0;
|
||||
if (cvars::x64_extension_mask & kX64EmitAVX512F)
|
||||
feature_flags_ |=
|
||||
cpu_.has(Xbyak::util::Cpu::tAVX512F) ? kX64EmitAVX512F : 0;
|
||||
if (cvars::x64_extension_mask & kX64EmitAVX512VL)
|
||||
feature_flags_ |=
|
||||
cpu_.has(Xbyak::util::Cpu::tAVX512VL) ? kX64EmitAVX512VL : 0;
|
||||
if (cvars::x64_extension_mask & kX64EmitAVX512BW)
|
||||
feature_flags_ |=
|
||||
cpu_.has(Xbyak::util::Cpu::tAVX512BW) ? kX64EmitAVX512BW : 0;
|
||||
if (cvars::x64_extension_mask & kX64EmitAVX512DQ)
|
||||
feature_flags_ |=
|
||||
cpu_.has(Xbyak::util::Cpu::tAVX512DQ) ? kX64EmitAVX512DQ : 0;
|
||||
}
|
||||
|
||||
if (!cpu_.has(Xbyak::util::Cpu::tAVX)) {
|
||||
xe::FatalError(
|
||||
|
|
|
@ -125,19 +125,19 @@ class XbyakAllocator : public Xbyak::Allocator {
|
|||
};
|
||||
|
||||
enum X64EmitterFeatureFlags {
|
||||
kX64EmitAVX2 = 1 << 1,
|
||||
kX64EmitFMA = 1 << 2,
|
||||
kX64EmitLZCNT = 1 << 3,
|
||||
kX64EmitBMI1 = 1 << 4,
|
||||
kX64EmitBMI2 = 1 << 5,
|
||||
kX64EmitF16C = 1 << 6,
|
||||
kX64EmitMovbe = 1 << 7,
|
||||
kX64EmitAVX2 = 1 << 0,
|
||||
kX64EmitFMA = 1 << 1,
|
||||
kX64EmitLZCNT = 1 << 2,
|
||||
kX64EmitBMI1 = 1 << 3,
|
||||
kX64EmitBMI2 = 1 << 4,
|
||||
kX64EmitF16C = 1 << 5,
|
||||
kX64EmitMovbe = 1 << 6,
|
||||
|
||||
kX64EmitAVX512F = 1 << 8,
|
||||
kX64EmitAVX512VL = 1 << 9,
|
||||
kX64EmitAVX512F = 1 << 7,
|
||||
kX64EmitAVX512VL = 1 << 8,
|
||||
|
||||
kX64EmitAVX512BW = 1 << 10,
|
||||
kX64EmitAVX512DQ = 1 << 11,
|
||||
kX64EmitAVX512BW = 1 << 9,
|
||||
kX64EmitAVX512DQ = 1 << 10,
|
||||
|
||||
kX64EmitAVX512Ortho = kX64EmitAVX512F | kX64EmitAVX512VL,
|
||||
kX64EmitAVX512Ortho64 = kX64EmitAVX512Ortho | kX64EmitAVX512DQ
|
||||
|
|
|
@ -422,8 +422,7 @@ bool RunTests(const std::string_view test_name) {
|
|||
int failed_count = 0;
|
||||
int passed_count = 0;
|
||||
|
||||
XELOGI("Haswell instruction usage {}.",
|
||||
cvars::use_haswell_instructions ? "enabled" : "disabled");
|
||||
XELOGI("Instruction feature mask {}.", cvars::x64_extension_mask);
|
||||
|
||||
auto test_path_root = cvars::test_path;
|
||||
std::vector<std::filesystem::path> test_files;
|
||||
|
|
Loading…
Reference in New Issue