[CPU] Remove `use_haswell_instructions` in favor of `x64_extension_mask`

Rather than having a single bool to conditionally detect Haswell-level
instruction features, the granularity is increased with a new
`x64_extension_mask` where individual features within the x64 backend
can be turned on or off in a bit-mask manner. Since we have an ARM
backend on the horizon, I've added this to the new `x64`
configuration-group rather than `CPU`. This new pattern will hopefully
allow testing to be more targeted to certain processor features and
allow the user to determine whether they want certain features to be
enabled or disabled (such as avoiding BMI2 on certain AMD processors due to
pdep/pext being incredibly slow). The default configuration is to detect
and utilize all available features.
This commit is contained in:
Wunkolo 2022-01-10 16:18:55 -08:00 committed by Rick Gibbed
parent 37aa3d129c
commit 233ed107fe
5 changed files with 42 additions and 21 deletions

View File

@ -26,10 +26,23 @@
#include "xenia/cpu/processor.h"
#include "xenia/cpu/stack_walker.h"
DEFINE_bool(
use_haswell_instructions, true,
"Uses the AVX2/FMA/etc instructions on Haswell processors when available.",
"CPU");
// Bit mask selecting which optional x64 instruction-set extensions the
// backend is allowed to detect and use. The mask is tested against the
// kX64Emit* feature flags (e.g. `x64_extension_mask & kX64EmitMovbe`), so the
// values listed in the help text are individual bits that may be OR'd
// together. A set bit only permits a feature; the host CPU is still queried
// before the feature is enabled. The default of -1 has every bit set.
DEFINE_int32(x64_extension_mask, -1,
"Allow the detection and utilization of specific instruction set "
"features.\n"
" 0 = x86_64 + AVX1\n"
" 1 = AVX2\n"
" 2 = FMA\n"
" 4 = LZCNT\n"
" 8 = BMI1\n"
" 16 = BMI2\n"
" 32 = F16C\n"
" 64 = Movbe\n"
" 128 = AVX512F\n"
" 256 = AVX512VL\n"
" 512 = AVX512BW\n"
" 1024 = AVX512DQ\n"
" -1 = Detect and utilize all possible processor features\n",
"x64");
namespace xe {
namespace cpu {
@ -84,7 +97,7 @@ bool X64Backend::Initialize(Processor* processor) {
}
// Need movbe to do advanced LOAD/STORE tricks.
if (cvars::use_haswell_instructions) {
if (cvars::x64_extension_mask & kX64EmitMovbe) {
machine_info_.supports_extended_load_store =
cpu.has(Xbyak::util::Cpu::tMOVBE);
} else {

View File

@ -15,7 +15,7 @@
#include "xenia/base/cvar.h"
#include "xenia/cpu/backend/backend.h"
DECLARE_bool(use_haswell_instructions);
DECLARE_int32(x64_extension_mask);
namespace xe {
class Exception;

View File

@ -74,23 +74,32 @@ X64Emitter::X64Emitter(X64Backend* backend, XbyakAllocator* allocator)
backend_(backend),
code_cache_(backend->code_cache()),
allocator_(allocator) {
if (cvars::use_haswell_instructions) {
if (cvars::x64_extension_mask & kX64EmitAVX2)
feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tAVX2) ? kX64EmitAVX2 : 0;
if (cvars::x64_extension_mask & kX64EmitFMA)
feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tFMA) ? kX64EmitFMA : 0;
if (cvars::x64_extension_mask & kX64EmitLZCNT)
feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tLZCNT) ? kX64EmitLZCNT : 0;
if (cvars::x64_extension_mask & kX64EmitBMI1)
feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tBMI1) ? kX64EmitBMI1 : 0;
if (cvars::x64_extension_mask & kX64EmitBMI2)
feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tBMI2) ? kX64EmitBMI2 : 0;
if (cvars::x64_extension_mask & kX64EmitF16C)
feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tF16C) ? kX64EmitF16C : 0;
if (cvars::x64_extension_mask & kX64EmitMovbe)
feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tMOVBE) ? kX64EmitMovbe : 0;
if (cvars::x64_extension_mask & kX64EmitAVX512F)
feature_flags_ |=
cpu_.has(Xbyak::util::Cpu::tAVX512F) ? kX64EmitAVX512F : 0;
if (cvars::x64_extension_mask & kX64EmitAVX512VL)
feature_flags_ |=
cpu_.has(Xbyak::util::Cpu::tAVX512VL) ? kX64EmitAVX512VL : 0;
if (cvars::x64_extension_mask & kX64EmitAVX512BW)
feature_flags_ |=
cpu_.has(Xbyak::util::Cpu::tAVX512BW) ? kX64EmitAVX512BW : 0;
if (cvars::x64_extension_mask & kX64EmitAVX512DQ)
feature_flags_ |=
cpu_.has(Xbyak::util::Cpu::tAVX512DQ) ? kX64EmitAVX512DQ : 0;
}
if (!cpu_.has(Xbyak::util::Cpu::tAVX)) {
xe::FatalError(

View File

@ -125,19 +125,19 @@ class XbyakAllocator : public Xbyak::Allocator {
};
enum X64EmitterFeatureFlags {
kX64EmitAVX2 = 1 << 1,
kX64EmitFMA = 1 << 2,
kX64EmitLZCNT = 1 << 3,
kX64EmitBMI1 = 1 << 4,
kX64EmitBMI2 = 1 << 5,
kX64EmitF16C = 1 << 6,
kX64EmitMovbe = 1 << 7,
kX64EmitAVX2 = 1 << 0,
kX64EmitFMA = 1 << 1,
kX64EmitLZCNT = 1 << 2,
kX64EmitBMI1 = 1 << 3,
kX64EmitBMI2 = 1 << 4,
kX64EmitF16C = 1 << 5,
kX64EmitMovbe = 1 << 6,
kX64EmitAVX512F = 1 << 8,
kX64EmitAVX512VL = 1 << 9,
kX64EmitAVX512F = 1 << 7,
kX64EmitAVX512VL = 1 << 8,
kX64EmitAVX512BW = 1 << 10,
kX64EmitAVX512DQ = 1 << 11,
kX64EmitAVX512BW = 1 << 9,
kX64EmitAVX512DQ = 1 << 10,
kX64EmitAVX512Ortho = kX64EmitAVX512F | kX64EmitAVX512VL,
kX64EmitAVX512Ortho64 = kX64EmitAVX512Ortho | kX64EmitAVX512DQ

View File

@ -422,8 +422,7 @@ bool RunTests(const std::string_view test_name) {
int failed_count = 0;
int passed_count = 0;
XELOGI("Haswell instruction usage {}.",
cvars::use_haswell_instructions ? "enabled" : "disabled");
XELOGI("Instruction feature mask {}.", cvars::x64_extension_mask);
auto test_path_root = cvars::test_path;
std::vector<std::filesystem::path> test_files;