diff --git a/src/xenia/cpu/backend/machine_info.h b/src/xenia/cpu/backend/machine_info.h
index 7734b2a48..ebfe2abca 100644
--- a/src/xenia/cpu/backend/machine_info.h
+++ b/src/xenia/cpu/backend/machine_info.h
@@ -17,6 +17,8 @@ namespace cpu {
 namespace backend {
 
 struct MachineInfo {
+  bool supports_extended_load_store;
+
   struct RegisterSet {
     enum Types {
       INT_TYPES = (1 << 1),
diff --git a/src/xenia/cpu/backend/x64/x64_backend.cc b/src/xenia/cpu/backend/x64/x64_backend.cc
index e02d0a0bb..d77ff60aa 100644
--- a/src/xenia/cpu/backend/x64/x64_backend.cc
+++ b/src/xenia/cpu/backend/x64/x64_backend.cc
@@ -14,6 +14,11 @@
 #include "xenia/cpu/backend/x64/x64_sequences.h"
 #include "xenia/cpu/backend/x64/x64_thunk_emitter.h"
 #include "xenia/cpu/processor.h"
+#include "third_party/xbyak/xbyak/xbyak_util.h"
+
+DEFINE_bool(
+    enable_haswell_instructions, true,
+    "Uses the AVX2/FMA/etc instructions on Haswell processors, if available.");
 
 namespace xe {
 namespace cpu {
@@ -38,6 +43,15 @@ bool X64Backend::Initialize() {
 
   RegisterSequences();
 
+  // Need movbe to do advanced LOAD/STORE tricks.
+  if (FLAGS_enable_haswell_instructions) {
+    Xbyak::util::Cpu cpu;
+    machine_info_.supports_extended_load_store =
+        cpu.has(Xbyak::util::Cpu::tMOVBE);
+  } else {
+    machine_info_.supports_extended_load_store = false;
+  }
+
   machine_info_.register_sets[0] = {
       0, "gpr", MachineInfo::RegisterSet::INT_TYPES, X64Emitter::GPR_COUNT,
   };
diff --git a/src/xenia/cpu/backend/x64/x64_backend.h b/src/xenia/cpu/backend/x64/x64_backend.h
index 6c85d834e..0bbd5c01b 100644
--- a/src/xenia/cpu/backend/x64/x64_backend.h
+++ b/src/xenia/cpu/backend/x64/x64_backend.h
@@ -10,8 +10,12 @@
 #ifndef XENIA_BACKEND_X64_X64_BACKEND_H_
 #define XENIA_BACKEND_X64_X64_BACKEND_H_
 
+#include <gflags/gflags.h>
+
 #include "xenia/cpu/backend/backend.h"
 
+DECLARE_bool(enable_haswell_instructions);
+
 namespace xe {
 namespace cpu {
 namespace backend {
diff --git a/src/xenia/cpu/backend/x64/x64_emitter.cc b/src/xenia/cpu/backend/x64/x64_emitter.cc
index 247a1fb9c..d923806bd 100644
--- a/src/xenia/cpu/backend/x64/x64_emitter.cc
+++ b/src/xenia/cpu/backend/x64/x64_emitter.cc
@@ -31,10 +31,6 @@
 #include "xenia/cpu/thread_state.h"
 #include "xenia/profiling.h"
 
-DEFINE_bool(
-    enable_haswell_instructions, true,
-    "Uses the AVX2/FMA/etc instructions on Haswell processors, if available.");
-
 DEFINE_bool(enable_debugprint_log, false,
             "Log debugprint traps to the active debugger");
 
@@ -87,10 +83,10 @@ X64Emitter::X64Emitter(X64Backend* backend, XbyakAllocator* allocator)
     feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tLZCNT) ? kX64EmitLZCNT : 0;
     feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tBMI2) ? kX64EmitBMI2 : 0;
     feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tF16C) ? kX64EmitF16C : 0;
+    feature_flags_ |= cpu_.has(Xbyak::util::Cpu::tMOVBE) ? kX64EmitMovbe : 0;
   }
-  if (!cpu_.has(Xbyak::util::Cpu::tAVX) ||
-      !cpu_.has(Xbyak::util::Cpu::tMOVBE)) {
+  if (!cpu_.has(Xbyak::util::Cpu::tAVX)) {
     XEFATAL(
         "Your CPU is too old to support Xenia. See the FAQ for system "
         "requirements at http://xenia.jp");
   }
diff --git a/src/xenia/cpu/backend/x64/x64_emitter.h b/src/xenia/cpu/backend/x64/x64_emitter.h
index 9038e6ea1..c5e895472 100644
--- a/src/xenia/cpu/backend/x64/x64_emitter.h
+++ b/src/xenia/cpu/backend/x64/x64_emitter.h
@@ -104,6 +104,7 @@ enum X64EmitterFeatureFlags {
   kX64EmitLZCNT = 1 << 3,
   kX64EmitBMI2 = 1 << 4,
   kX64EmitF16C = 1 << 5,
+  kX64EmitMovbe = 1 << 6,
 };
 
 class X64Emitter : public Xbyak::CodeGenerator {
diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc
index b8ab2456d..b2cc4a7de 100644
--- a/src/xenia/cpu/backend/x64/x64_sequences.cc
+++ b/src/xenia/cpu/backend/x64/x64_sequences.cc
@@ -1524,7 +1524,12 @@ EMITTER(LOAD_I16, MATCH(I<OPCODE_LOAD, I16Op, I64Op>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     auto addr = ComputeMemoryAddress(e, i.src1);
     if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
-      e.movbe(i.dest, e.word[addr]);
+      if (e.IsFeatureEnabled(kX64EmitMovbe)) {
+        e.movbe(i.dest, e.word[addr]);
+      } else {
+        e.mov(i.dest, e.word[addr]);
+        e.ror(i.dest, 8);
+      }
     } else {
       e.mov(i.dest, e.word[addr]);
     }
@@ -1539,7 +1544,12 @@ EMITTER(LOAD_I32, MATCH(I<OPCODE_LOAD, I32Op, I64Op>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     auto addr = ComputeMemoryAddress(e, i.src1);
     if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
-      e.movbe(i.dest, e.dword[addr]);
+      if (e.IsFeatureEnabled(kX64EmitMovbe)) {
+        e.movbe(i.dest, e.dword[addr]);
+      } else {
+        e.mov(i.dest, e.dword[addr]);
+        e.bswap(i.dest);
+      }
     } else {
       e.mov(i.dest, e.dword[addr]);
     }
@@ -1554,7 +1564,12 @@ EMITTER(LOAD_I64, MATCH(I<OPCODE_LOAD, I64Op, I64Op>)) {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     auto addr = ComputeMemoryAddress(e, i.src1);
    if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
-      e.movbe(i.dest, e.qword[addr]);
+      if (e.IsFeatureEnabled(kX64EmitMovbe)) {
+        e.movbe(i.dest, e.qword[addr]);
+      } else {
+        e.mov(i.dest, e.qword[addr]);
+        e.bswap(i.dest);
+      }
     } else {
       e.mov(i.dest, e.qword[addr]);
     }
@@ -1645,7 +1660,11 @@ EMITTER(STORE_I16, MATCH(I<OPCODE_STORE, VoidOp, I64Op, I16Op>)) {
     auto addr = ComputeMemoryAddress(e, i.src1);
     if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
       assert_false(i.src2.is_constant);
-      e.movbe(e.word[addr], i.src2);
+      if (e.IsFeatureEnabled(kX64EmitMovbe)) {
+        e.movbe(e.word[addr], i.src2);
+      } else {
+        assert_always("not implemented");
+      }
     } else {
       if (i.src2.is_constant) {
         e.mov(e.word[addr], i.src2.constant());
@@ -1666,7 +1685,11 @@ EMITTER(STORE_I32, MATCH(I<OPCODE_STORE, VoidOp, I64Op, I32Op>)) {
     auto addr = ComputeMemoryAddress(e, i.src1);
     if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
       assert_false(i.src2.is_constant);
-      e.movbe(e.dword[addr], i.src2);
+      if (e.IsFeatureEnabled(kX64EmitMovbe)) {
+        e.movbe(e.dword[addr], i.src2);
+      } else {
+        assert_always("not implemented");
+      }
     } else {
       if (i.src2.is_constant) {
         e.mov(e.dword[addr], i.src2.constant());
@@ -1687,7 +1710,11 @@ EMITTER(STORE_I64, MATCH(I<OPCODE_STORE, VoidOp, I64Op, I64Op>)) {
     auto addr = ComputeMemoryAddress(e, i.src1);
     if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
       assert_false(i.src2.is_constant);
-      e.movbe(e.qword[addr], i.src2);
+      if (e.IsFeatureEnabled(kX64EmitMovbe)) {
+        e.movbe(e.qword[addr], i.src2);
+      } else {
+        assert_always("not implemented");
+      }
     } else {
       if (i.src2.is_constant) {
         e.MovMem64(addr, i.src2.constant());
diff --git a/src/xenia/cpu/frontend/ppc_translator.cc b/src/xenia/cpu/frontend/ppc_translator.cc
index e962839ef..2e51280ee 100644
--- a/src/xenia/cpu/frontend/ppc_translator.cc
+++ b/src/xenia/cpu/frontend/ppc_translator.cc
@@ -60,8 +60,12 @@ PPCTranslator::PPCTranslator(PPCFrontend* frontend) : frontend_(frontend) {
   if (validate) compiler_->AddPass(std::make_unique<passes::ValidationPass>());
   compiler_->AddPass(std::make_unique<passes::SimplificationPass>());
   if (validate) compiler_->AddPass(std::make_unique<passes::ValidationPass>());
-  compiler_->AddPass(std::make_unique<passes::MemorySequenceCombinationPass>());
-  if (validate) compiler_->AddPass(std::make_unique<passes::ValidationPass>());
+  if (backend->machine_info()->supports_extended_load_store) {
+    // Backend supports the advanced LOAD/STORE instructions.
+    // These will save us a lot of HIR opcodes.
+    compiler_->AddPass(std::make_unique<passes::MemorySequenceCombinationPass>());
+    if (validate) compiler_->AddPass(std::make_unique<passes::ValidationPass>());
+  }
   compiler_->AddPass(std::make_unique<passes::SimplificationPass>());
   if (validate) compiler_->AddPass(std::make_unique<passes::ValidationPass>());
   // compiler_->AddPass(std::make_unique<passes::DeadStoreEliminationPass>());