From 2a6ada2a3cbca4bef6b26306d9d118052875d545 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Mon, 15 Jun 2015 18:59:17 -0700 Subject: [PATCH] Optimization to merge LOAD+SWAP and SWAP+STORE into flagged opcodes. --- libxenia.vcxproj | 2 + libxenia.vcxproj.filters | 6 + src/xenia/cpu/compiler/compiler_passes.h | 1 + .../memory_sequence_combination_pass.cc | 146 ++++++++++++++++++ .../passes/memory_sequence_combination_pass.h | 38 +++++ src/xenia/cpu/frontend/ppc_translator.cc | 2 + 6 files changed, 195 insertions(+) create mode 100644 src/xenia/cpu/compiler/passes/memory_sequence_combination_pass.cc create mode 100644 src/xenia/cpu/compiler/passes/memory_sequence_combination_pass.h diff --git a/libxenia.vcxproj b/libxenia.vcxproj index c7a93d683..9bfcc742a 100644 --- a/libxenia.vcxproj +++ b/libxenia.vcxproj @@ -56,6 +56,7 @@ + @@ -317,6 +318,7 @@ + diff --git a/libxenia.vcxproj.filters b/libxenia.vcxproj.filters index 851054935..1c4a2eac9 100644 --- a/libxenia.vcxproj.filters +++ b/libxenia.vcxproj.filters @@ -772,6 +772,9 @@ src\xenia\gpu + + src\xenia\cpu\compiler\passes + @@ -1491,6 +1494,9 @@ third_party\capstone + + src\xenia\cpu\compiler\passes + diff --git a/src/xenia/cpu/compiler/compiler_passes.h b/src/xenia/cpu/compiler/compiler_passes.h index dfd714bf7..90196c7a4 100644 --- a/src/xenia/cpu/compiler/compiler_passes.h +++ b/src/xenia/cpu/compiler/compiler_passes.h @@ -18,6 +18,7 @@ #include "xenia/cpu/compiler/passes/dead_code_elimination_pass.h" //#include "xenia/cpu/compiler/passes/dead_store_elimination_pass.h" #include "xenia/cpu/compiler/passes/finalization_pass.h" +#include "xenia/cpu/compiler/passes/memory_sequence_combination_pass.h" #include "xenia/cpu/compiler/passes/register_allocation_pass.h" #include "xenia/cpu/compiler/passes/simplification_pass.h" #include "xenia/cpu/compiler/passes/validation_pass.h" diff --git a/src/xenia/cpu/compiler/passes/memory_sequence_combination_pass.cc 
b/src/xenia/cpu/compiler/passes/memory_sequence_combination_pass.cc new file mode 100644 index 000000000..11ba9561a --- /dev/null +++ b/src/xenia/cpu/compiler/passes/memory_sequence_combination_pass.cc @@ -0,0 +1,146 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2015 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/cpu/compiler/passes/memory_sequence_combination_pass.h" + +#include "xenia/profiling.h" + +namespace xe { +namespace cpu { +namespace compiler { +namespace passes { + +// TODO(benvanik): remove when enums redefined. +using namespace xe::cpu::hir; + +using xe::cpu::hir::HIRBuilder; +using xe::cpu::hir::Instr; +using xe::cpu::hir::Value; + +MemorySequenceCombinationPass::MemorySequenceCombinationPass() + : CompilerPass() {} + +MemorySequenceCombinationPass::~MemorySequenceCombinationPass() = default; + +bool MemorySequenceCombinationPass::Run(HIRBuilder* builder) { + // Run over all loads and stores and see if we can collapse sequences into the + // fat opcodes. See the respective utility functions for examples. 
+ auto block = builder->first_block(); + while (block) { + auto i = block->instr_head; + while (i) { + if (i->opcode == &OPCODE_LOAD_info) { + CombineLoadSequence(i); + } else if (i->opcode == &OPCODE_STORE_info) { + CombineStoreSequence(i); + } + i = i->next; + } + block = block->next; + } + return true; +} + +void MemorySequenceCombinationPass::CombineLoadSequence(Instr* i) { + // Load with swap: + // v1.i32 = load v0 + // v2.i32 = byte_swap v1.i32 + // becomes: + // v1.i32 = load v0, [swap] + // + // Load with swap and extend: + // v1.i32 = load v0 + // v2.i32 = byte_swap v1.i32 + // v3.i64 = zero_extend v2.i32 + // becomes: + // v1.i64 = load_convert v0, [swap|i32->i64,zero] + + if (!i->dest->use_head) { + // No uses of the load result - ignore. Will be killed by DCE. + return; + } + + // Ensure all uses of the load result are BYTE_SWAP - if it's mixed we + // shouldn't transform as we'd have to introduce new swaps! + auto use = i->dest->use_head; + while (use) { + if (use->instr->opcode != &OPCODE_BYTE_SWAP_info) { + // Not a swap. + return; + } + // TODO(benvanik): allow uses by STORE (we can make that swap). + use = use->next; + } + + // Merge byte swap into load. + // Note that we may have already been a swapped operation - this inverts that. + i->flags ^= LoadStoreFlags::LOAD_STORE_BYTE_SWAP; + + // Replace use of byte swap value with loaded value. + // It's byte_swap vN -> assign vN, so not much to do. + use = i->dest->use_head; + while (use) { + auto next_use = use->next; + use->instr->opcode = &OPCODE_ASSIGN_info; + use->instr->flags = 0; + use = next_use; + } + + // TODO(benvanik): merge in extend/truncate. +} + +void MemorySequenceCombinationPass::CombineStoreSequence(Instr* i) { + // Store with swap: + // v1.i32 = ... + // v2.i32 = byte_swap v1.i32 + // store v0, v2.i32 + // becomes: + // store v0, v1.i32, [swap] + // + // Store with truncate and swap: + // v1.i64 = ... 
+ // v2.i32 = truncate v1.i64 + // v3.i32 = byte_swap v2.i32 + // store v0, v3.i32 + // becomes: + // store_convert v0, v1.i64, [swap|i64->i32,trunc] + + auto src = i->src2.value; + if (src->IsConstant()) { + // Constant value write - ignore. + return; + } + + // Find source and ensure it is a byte swap. + auto def = src->def; + while (def && def->opcode == &OPCODE_ASSIGN_info) { + // Skip assignments. + def = def->src1.value->def; + } + if (!def || def->opcode != &OPCODE_BYTE_SWAP_info) { + // Not a swap, or no defining instruction. + return; + } + + // Merge byte swap into store. + // Note that we may have already been a swapped operation - this inverts + // that. + i->flags ^= LoadStoreFlags::LOAD_STORE_BYTE_SWAP; + + // Pull the original value (from before the byte swap). + // The byte swap itself will go away in DCE. + i->set_src2(def->src1.value); + + // TODO(benvanik): extend/truncate. +} + +} // namespace passes +} // namespace compiler +} // namespace cpu +} // namespace xe diff --git a/src/xenia/cpu/compiler/passes/memory_sequence_combination_pass.h b/src/xenia/cpu/compiler/passes/memory_sequence_combination_pass.h new file mode 100644 index 000000000..9d1f47495 --- /dev/null +++ b/src/xenia/cpu/compiler/passes/memory_sequence_combination_pass.h @@ -0,0 +1,38 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2015 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_COMPILER_PASSES_MEMORY_SEQUENCE_COMBINATION_PASS_H_ +#define XENIA_COMPILER_PASSES_MEMORY_SEQUENCE_COMBINATION_PASS_H_ + +#include "xenia/cpu/compiler/compiler_pass.h" + +namespace xe { +namespace cpu { +namespace compiler { +namespace passes { + +class MemorySequenceCombinationPass : public CompilerPass { + public: + MemorySequenceCombinationPass(); + ~MemorySequenceCombinationPass() override; + + bool Run(hir::HIRBuilder* builder) override; + + private: + void CombineMemorySequences(hir::HIRBuilder* builder); + void CombineLoadSequence(hir::Instr* i); + void CombineStoreSequence(hir::Instr* i); +}; + +} // namespace passes +} // namespace compiler +} // namespace cpu +} // namespace xe + +#endif // XENIA_COMPILER_PASSES_MEMORY_SEQUENCE_COMBINATION_PASS_H_ diff --git a/src/xenia/cpu/frontend/ppc_translator.cc b/src/xenia/cpu/frontend/ppc_translator.cc index a606e8227..e962839ef 100644 --- a/src/xenia/cpu/frontend/ppc_translator.cc +++ b/src/xenia/cpu/frontend/ppc_translator.cc @@ -60,6 +60,8 @@ PPCTranslator::PPCTranslator(PPCFrontend* frontend) : frontend_(frontend) { if (validate) compiler_->AddPass(std::make_unique()); compiler_->AddPass(std::make_unique()); if (validate) compiler_->AddPass(std::make_unique()); + compiler_->AddPass(std::make_unique()); + if (validate) compiler_->AddPass(std::make_unique()); compiler_->AddPass(std::make_unique()); if (validate) compiler_->AddPass(std::make_unique()); // compiler_->AddPass(std::make_unique());