Optimization to merge LOAD+SWAP and SWAP+STORE into flagged opcodes.
This commit is contained in:
parent
ddaf08ca8d
commit
2a6ada2a3c
|
@ -56,6 +56,7 @@
|
|||
<ClCompile Include="src\xenia\cpu\compiler\passes\data_flow_analysis_pass.cc" />
|
||||
<ClCompile Include="src\xenia\cpu\compiler\passes\dead_code_elimination_pass.cc" />
|
||||
<ClCompile Include="src\xenia\cpu\compiler\passes\finalization_pass.cc" />
|
||||
<ClCompile Include="src\xenia\cpu\compiler\passes\memory_sequence_combination_pass.cc" />
|
||||
<ClCompile Include="src\xenia\cpu\compiler\passes\register_allocation_pass.cc" />
|
||||
<ClCompile Include="src\xenia\cpu\compiler\passes\simplification_pass.cc" />
|
||||
<ClCompile Include="src\xenia\cpu\compiler\passes\validation_pass.cc" />
|
||||
|
@ -317,6 +318,7 @@
|
|||
<ClInclude Include="src\xenia\cpu\compiler\passes\data_flow_analysis_pass.h" />
|
||||
<ClInclude Include="src\xenia\cpu\compiler\passes\dead_code_elimination_pass.h" />
|
||||
<ClInclude Include="src\xenia\cpu\compiler\passes\finalization_pass.h" />
|
||||
<ClInclude Include="src\xenia\cpu\compiler\passes\memory_sequence_combination_pass.h" />
|
||||
<ClInclude Include="src\xenia\cpu\compiler\passes\register_allocation_pass.h" />
|
||||
<ClInclude Include="src\xenia\cpu\compiler\passes\simplification_pass.h" />
|
||||
<ClInclude Include="src\xenia\cpu\compiler\passes\validation_pass.h" />
|
||||
|
|
|
@ -772,6 +772,9 @@
|
|||
<ClCompile Include="src\xenia\gpu\tracing.cc">
|
||||
<Filter>src\xenia\gpu</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="src\xenia\cpu\compiler\passes\memory_sequence_combination_pass.cc">
|
||||
<Filter>src\xenia\cpu\compiler\passes</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="src\xenia\emulator.h">
|
||||
|
@ -1491,6 +1494,9 @@
|
|||
<ClInclude Include="third_party\capstone\utils.h">
|
||||
<Filter>third_party\capstone</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="src\xenia\cpu\compiler\passes\memory_sequence_combination_pass.h">
|
||||
<Filter>src\xenia\cpu\compiler\passes</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="src\xenia\cpu\backend\x64\x64_sequence.inl">
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "xenia/cpu/compiler/passes/dead_code_elimination_pass.h"
|
||||
//#include "xenia/cpu/compiler/passes/dead_store_elimination_pass.h"
|
||||
#include "xenia/cpu/compiler/passes/finalization_pass.h"
|
||||
#include "xenia/cpu/compiler/passes/memory_sequence_combination_pass.h"
|
||||
#include "xenia/cpu/compiler/passes/register_allocation_pass.h"
|
||||
#include "xenia/cpu/compiler/passes/simplification_pass.h"
|
||||
#include "xenia/cpu/compiler/passes/validation_pass.h"
|
||||
|
|
|
@ -0,0 +1,146 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2015 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/cpu/compiler/passes/memory_sequence_combination_pass.h"
|
||||
|
||||
#include "xenia/profiling.h"
|
||||
|
||||
namespace xe {
|
||||
namespace cpu {
|
||||
namespace compiler {
|
||||
namespace passes {
|
||||
|
||||
// TODO(benvanik): remove when enums redefined.
|
||||
using namespace xe::cpu::hir;
|
||||
|
||||
using xe::cpu::hir::HIRBuilder;
|
||||
using xe::cpu::hir::Instr;
|
||||
using xe::cpu::hir::Value;
|
||||
|
||||
// The pass carries no state of its own; construction only initializes the
// CompilerPass base.
MemorySequenceCombinationPass::MemorySequenceCombinationPass()
    : CompilerPass() {
}

// Nothing to release beyond what the base class handles.
MemorySequenceCombinationPass::~MemorySequenceCombinationPass() = default;
|
||||
|
||||
// Visits every instruction of every block reachable from
// builder->first_block() and hands each LOAD / STORE to the matching combiner,
// which may fold an adjacent byte swap into the memory opcode's flags.
//
// Always returns true.
bool MemorySequenceCombinationPass::Run(HIRBuilder* builder) {
  for (auto block = builder->first_block(); block; block = block->next) {
    for (auto instr = block->instr_head; instr; instr = instr->next) {
      const auto opcode = instr->opcode;
      if (opcode == &OPCODE_LOAD_info) {
        CombineLoadSequence(instr);
      } else if (opcode == &OPCODE_STORE_info) {
        CombineStoreSequence(instr);
      }
    }
  }
  return true;
}
|
||||
|
||||
// Folds byte swaps that consume a load into the load itself.
//
// Implemented rewrite:
//   v1.i32 = load v0
//   v2.i32 = byte_swap v1.i32
// becomes:
//   v1.i32 = load v0, [swap]
//   v2.i32 = assign v1.i32
//
// Planned but not yet implemented (see TODO at the end): also folding a
// following extend, e.g.
//   v3.i64 = zero_extend v2.i32
// into a single
//   v1.i64 = load_convert v0, [swap|i32->i64,zero]
void MemorySequenceCombinationPass::CombineLoadSequence(Instr* i) {
  const auto first_use = i->dest->use_head;
  if (!first_use) {
    // Nobody reads the loaded value; nothing to combine. The original author
    // notes such a load is expected to be removed by DCE.
    return;
  }

  // The rewrite is only valid when *every* consumer is a BYTE_SWAP. With a
  // mix of consumers we would have to insert new swaps for the others.
  // TODO(benvanik): allow uses by STORE (we can make that swap).
  for (auto u = first_use; u; u = u->next) {
    if (u->instr->opcode != &OPCODE_BYTE_SWAP_info) {
      return;
    }
  }

  // Toggle (not set) the swap flag: if the load was already marked as
  // swapped, folding another swap in cancels it out.
  i->flags ^= LoadStoreFlags::LOAD_STORE_BYTE_SWAP;

  // Each consumer was `byte_swap vN`; it now only needs to forward the
  // already-swapped value, so it is turned into `assign vN` in place.
  for (auto u = first_use; u; u = u->next) {
    auto consumer = u->instr;
    consumer->opcode = &OPCODE_ASSIGN_info;
    consumer->flags = 0;
  }

  // TODO(benvanik): merge in extend/truncate.
}
|
||||
|
||||
// Folds a byte swap that produces a stored value into the store itself.
//
// Implemented rewrite:
//   v1.i32 = ...
//   v2.i32 = byte_swap v1.i32
//   store v0, v2.i32
// becomes:
//   store v0, v1.i32, [swap]
//
// Planned but not yet implemented (see TODO at the end): also folding a
// preceding truncate, e.g.
//   v2.i32 = truncate v1.i64
//   v3.i32 = byte_swap v2.i32
//   store v0, v3.i32
// into a single
//   store_convert v0, v1.i64, [swap|i64->i32,trunc]
void MemorySequenceCombinationPass::CombineStoreSequence(Instr* i) {
  auto stored = i->src2.value;
  if (stored->IsConstant()) {
    // Stores of constant values are left untouched.
    return;
  }

  // Walk back through any chain of assignments to the instruction that
  // actually produced the stored value.
  auto producer = stored->def;
  while (producer && producer->opcode == &OPCODE_ASSIGN_info) {
    producer = producer->src1.value->def;
  }

  const bool produced_by_swap =
      producer && producer->opcode == &OPCODE_BYTE_SWAP_info;
  if (!produced_by_swap) {
    // Either there is no defining instruction or it is not a byte swap.
    return;
  }

  // Toggle (not set) the swap flag: if the store was already marked as
  // swapped, folding another swap in cancels it out.
  i->flags ^= LoadStoreFlags::LOAD_STORE_BYTE_SWAP;

  // Store the pre-swap value directly. The byte swap instruction is left in
  // place; the original author expects DCE to remove it once unused.
  i->set_src2(producer->src1.value);

  // TODO(benvanik): extend/truncate.
}
|
||||
|
||||
} // namespace passes
|
||||
} // namespace compiler
|
||||
} // namespace cpu
|
||||
} // namespace xe
|
|
@ -0,0 +1,38 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2015 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_COMPILER_PASSES_MEMORY_SEQUENCE_COMBINATION_PASS_H_
|
||||
#define XENIA_COMPILER_PASSES_MEMORY_SEQUENCE_COMBINATION_PASS_H_
|
||||
|
||||
#include "xenia/cpu/compiler/compiler_pass.h"
|
||||
|
||||
namespace xe {
|
||||
namespace cpu {
|
||||
namespace compiler {
|
||||
namespace passes {
|
||||
|
||||
class MemorySequenceCombinationPass : public CompilerPass {
|
||||
public:
|
||||
MemorySequenceCombinationPass();
|
||||
~MemorySequenceCombinationPass() override;
|
||||
|
||||
bool Run(hir::HIRBuilder* builder) override;
|
||||
|
||||
private:
|
||||
void CombineMemorySequences(hir::HIRBuilder* builder);
|
||||
void CombineLoadSequence(hir::Instr* i);
|
||||
void CombineStoreSequence(hir::Instr* i);
|
||||
};
|
||||
|
||||
} // namespace passes
|
||||
} // namespace compiler
|
||||
} // namespace cpu
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_COMPILER_PASSES_MEMORY_SEQUENCE_COMBINATION_PASS_H_
|
|
@ -60,6 +60,8 @@ PPCTranslator::PPCTranslator(PPCFrontend* frontend) : frontend_(frontend) {
|
|||
if (validate) compiler_->AddPass(std::make_unique<passes::ValidationPass>());
|
||||
compiler_->AddPass(std::make_unique<passes::ConstantPropagationPass>());
|
||||
if (validate) compiler_->AddPass(std::make_unique<passes::ValidationPass>());
|
||||
compiler_->AddPass(std::make_unique<passes::MemorySequenceCombinationPass>());
|
||||
if (validate) compiler_->AddPass(std::make_unique<passes::ValidationPass>());
|
||||
compiler_->AddPass(std::make_unique<passes::SimplificationPass>());
|
||||
if (validate) compiler_->AddPass(std::make_unique<passes::ValidationPass>());
|
||||
// compiler_->AddPass(std::make_unique<passes::DeadStoreEliminationPass>());
|
||||
|
|
Loading…
Reference in New Issue