SPU LLVM: improve constant propagation

Propagate constants in non-volatile registers between chunks
Disable function table in Mega mode
This commit is contained in:
Nekotekina 2018-07-22 19:09:25 +03:00
parent a424fcfcf7
commit d3ad44aec4
1 changed files with 75 additions and 25 deletions

View File

@ -1553,9 +1553,28 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
std::array<llvm::StoreInst*, s_reg_max> store{};
};
struct chunk_info
{
// Callable function
llvm::Function* func;
// Constants in non-volatile registers at the entry point
std::array<llvm::Value*, s_reg_max> reg{};
chunk_info() = default;
chunk_info(llvm::Function* func)
: func(func)
{
}
};
// Current block
block_info* m_block;
// Current chunk
chunk_info* m_finfo;
// All blocks in the current function chunk
std::unordered_map<u32, block_info, value_hash<u32, 2>> m_blocks;
@ -1563,7 +1582,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
std::vector<u32> m_block_queue;
// All function chunks in current SPU compile unit
std::unordered_map<u32, llvm::Function*, value_hash<u32, 2>> m_functions;
std::unordered_map<u32, chunk_info, value_hash<u32, 2>> m_functions;
// Function chunk list for processing
std::vector<u32> m_function_queue;
@ -1584,9 +1603,28 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
result->addAttribute(2, llvm::Attribute::NoAlias);
// Enqueue if necessary
if (m_functions.emplace(addr, result).second)
const auto empl = m_functions.emplace(addr, chunk_info{result});
if (empl.second)
{
m_function_queue.push_back(addr);
if (m_block && g_cfg.core.spu_block_size != spu_block_size_type::safe)
{
// Initialize constants for non-volatile registers (TODO)
auto& regs = empl.first->second.reg;
for (u32 i = 80; i <= 127; i++)
{
if (auto c = llvm::dyn_cast_or_null<llvm::Constant>(m_block->reg[i]))
{
if (!(find_reg_origin(addr, i, false) >> 31))
{
regs[i] = c;
}
}
}
}
}
return result;
@ -1600,6 +1638,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
m_reg_addr.fill(nullptr);
m_block = nullptr;
m_finfo = nullptr;
m_blocks.clear();
m_block_queue.clear();
m_ir->SetInsertPoint(llvm::BasicBlock::Create(m_context, "", m_function));
@ -1769,12 +1808,12 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
}
// Return either basic block addr with single dominating value, or negative number of PHI entries
u32 find_reg_origin(u32 addr, u32 index)
u32 find_reg_origin(u32 addr, u32 index, bool chunk_only = true)
{
u32 result = -1;
// Handle entry point specially
if (m_entry_info[addr / 4])
if (chunk_only && m_entry_info[addr / 4])
{
result = addr;
}
@ -1791,10 +1830,12 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
{
for (u32 pred : pfound->second)
{
if (m_entry_map[pred / 4] == root)
if (chunk_only && m_entry_map[pred / 4] != root)
{
m_scan_queue.push_back(pred);
continue;
}
m_scan_queue.push_back(pred);
}
}
@ -1831,7 +1872,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
// Enqueue predecessors if register is not modified there
for (u32 pred : pfound->second)
{
if (m_entry_map[pred / 4] != root)
if (chunk_only && m_entry_map[pred / 4] != root)
{
continue;
}
@ -1847,7 +1888,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
break;
}
if (regmod || m_entry_info[addr / 4])
if (regmod || (chunk_only && m_entry_info[addr / 4]))
{
if (result == -1)
{
@ -2200,7 +2241,8 @@ public:
{
// Initialize function info
m_entry = m_function_queue[fi];
set_function(m_functions[m_entry]);
set_function(m_functions[m_entry].func);
m_finfo = &m_functions[m_entry];
m_ir->CreateBr(add_block(m_entry));
// Emit instructions for basic blocks
@ -2251,9 +2293,10 @@ public:
if (!value)
{
// Value hasn't been loaded yet
value = m_ir->CreateLoad(regptr);
value = m_finfo->reg[i] ? m_finfo->reg[i] : m_ir->CreateLoad(regptr);
}
else if (i < 128 && llvm::isa<llvm::Constant>(value))
if (i < 128 && llvm::isa<llvm::Constant>(value))
{
// Bitcast the constant
value = make_const_vector(get_const_vector(llvm::cast<llvm::Constant>(value), baddr, i), _phi->getType());
@ -2279,7 +2322,7 @@ public:
const auto regptr = init_vr(i);
const auto cblock = m_ir->GetInsertBlock();
m_ir->SetInsertPoint(m_function->getEntryBlock().getTerminator());
const auto value = m_ir->CreateLoad(regptr);
const auto value = m_finfo->reg[i] ? m_finfo->reg[i] : m_ir->CreateLoad(regptr);
m_ir->SetInsertPoint(cblock);
_phi->addIncoming(value, &m_function->getEntryBlock());
}
@ -2295,6 +2338,11 @@ public:
m_block->reg[i] = bfound->second.reg[i];
}
}
else if (baddr == m_entry)
{
// Passthrough constant from a different chunk
m_block->reg[i] = m_finfo->reg[i];
}
}
// Emit state check if necessary (TODO: more conditions)
@ -2380,10 +2428,19 @@ public:
}
chunks.push_back(null);
continue;
}
else
chunks.push_back(found->second.func);
// If a chunk has incoming constants, we can't add it to the function table (TODO)
for (const auto c : found->second.reg)
{
chunks.push_back(found->second);
if (c != nullptr)
{
chunks.back() = null;
break;
}
}
}
@ -2406,7 +2463,7 @@ public:
for (const auto& func : m_functions)
{
pm.run(*func.second);
pm.run(*func.second.func);
}
// Clear context (TODO)
@ -4498,18 +4555,11 @@ public:
llvm::Value* ptr = m_ir->CreateGEP(disp, m_ir->CreateLShr(ad64, 2, "", true));
if (g_cfg.core.spu_block_size != spu_block_size_type::safe)
if (g_cfg.core.spu_block_size == spu_block_size_type::giga)
{
// Try to load chunk address from the function table
llvm::Value* index = ad64;
if (g_cfg.core.spu_block_size != spu_block_size_type::giga)
{
index = m_ir->CreateSub(ad64, m_ir->getInt64(m_function_queue[0]));
}
const auto use_ftable = m_ir->CreateICmpULT(index, m_ir->getInt64(m_size));
ptr = m_ir->CreateSelect(use_ftable, m_ir->CreateGEP(m_function_table, {m_ir->getInt64(0), m_ir->CreateLShr(index, 2, "", true)}), ptr);
const auto use_ftable = m_ir->CreateICmpULT(ad64, m_ir->getInt64(m_size));
ptr = m_ir->CreateSelect(use_ftable, m_ir->CreateGEP(m_function_table, {m_ir->getInt64(0), m_ir->CreateLShr(ad64, 2, "", true)}), ptr);
}
tail(m_ir->CreateLoad(ptr));