mirror of https://github.com/RPCS3/rpcs3.git
PPU LLVM: New "stack" strategy
Minor pessimization: all memory accesses are volatile. Special handling of prologues and epilogues. Minor optimizing assumption for SP.
This commit is contained in:
parent
712c04b2ad
commit
5c9f83c3a8
|
@ -585,6 +585,32 @@ extern void ppu_initialize(const std::string& name, const std::vector<ppu_functi
|
||||||
ReplaceInstWithInst(ci, CallInst::Create(f, {ci->getArgOperand(0)}));
|
ReplaceInstWithInst(ci, CallInst::Create(f, {ci->getArgOperand(0)}));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (const auto li = dyn_cast<LoadInst>(inst))
|
||||||
|
{
|
||||||
|
// TODO: more careful check
|
||||||
|
if (li->getNumUses() == 0)
|
||||||
|
{
|
||||||
|
// Remove unreferenced volatile loads
|
||||||
|
li->eraseFromParent();
|
||||||
|
}
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (const auto si = dyn_cast<StoreInst>(inst))
|
||||||
|
{
|
||||||
|
// TODO: more careful check
|
||||||
|
if (isa<UndefValue>(si->getOperand(0)) && si->getParent() == &func->getEntryBlock())
|
||||||
|
{
|
||||||
|
// Remove undef volatile stores
|
||||||
|
si->eraseFromParent();
|
||||||
|
}
|
||||||
|
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -595,6 +621,7 @@ extern void ppu_initialize(const std::string& name, const std::vector<ppu_functi
|
||||||
// Remove unused functions, structs, global variables, etc
|
// Remove unused functions, structs, global variables, etc
|
||||||
mpm.add(createStripDeadPrototypesPass());
|
mpm.add(createStripDeadPrototypesPass());
|
||||||
//mpm.add(createFunctionInliningPass());
|
//mpm.add(createFunctionInliningPass());
|
||||||
|
mpm.add(createDeadInstEliminationPass());
|
||||||
mpm.run(*module);
|
mpm.run(*module);
|
||||||
|
|
||||||
std::string result;
|
std::string result;
|
||||||
|
|
|
@ -116,7 +116,6 @@ Function* PPUTranslator::TranslateToIR(const ppu_function& info, be_t<u32>* bin,
|
||||||
m_start_addr = info.addr;
|
m_start_addr = info.addr;
|
||||||
m_end_addr = info.addr + info.size;
|
m_end_addr = info.addr + info.size;
|
||||||
m_blocks.clear();
|
m_blocks.clear();
|
||||||
m_value_usage.clear();
|
|
||||||
std::fill(std::begin(m_globals), std::end(m_globals), nullptr);
|
std::fill(std::begin(m_globals), std::end(m_globals), nullptr);
|
||||||
std::fill(std::begin(m_locals), std::end(m_locals), nullptr);
|
std::fill(std::begin(m_locals), std::end(m_locals), nullptr);
|
||||||
|
|
||||||
|
@ -129,9 +128,10 @@ Function* PPUTranslator::TranslateToIR(const ppu_function& info, be_t<u32>* bin,
|
||||||
|
|
||||||
// Non-volatile registers with special meaning (TODO)
|
// Non-volatile registers with special meaning (TODO)
|
||||||
if (info.attr & ppu_attr::uses_r0) m_g_gpr[0] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 0, ".r0g");
|
if (info.attr & ppu_attr::uses_r0) m_g_gpr[0] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 0, ".r0g");
|
||||||
m_g_gpr[1] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 1, ".sp");
|
m_g_gpr[1] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 1, ".spg");
|
||||||
m_g_gpr[2] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 2, ".rtoc");
|
m_g_gpr[2] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 2, ".rtoc");
|
||||||
m_g_gpr[13] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 13, ".tls");
|
m_g_gpr[13] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 13, ".tls");
|
||||||
|
m_gpr[1] = m_ir->CreateAlloca(GetType<u64>(), nullptr, ".sp");
|
||||||
|
|
||||||
// Registers used for args or results (TODO)
|
// Registers used for args or results (TODO)
|
||||||
for (u32 i = 3; i <= 10; i++) m_g_gpr[i] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + i, fmt::format(".r%u", i));
|
for (u32 i = 3; i <= 10; i++) m_g_gpr[i] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + i, fmt::format(".r%u", i));
|
||||||
|
@ -139,9 +139,9 @@ Function* PPUTranslator::TranslateToIR(const ppu_function& info, be_t<u32>* bin,
|
||||||
for (u32 i = 2; i <= 13; i++) m_g_vr[i] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 65 + i, fmt::format(".v%u", i));
|
for (u32 i = 2; i <= 13; i++) m_g_vr[i] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 65 + i, fmt::format(".v%u", i));
|
||||||
|
|
||||||
/* Create local variables */
|
/* Create local variables */
|
||||||
for (u32 i = 0; i < 32; i++) m_gpr[i] = m_g_gpr[i] ? m_g_gpr[i] : m_ir->CreateAlloca(GetType<u64>(), nullptr, fmt::format(".r%d", i));
|
for (u32 i = 0; i < 32; i++) if (!m_gpr[i]) m_gpr[i] = m_g_gpr[i] ? m_g_gpr[i] : m_ir->CreateAlloca(GetType<u64>(), nullptr, fmt::format(".r%d", i));
|
||||||
for (u32 i = 0; i < 32; i++) m_fpr[i] = m_g_fpr[i] ? m_g_fpr[i] : m_ir->CreateAlloca(GetType<f64>(), nullptr, fmt::format(".f%d", i));
|
for (u32 i = 0; i < 32; i++) if (!m_fpr[i]) m_fpr[i] = m_g_fpr[i] ? m_g_fpr[i] : m_ir->CreateAlloca(GetType<f64>(), nullptr, fmt::format(".f%d", i));
|
||||||
for (u32 i = 0; i < 32; i++) m_vr[i] = m_g_vr[i] ? m_g_vr[i] : m_ir->Insert(new AllocaInst(GetType<u32[4]>(), nullptr, 16, fmt::format(".v%d", i)));
|
for (u32 i = 0; i < 32; i++) if (!m_vr[i]) m_vr[i] = m_g_vr[i] ? m_g_vr[i] : m_ir->Insert(new AllocaInst(GetType<u32[4]>(), nullptr, 16, fmt::format(".v%d", i)));
|
||||||
|
|
||||||
for (u32 i = 0; i < 32; i++)
|
for (u32 i = 0; i < 32; i++)
|
||||||
{
|
{
|
||||||
|
@ -201,6 +201,7 @@ Function* PPUTranslator::TranslateToIR(const ppu_function& info, be_t<u32>* bin,
|
||||||
//m_fpscr_rnl = m_fpscr[31] = m_ir->CreateAlloca(GetType<bool>(), nullptr, "fpscr.rn.lsb");
|
//m_fpscr_rnl = m_fpscr[31] = m_ir->CreateAlloca(GetType<bool>(), nullptr, "fpscr.rn.lsb");
|
||||||
|
|
||||||
/* Initialize local variables */
|
/* Initialize local variables */
|
||||||
|
m_ir->CreateStore(m_ir->CreateLoad(m_g_gpr[1]), m_gpr[1]); // SP
|
||||||
m_ir->CreateStore(m_ir->getFalse(), m_xer_so); // XER.SO
|
m_ir->CreateStore(m_ir->getFalse(), m_xer_so); // XER.SO
|
||||||
m_ir->CreateStore(m_ir->getFalse(), m_vscr_sat); // VSCR.SAT
|
m_ir->CreateStore(m_ir->getFalse(), m_vscr_sat); // VSCR.SAT
|
||||||
m_ir->CreateStore(m_ir->getTrue(), m_vscr_nj);
|
m_ir->CreateStore(m_ir->getTrue(), m_vscr_nj);
|
||||||
|
@ -275,23 +276,6 @@ Function* PPUTranslator::TranslateToIR(const ppu_function& info, be_t<u32>* bin,
|
||||||
CallFunction(0, true, _ctr);
|
CallFunction(0, true, _ctr);
|
||||||
}
|
}
|
||||||
|
|
||||||
//for (auto i = inst_begin(*m_function), end = inst_end(*m_function); i != end;)
|
|
||||||
//{
|
|
||||||
// const auto inst = &*i++;
|
|
||||||
|
|
||||||
// // Remove unnecessary stores of global variables created by PrepareGlobalArguments() and similar functions
|
|
||||||
// if (const auto si = dyn_cast<StoreInst>(inst))
|
|
||||||
// {
|
|
||||||
// const auto g = dyn_cast<GlobalVariable>(si->getOperand(1));
|
|
||||||
|
|
||||||
// if (g && m_value_usage[g] == 0)
|
|
||||||
// {
|
|
||||||
// si->eraseFromParent();
|
|
||||||
// continue;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//}
|
|
||||||
|
|
||||||
return m_function;
|
return m_function;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -572,12 +556,12 @@ Value* PPUTranslator::ReadMemory(Value* addr, Type* type, bool is_be, u32 align)
|
||||||
{
|
{
|
||||||
// Read, byteswap, bitcast
|
// Read, byteswap, bitcast
|
||||||
const auto int_type = m_ir->getIntNTy(size);
|
const auto int_type = m_ir->getIntNTy(size);
|
||||||
const auto value = m_ir->CreateAlignedLoad(GetMemory(addr, int_type), align, !IsStackAddr(addr));
|
const auto value = m_ir->CreateAlignedLoad(GetMemory(addr, int_type), align, true);
|
||||||
return m_ir->CreateBitCast(Call(int_type, fmt::format("llvm.bswap.i%u", size), value), type);
|
return m_ir->CreateBitCast(Call(int_type, fmt::format("llvm.bswap.i%u", size), value), type);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read normally
|
// Read normally
|
||||||
return m_ir->CreateAlignedLoad(GetMemory(addr, type), align, !IsStackAddr(addr));
|
return m_ir->CreateAlignedLoad(GetMemory(addr, type), align, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align)
|
void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align)
|
||||||
|
@ -593,7 +577,7 @@ void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write
|
// Write
|
||||||
m_ir->CreateAlignedStore(value, GetMemory(addr, value->getType()), align, !IsStackAddr(addr));
|
m_ir->CreateAlignedStore(value, GetMemory(addr, value->getType()), align, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
void PPUTranslator::CompilationError(const std::string& error)
|
void PPUTranslator::CompilationError(const std::string& error)
|
||||||
|
@ -3942,20 +3926,27 @@ void PPUTranslator::UNK(ppu_opcode_t op)
|
||||||
|
|
||||||
Value* PPUTranslator::GetGpr(u32 r, u32 num_bits)
|
Value* PPUTranslator::GetGpr(u32 r, u32 num_bits)
|
||||||
{
|
{
|
||||||
m_value_usage[m_gpr[r]]++;
|
|
||||||
return m_ir->CreateTrunc(m_ir->CreateLoad(m_gpr[r]), m_ir->getIntNTy(num_bits));
|
return m_ir->CreateTrunc(m_ir->CreateLoad(m_gpr[r]), m_ir->getIntNTy(num_bits));
|
||||||
}
|
}
|
||||||
|
|
||||||
void PPUTranslator::SetGpr(u32 r, Value* value)
|
void PPUTranslator::SetGpr(u32 r, Value* value)
|
||||||
{
|
{
|
||||||
m_ir->CreateStore(m_ir->CreateZExt(value, GetType<u64>()), m_gpr[r]);
|
const auto i64_val = m_ir->CreateZExt(value, GetType<u64>());
|
||||||
m_value_usage[m_gpr[r]]++;
|
|
||||||
|
if (true) // Update local: all regs
|
||||||
|
{
|
||||||
|
m_ir->CreateStore(i64_val, m_gpr[r]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (r == 1) // Update global: SP
|
||||||
|
{
|
||||||
|
m_ir->CreateStore(i64_val, m_g_gpr[r]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Value* PPUTranslator::GetFpr(u32 r, u32 bits, bool as_int)
|
Value* PPUTranslator::GetFpr(u32 r, u32 bits, bool as_int)
|
||||||
{
|
{
|
||||||
const auto value = m_ir->CreateAlignedLoad(m_fpr[r], 8);
|
const auto value = m_ir->CreateAlignedLoad(m_fpr[r], 8);
|
||||||
m_value_usage[m_fpr[r]]++;
|
|
||||||
|
|
||||||
if (!as_int && bits == 64)
|
if (!as_int && bits == 64)
|
||||||
{
|
{
|
||||||
|
@ -3979,13 +3970,11 @@ void PPUTranslator::SetFpr(u32 r, Value* val)
|
||||||
val->getType() == GetType<f32>() ? m_ir->CreateFPExt(val, GetType<f64>()) : val;
|
val->getType() == GetType<f32>() ? m_ir->CreateFPExt(val, GetType<f64>()) : val;
|
||||||
|
|
||||||
m_ir->CreateAlignedStore(f64_val, m_fpr[r], 8);
|
m_ir->CreateAlignedStore(f64_val, m_fpr[r], 8);
|
||||||
m_value_usage[m_fpr[r]]++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Value* PPUTranslator::GetVr(u32 vr, VrType type)
|
Value* PPUTranslator::GetVr(u32 vr, VrType type)
|
||||||
{
|
{
|
||||||
const auto value = m_ir->CreateAlignedLoad(m_vr[vr], 16);
|
const auto value = m_ir->CreateAlignedLoad(m_vr[vr], 16);
|
||||||
m_value_usage[m_vr[vr]]++;
|
|
||||||
|
|
||||||
switch (type)
|
switch (type)
|
||||||
{
|
{
|
||||||
|
@ -4019,7 +4008,6 @@ void PPUTranslator::SetVr(u32 vr, Value* value)
|
||||||
}
|
}
|
||||||
|
|
||||||
m_ir->CreateAlignedStore(m_ir->CreateBitCast(value, GetType<u32[4]>()), m_vr[vr], 16);
|
m_ir->CreateAlignedStore(m_ir->CreateBitCast(value, GetType<u32[4]>()), m_vr[vr], 16);
|
||||||
m_value_usage[m_vr[vr]]++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Value* PPUTranslator::GetCrb(u32 crb)
|
Value* PPUTranslator::GetCrb(u32 crb)
|
||||||
|
@ -4230,31 +4218,4 @@ Value* PPUTranslator::CheckBranchCondition(u32 bo, u32 bi)
|
||||||
return use_ctr ? use_ctr : use_cond;
|
return use_ctr ? use_ctr : use_cond;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool PPUTranslator::IsStackAddr(Value* addr)
|
|
||||||
{
|
|
||||||
// Analyse various binary ops
|
|
||||||
if (const auto bin_op = dyn_cast<BinaryOperator>(addr))
|
|
||||||
{
|
|
||||||
if (bin_op->isBinaryOp(Instruction::Add) || bin_op->isBinaryOp(Instruction::And) || bin_op->isBinaryOp(Instruction::Or) || bin_op->isBinaryOp(Instruction::Xor))
|
|
||||||
{
|
|
||||||
return IsStackAddr(bin_op->getOperand(0)) || IsStackAddr(bin_op->getOperand(1));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bin_op->isBinaryOp(Instruction::Sub))
|
|
||||||
{
|
|
||||||
return IsStackAddr(bin_op->getOperand(0));
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO
|
|
||||||
}
|
|
||||||
|
|
||||||
// Detect load instruction
|
|
||||||
if (const auto load_op = dyn_cast<LoadInst>(addr))
|
|
||||||
{
|
|
||||||
return load_op->getOperand(0) == m_gpr[1];
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -149,9 +149,6 @@ class PPUTranslator final //: public CPUTranslator
|
||||||
|
|
||||||
/* Variables */
|
/* Variables */
|
||||||
|
|
||||||
// Explicit register usage counter
|
|
||||||
std::unordered_map<llvm::Value*, u64> m_value_usage;
|
|
||||||
|
|
||||||
// Memory base
|
// Memory base
|
||||||
llvm::Value* m_base;
|
llvm::Value* m_base;
|
||||||
|
|
||||||
|
@ -386,9 +383,6 @@ public:
|
||||||
// Branch to next instruction if condition failed, never branch on nullptr
|
// Branch to next instruction if condition failed, never branch on nullptr
|
||||||
void UseCondition(llvm::Value* = nullptr);
|
void UseCondition(llvm::Value* = nullptr);
|
||||||
|
|
||||||
// Check whether the address is stack
|
|
||||||
bool IsStackAddr(llvm::Value* addr);
|
|
||||||
|
|
||||||
// Get memory pointer
|
// Get memory pointer
|
||||||
llvm::Value* GetMemory(llvm::Value* addr, llvm::Type* type);
|
llvm::Value* GetMemory(llvm::Value* addr, llvm::Type* type);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue