PPU LLVM: disable the dead store elimination (DSE) pass and use volatile loads/stores

This commit is contained in:
Ivan Chikish 2023-04-13 11:30:53 +03:00 committed by Ivan
parent 79d09d02ed
commit 22bd7dcc42
3 changed files with 14 additions and 10 deletions

View File

@ -21,6 +21,7 @@
#include "llvm/IR/Module.h" #include "llvm/IR/Module.h"
#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetMachine.h"
#include "llvm/Support/KnownBits.h" #include "llvm/Support/KnownBits.h"
#include "llvm/Support/ModRef.h"
#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/IntrinsicsX86.h"
@ -3051,7 +3052,7 @@ public:
} }
template <typename RT, DSLValue... Args> template <typename RT, DSLValue... Args>
auto call(llvm::Function* func, Args&&... args) auto callf(llvm::Function* func, Args&&... args)
{ {
llvm_value_t<RT> r; llvm_value_t<RT> r;
r.value = m_ir->CreateCall(func, {std::forward<Args>(args).eval(m_ir)...}); r.value = m_ir->CreateCall(func, {std::forward<Args>(args).eval(m_ir)...});

View File

@ -3996,7 +3996,7 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co
//pm.add(createLICMPass()); //pm.add(createLICMPass());
//pm.add(createLoopInstSimplifyPass()); //pm.add(createLoopInstSimplifyPass());
//pm.add(createNewGVNPass()); //pm.add(createNewGVNPass());
pm.add(createDeadStoreEliminationPass()); //pm.add(createDeadStoreEliminationPass());
//pm.add(createSCCPPass()); //pm.add(createSCCPPass());
//pm.add(createReassociatePass()); //pm.add(createReassociatePass());
//pm.add(createInstructionCombiningPass()); //pm.add(createInstructionCombiningPass());

View File

@ -372,7 +372,7 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
if (indirect) if (indirect)
{ {
m_ir->CreateStore(Trunc(indirect, GetType<u32>()), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_cia - m_locals)), true); m_ir->CreateStore(Trunc(indirect, GetType<u32>()), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_cia - m_locals)));
// Try to optimize // Try to optimize
if (auto inst = dyn_cast_or_null<Instruction>(indirect)) if (auto inst = dyn_cast_or_null<Instruction>(indirect))
@ -605,11 +605,14 @@ Value* PPUTranslator::ReadMemory(Value* addr, Type* type, bool is_be, u32 align)
// Read, byteswap, bitcast // Read, byteswap, bitcast
const auto int_type = m_ir->getIntNTy(size); const auto int_type = m_ir->getIntNTy(size);
const auto value = m_ir->CreateAlignedLoad(int_type, GetMemory(addr), llvm::MaybeAlign{align}); const auto value = m_ir->CreateAlignedLoad(int_type, GetMemory(addr), llvm::MaybeAlign{align});
value->setVolatile(true);
return bitcast(Call(int_type, fmt::format("llvm.bswap.i%u", size), value), type); return bitcast(Call(int_type, fmt::format("llvm.bswap.i%u", size), value), type);
} }
// Read normally // Read normally
return m_ir->CreateAlignedLoad(type, GetMemory(addr), llvm::MaybeAlign{align}); const auto r = m_ir->CreateAlignedLoad(type, GetMemory(addr), llvm::MaybeAlign{align});
r->setVolatile(true);
return r;
} }
void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align) void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align)
@ -625,7 +628,7 @@ void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align
} }
// Write // Write
m_ir->CreateAlignedStore(value, GetMemory(addr), llvm::MaybeAlign{align}); m_ir->CreateAlignedStore(value, GetMemory(addr), llvm::MaybeAlign{align})->setVolatile(true);
} }
void PPUTranslator::CompilationError(const std::string& error) void PPUTranslator::CompilationError(const std::string& error)
@ -1393,22 +1396,22 @@ void PPUTranslator::VREFP(ppu_opcode_t op)
void PPUTranslator::VRFIM(ppu_opcode_t op) void PPUTranslator::VRFIM(ppu_opcode_t op)
{ {
set_vr(op.vd, vec_handle_result(call<f32[4]>(get_intrinsic<f32[4]>(Intrinsic::floor), get_vr<f32[4]>(op.vb)))); set_vr(op.vd, vec_handle_result(callf<f32[4]>(get_intrinsic<f32[4]>(Intrinsic::floor), get_vr<f32[4]>(op.vb))));
} }
void PPUTranslator::VRFIN(ppu_opcode_t op) void PPUTranslator::VRFIN(ppu_opcode_t op)
{ {
set_vr(op.vd, vec_handle_result(call<f32[4]>(get_intrinsic<f32[4]>(Intrinsic::roundeven), get_vr<f32[4]>(op.vb)))); set_vr(op.vd, vec_handle_result(callf<f32[4]>(get_intrinsic<f32[4]>(Intrinsic::roundeven), get_vr<f32[4]>(op.vb))));
} }
void PPUTranslator::VRFIP(ppu_opcode_t op) void PPUTranslator::VRFIP(ppu_opcode_t op)
{ {
set_vr(op.vd, vec_handle_result(call<f32[4]>(get_intrinsic<f32[4]>(Intrinsic::ceil), get_vr<f32[4]>(op.vb)))); set_vr(op.vd, vec_handle_result(callf<f32[4]>(get_intrinsic<f32[4]>(Intrinsic::ceil), get_vr<f32[4]>(op.vb))));
} }
void PPUTranslator::VRFIZ(ppu_opcode_t op) void PPUTranslator::VRFIZ(ppu_opcode_t op)
{ {
set_vr(op.vd, vec_handle_result(call<f32[4]>(get_intrinsic<f32[4]>(Intrinsic::trunc), get_vr<f32[4]>(op.vb)))); set_vr(op.vd, vec_handle_result(callf<f32[4]>(get_intrinsic<f32[4]>(Intrinsic::trunc), get_vr<f32[4]>(op.vb))));
} }
void PPUTranslator::VRLB(ppu_opcode_t op) void PPUTranslator::VRLB(ppu_opcode_t op)
@ -1431,7 +1434,7 @@ void PPUTranslator::VRLW(ppu_opcode_t op)
void PPUTranslator::VRSQRTEFP(ppu_opcode_t op) void PPUTranslator::VRSQRTEFP(ppu_opcode_t op)
{ {
set_vr(op.vd, vec_handle_result(fsplat<f32[4]>(1.0) / call<f32[4]>(get_intrinsic<f32[4]>(Intrinsic::sqrt), get_vr<f32[4]>(op.vb)))); set_vr(op.vd, vec_handle_result(fsplat<f32[4]>(1.0) / callf<f32[4]>(get_intrinsic<f32[4]>(Intrinsic::sqrt), get_vr<f32[4]>(op.vb))));
} }
void PPUTranslator::VSEL(ppu_opcode_t op) void PPUTranslator::VSEL(ppu_opcode_t op)