diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp index 943b276d7f..cd454b299a 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.cpp @@ -33,52 +33,51 @@ u64 Compiler::s_rotate_mask[64][64]; bool Compiler::s_rotate_mask_inited = false; Compiler::Compiler(RecompilationEngine & recompilation_engine, const Executable execute_unknown_function, - const Executable execute_unknown_block, bool (*poll_status_function)(PPUThread * ppu_state)) - : m_recompilation_engine(recompilation_engine) - , m_poll_status_function(poll_status_function) { - InitializeNativeTarget(); - InitializeNativeTargetAsmPrinter(); - InitializeNativeTargetDisassembler(); + const Executable execute_unknown_block, bool(*poll_status_function)(PPUThread * ppu_state)) + : m_recompilation_engine(recompilation_engine) + , m_poll_status_function(poll_status_function) { + InitializeNativeTarget(); + InitializeNativeTargetAsmPrinter(); + InitializeNativeTargetDisassembler(); - m_llvm_context = new LLVMContext(); - m_ir_builder = new IRBuilder<>(*m_llvm_context); + m_llvm_context = new LLVMContext(); + m_ir_builder = new IRBuilder<>(*m_llvm_context); - std::vector arg_types; - arg_types.push_back(m_ir_builder->getInt8PtrTy()); - arg_types.push_back(m_ir_builder->getInt64Ty()); - m_compiled_function_type = FunctionType::get(m_ir_builder->getInt32Ty(), arg_types, false); + std::vector arg_types; + arg_types.push_back(m_ir_builder->getInt8PtrTy()); + arg_types.push_back(m_ir_builder->getInt64Ty()); + m_compiled_function_type = FunctionType::get(m_ir_builder->getInt32Ty(), arg_types, false); - m_executableMap["execute_unknown_function"] = execute_unknown_function; - m_executableMap["execute_unknown_block"] = execute_unknown_block; + m_executableMap["execute_unknown_function"] = execute_unknown_function; + m_executableMap["execute_unknown_block"] = execute_unknown_block; - if (!s_rotate_mask_inited) { - InitRotateMask(); - 
s_rotate_mask_inited = true; - } + if (!s_rotate_mask_inited) { + InitRotateMask(); + s_rotate_mask_inited = true; + } } Compiler::~Compiler() { - delete m_ir_builder; - delete m_llvm_context; + delete m_ir_builder; + delete m_llvm_context; } - class CustomSectionMemoryManager : public llvm::SectionMemoryManager { private: - std::unordered_map &executableMap; + std::unordered_map &executableMap; public: - CustomSectionMemoryManager(std::unordered_map &map) : - executableMap(map) - {} - ~CustomSectionMemoryManager() override {} + CustomSectionMemoryManager(std::unordered_map &map) : + executableMap(map) + {} + ~CustomSectionMemoryManager() override {} - virtual uint64_t getSymbolAddress(const std::string &Name) - { - std::unordered_map::const_iterator It = executableMap.find(Name); - if (It != executableMap.end()) - return (uint64_t)It->second; - return 0; - } + virtual uint64_t getSymbolAddress(const std::string &Name) + { + std::unordered_map::const_iterator It = executableMap.find(Name); + if (It != executableMap.end()) + return (uint64_t)It->second; + return 0; + } }; //ugly @@ -126,663 +125,665 @@ std::pair Compiler::getFpm } std::pair Compiler::Compile(const std::string & name, const ControlFlowGraph & cfg, bool generate_linkable_exits) { - auto compilation_start = std::chrono::high_resolution_clock::now(); + auto compilation_start = std::chrono::high_resolution_clock::now(); - m_module = new llvm::Module("Module", *m_llvm_context); - m_execute_unknown_function = (Function *)m_module->getOrInsertFunction("execute_unknown_function", m_compiled_function_type); - m_execute_unknown_function->setCallingConv(CallingConv::X86_64_Win64); + m_module = new llvm::Module("Module", *m_llvm_context); + m_execute_unknown_function = (Function *)m_module->getOrInsertFunction("execute_unknown_function", m_compiled_function_type); + m_execute_unknown_function->setCallingConv(CallingConv::X86_64_Win64); - m_execute_unknown_block = (Function 
*)m_module->getOrInsertFunction("execute_unknown_block", m_compiled_function_type); - m_execute_unknown_block->setCallingConv(CallingConv::X86_64_Win64); + m_execute_unknown_block = (Function *)m_module->getOrInsertFunction("execute_unknown_block", m_compiled_function_type); + m_execute_unknown_block->setCallingConv(CallingConv::X86_64_Win64); - std::string targetTriple = "x86_64-pc-windows-elf"; - m_module->setTargetTriple(targetTriple); + std::string targetTriple = "x86_64-pc-windows-elf"; + m_module->setTargetTriple(targetTriple); - llvm::ExecutionEngine *execution_engine = - EngineBuilder(std::unique_ptr(m_module)) - .setEngineKind(EngineKind::JIT) - .setMCJITMemoryManager(std::unique_ptr(new CustomSectionMemoryManager(m_executableMap))) - .setOptLevel(llvm::CodeGenOpt::Aggressive) - .setMCPU("nehalem") - .create(); - m_module->setDataLayout(execution_engine->getDataLayout()); + llvm::ExecutionEngine *execution_engine = + EngineBuilder(std::unique_ptr(m_module)) + .setEngineKind(EngineKind::JIT) + .setMCJITMemoryManager(std::unique_ptr(new CustomSectionMemoryManager(m_executableMap))) + .setOptLevel(llvm::CodeGenOpt::Aggressive) + .setMCPU("nehalem") + .create(); + m_module->setDataLayout(execution_engine->getDataLayout()); - llvm::FunctionPassManager *fpm = new llvm::FunctionPassManager(m_module); - fpm->add(createNoAAPass()); - fpm->add(createBasicAliasAnalysisPass()); - fpm->add(createNoTargetTransformInfoPass()); - fpm->add(createEarlyCSEPass()); - fpm->add(createTailCallEliminationPass()); - fpm->add(createReassociatePass()); - fpm->add(createInstructionCombiningPass()); - fpm->add(new DominatorTreeWrapperPass()); - fpm->add(new MemoryDependenceAnalysis()); - fpm->add(createGVNPass()); - fpm->add(createInstructionCombiningPass()); - fpm->add(new MemoryDependenceAnalysis()); - fpm->add(createDeadStoreEliminationPass()); - fpm->add(new LoopInfo()); - fpm->add(new ScalarEvolution()); - fpm->add(createSLPVectorizerPass()); - 
fpm->add(createInstructionCombiningPass()); - fpm->add(createCFGSimplificationPass()); - fpm->doInitialization(); + llvm::FunctionPassManager *fpm = new llvm::FunctionPassManager(m_module); + fpm->add(createNoAAPass()); + fpm->add(createBasicAliasAnalysisPass()); + fpm->add(createNoTargetTransformInfoPass()); + fpm->add(createEarlyCSEPass()); + fpm->add(createTailCallEliminationPass()); + fpm->add(createReassociatePass()); + fpm->add(createInstructionCombiningPass()); + fpm->add(new DominatorTreeWrapperPass()); + fpm->add(new MemoryDependenceAnalysis()); + fpm->add(createGVNPass()); + fpm->add(createInstructionCombiningPass()); + fpm->add(new MemoryDependenceAnalysis()); + fpm->add(createDeadStoreEliminationPass()); + fpm->add(new LoopInfo()); + fpm->add(new ScalarEvolution()); + fpm->add(createSLPVectorizerPass()); + fpm->add(createInstructionCombiningPass()); + fpm->add(createCFGSimplificationPass()); + fpm->doInitialization(); - m_state.cfg = &cfg; - m_state.generate_linkable_exits = generate_linkable_exits; + m_state.cfg = &cfg; + m_state.generate_linkable_exits = generate_linkable_exits; - // Create the function - m_state.function = (Function *)m_module->getOrInsertFunction(name, m_compiled_function_type); - m_state.function->setCallingConv(CallingConv::X86_64_Win64); - auto arg_i = m_state.function->arg_begin(); - arg_i->setName("ppu_state"); - m_state.args[CompileTaskState::Args::State] = arg_i; - (++arg_i)->setName("context"); - m_state.args[CompileTaskState::Args::Context] = arg_i; + // Create the function + m_state.function = (Function *)m_module->getOrInsertFunction(name, m_compiled_function_type); + m_state.function->setCallingConv(CallingConv::X86_64_Win64); + auto arg_i = m_state.function->arg_begin(); + arg_i->setName("ppu_state"); + m_state.args[CompileTaskState::Args::State] = arg_i; + (++arg_i)->setName("context"); + m_state.args[CompileTaskState::Args::Context] = arg_i; - // Create the entry block and add code to branch to the first instruction - 
m_ir_builder->SetInsertPoint(GetBasicBlockFromAddress(0)); - m_ir_builder->CreateBr(GetBasicBlockFromAddress(cfg.start_address)); + // Create the entry block and add code to branch to the first instruction + m_ir_builder->SetInsertPoint(GetBasicBlockFromAddress(0)); + m_ir_builder->CreateBr(GetBasicBlockFromAddress(cfg.start_address)); - // Convert each instruction in the CFG to LLVM IR - std::vector exit_instr_list; - for (u32 instr_i : cfg.instruction_addresses) { - m_state.hit_branch_instruction = false; - m_state.current_instruction_address = instr_i; - BasicBlock *instr_bb = GetBasicBlockFromAddress(m_state.current_instruction_address); - m_ir_builder->SetInsertPoint(instr_bb); + // Convert each instruction in the CFG to LLVM IR + std::vector exit_instr_list; + for (u32 instr_i : cfg.instruction_addresses) { + m_state.hit_branch_instruction = false; + m_state.current_instruction_address = instr_i; + BasicBlock *instr_bb = GetBasicBlockFromAddress(m_state.current_instruction_address); + m_ir_builder->SetInsertPoint(instr_bb); - if (instr_bb->empty()) { - u32 instr = vm::ps3::read32(m_state.current_instruction_address); - Decode(instr); - if (!m_state.hit_branch_instruction) - m_ir_builder->CreateBr(GetBasicBlockFromAddress(m_state.current_instruction_address + 4)); - } - } + if (instr_bb->empty()) { + u32 instr = vm::ps3::read32(m_state.current_instruction_address); + Decode(instr); + if (!m_state.hit_branch_instruction) + m_ir_builder->CreateBr(GetBasicBlockFromAddress(m_state.current_instruction_address + 4)); + } + } - // Generate exit logic for all empty blocks - const std::string &default_exit_block_name = GetBasicBlockNameFromAddress(0xFFFFFFFF); - for (BasicBlock &block_i : *m_state.function) { - if (!block_i.getInstList().empty() || block_i.getName() == default_exit_block_name) - continue; + // Generate exit logic for all empty blocks + const std::string &default_exit_block_name = GetBasicBlockNameFromAddress(0xFFFFFFFF); + for (BasicBlock &block_i : 
*m_state.function) { + if (!block_i.getInstList().empty() || block_i.getName() == default_exit_block_name) + continue; - // Found an empty block - m_state.current_instruction_address = GetAddressFromBasicBlockName(block_i.getName()); + // Found an empty block + m_state.current_instruction_address = GetAddressFromBasicBlockName(block_i.getName()); - m_ir_builder->SetInsertPoint(&block_i); - PHINode *exit_instr_i32 = m_ir_builder->CreatePHI(m_ir_builder->getInt32Ty(), 0); - exit_instr_list.push_back(exit_instr_i32); + m_ir_builder->SetInsertPoint(&block_i); + PHINode *exit_instr_i32 = m_ir_builder->CreatePHI(m_ir_builder->getInt32Ty(), 0); + exit_instr_list.push_back(exit_instr_i32); - SetPc(m_ir_builder->getInt32(m_state.current_instruction_address)); + SetPc(m_ir_builder->getInt32(m_state.current_instruction_address)); - if (generate_linkable_exits) { - Value *context_i64 = m_ir_builder->CreateZExt(exit_instr_i32, m_ir_builder->getInt64Ty()); - context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32); - Value *ret_i32 = IndirectCall(m_state.current_instruction_address, context_i64, false); - Value *cmp_i1 = m_ir_builder->CreateICmpNE(ret_i32, m_ir_builder->getInt32(0)); - BasicBlock *then_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "then_0"); - BasicBlock *merge_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "merge_0"); - m_ir_builder->CreateCondBr(cmp_i1, then_bb, merge_bb); + if (generate_linkable_exits) { + Value *context_i64 = m_ir_builder->CreateZExt(exit_instr_i32, m_ir_builder->getInt64Ty()); + context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32); + Value *ret_i32 = IndirectCall(m_state.current_instruction_address, context_i64, false); + Value *cmp_i1 = m_ir_builder->CreateICmpNE(ret_i32, m_ir_builder->getInt32(0)); + BasicBlock *then_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "then_0"); + BasicBlock *merge_bb = 
GetBasicBlockFromAddress(m_state.current_instruction_address, "merge_0"); + m_ir_builder->CreateCondBr(cmp_i1, then_bb, merge_bb); - m_ir_builder->SetInsertPoint(then_bb); - context_i64 = m_ir_builder->CreateZExt(ret_i32, m_ir_builder->getInt64Ty()); - context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32); - m_ir_builder->CreateCall2(m_execute_unknown_block, m_state.args[CompileTaskState::Args::State], context_i64); - m_ir_builder->CreateBr(merge_bb); + m_ir_builder->SetInsertPoint(then_bb); + context_i64 = m_ir_builder->CreateZExt(ret_i32, m_ir_builder->getInt64Ty()); + context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32); + m_ir_builder->CreateCall2(m_execute_unknown_block, m_state.args[CompileTaskState::Args::State], context_i64); + m_ir_builder->CreateBr(merge_bb); - m_ir_builder->SetInsertPoint(merge_bb); - m_ir_builder->CreateRet(m_ir_builder->getInt32(0)); - } else { - m_ir_builder->CreateRet(exit_instr_i32); - } - } + m_ir_builder->SetInsertPoint(merge_bb); + m_ir_builder->CreateRet(m_ir_builder->getInt32(0)); + } + else { + m_ir_builder->CreateRet(exit_instr_i32); + } + } - // If the function has a default exit block then generate code for it - BasicBlock *default_exit_bb = GetBasicBlockFromAddress(0xFFFFFFFF, "", false); - if (default_exit_bb) { - m_ir_builder->SetInsertPoint(default_exit_bb); - PHINode *exit_instr_i32 = m_ir_builder->CreatePHI(m_ir_builder->getInt32Ty(), 0); - exit_instr_list.push_back(exit_instr_i32); + // If the function has a default exit block then generate code for it + BasicBlock *default_exit_bb = GetBasicBlockFromAddress(0xFFFFFFFF, "", false); + if (default_exit_bb) { + m_ir_builder->SetInsertPoint(default_exit_bb); + PHINode *exit_instr_i32 = m_ir_builder->CreatePHI(m_ir_builder->getInt32Ty(), 0); + exit_instr_list.push_back(exit_instr_i32); - if (generate_linkable_exits) { - Value *cmp_i1 = m_ir_builder->CreateICmpNE(exit_instr_i32, m_ir_builder->getInt32(0)); - 
BasicBlock *then_bb = GetBasicBlockFromAddress(0xFFFFFFFF, "then_0"); - BasicBlock *merge_bb = GetBasicBlockFromAddress(0xFFFFFFFF, "merge_0"); - m_ir_builder->CreateCondBr(cmp_i1, then_bb, merge_bb); + if (generate_linkable_exits) { + Value *cmp_i1 = m_ir_builder->CreateICmpNE(exit_instr_i32, m_ir_builder->getInt32(0)); + BasicBlock *then_bb = GetBasicBlockFromAddress(0xFFFFFFFF, "then_0"); + BasicBlock *merge_bb = GetBasicBlockFromAddress(0xFFFFFFFF, "merge_0"); + m_ir_builder->CreateCondBr(cmp_i1, then_bb, merge_bb); - m_ir_builder->SetInsertPoint(then_bb); - Value *context_i64 = m_ir_builder->CreateZExt(exit_instr_i32, m_ir_builder->getInt64Ty()); - context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32); - m_ir_builder->CreateCall2(m_execute_unknown_block, m_state.args[CompileTaskState::Args::State], context_i64); - m_ir_builder->CreateBr(merge_bb); + m_ir_builder->SetInsertPoint(then_bb); + Value *context_i64 = m_ir_builder->CreateZExt(exit_instr_i32, m_ir_builder->getInt64Ty()); + context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32); + m_ir_builder->CreateCall2(m_execute_unknown_block, m_state.args[CompileTaskState::Args::State], context_i64); + m_ir_builder->CreateBr(merge_bb); - m_ir_builder->SetInsertPoint(merge_bb); - m_ir_builder->CreateRet(m_ir_builder->getInt32(0)); - } else { - m_ir_builder->CreateRet(exit_instr_i32); - } - } + m_ir_builder->SetInsertPoint(merge_bb); + m_ir_builder->CreateRet(m_ir_builder->getInt32(0)); + } + else { + m_ir_builder->CreateRet(exit_instr_i32); + } + } - // Add incoming values for all exit instr PHI nodes - for (PHINode *exit_instr_i : exit_instr_list) { - BasicBlock *block = exit_instr_i->getParent(); - for (pred_iterator pred_i = pred_begin(block); pred_i != pred_end(block); pred_i++) { - u32 pred_address = GetAddressFromBasicBlockName((*pred_i)->getName()); - exit_instr_i->addIncoming(m_ir_builder->getInt32(pred_address), *pred_i); - } - } + // Add incoming 
values for all exit instr PHI nodes + for (PHINode *exit_instr_i : exit_instr_list) { + BasicBlock *block = exit_instr_i->getParent(); + for (pred_iterator pred_i = pred_begin(block); pred_i != pred_end(block); pred_i++) { + u32 pred_address = GetAddressFromBasicBlockName((*pred_i)->getName()); + exit_instr_i->addIncoming(m_ir_builder->getInt32(pred_address), *pred_i); + } + } + m_recompilation_engine.Log() << *m_module; - m_recompilation_engine.Log() << *m_module; + std::string verify; + raw_string_ostream verify_ostream(verify); + if (verifyFunction(*m_state.function, &verify_ostream)) { + m_recompilation_engine.Log() << "Verification failed: " << verify << "\n"; + } - std::string verify; - raw_string_ostream verify_ostream(verify); - if (verifyFunction(*m_state.function, &verify_ostream)) { - m_recompilation_engine.Log() << "Verification failed: " << verify << "\n"; - } + auto ir_build_end = std::chrono::high_resolution_clock::now(); + m_stats.ir_build_time += std::chrono::duration_cast(ir_build_end - compilation_start); + // Optimize this function + fpm->run(*m_state.function); + auto optimize_end = std::chrono::high_resolution_clock::now(); + m_stats.optimization_time += std::chrono::duration_cast(optimize_end - ir_build_end); - auto ir_build_end = std::chrono::high_resolution_clock::now(); - m_stats.ir_build_time += std::chrono::duration_cast(ir_build_end - compilation_start); + // Translate to machine code + execution_engine->finalizeObject(); + void *function = execution_engine->getPointerToFunction(m_state.function); + auto translate_end = std::chrono::high_resolution_clock::now(); + m_stats.translation_time += std::chrono::duration_cast(translate_end - optimize_end); - // Optimize this function - fpm->run(*m_state.function); - auto optimize_end = std::chrono::high_resolution_clock::now(); - m_stats.optimization_time += std::chrono::duration_cast(optimize_end - ir_build_end); + /* m_recompilation_engine.Log() << "\nDisassembly:\n"; + auto disassembler = 
LLVMCreateDisasm(sys::getProcessTriple().c_str(), nullptr, 0, nullptr, nullptr); + for (size_t pc = 0; pc < mci.size();) { + char str[1024]; - // Translate to machine code - execution_engine->finalizeObject(); - void *function = execution_engine->getPointerToFunction(m_state.function); - auto translate_end = std::chrono::high_resolution_clock::now(); - m_stats.translation_time += std::chrono::duration_cast(translate_end - optimize_end); + auto size = LLVMDisasmInstruction(disassembler, ((u8 *)mci.address()) + pc, mci.size() - pc, (uint64_t)(((u8 *)mci.address()) + pc), str, sizeof(str)); + m_recompilation_engine.Log() << fmt::Format("0x%08X: ", (u64)(((u8 *)mci.address()) + pc)) << str << '\n'; + pc += size; + } -/* m_recompilation_engine.Log() << "\nDisassembly:\n"; - auto disassembler = LLVMCreateDisasm(sys::getProcessTriple().c_str(), nullptr, 0, nullptr, nullptr); - for (size_t pc = 0; pc < mci.size();) { - char str[1024]; + LLVMDisasmDispose(disassembler);*/ - auto size = LLVMDisasmInstruction(disassembler, ((u8 *)mci.address()) + pc, mci.size() - pc, (uint64_t)(((u8 *)mci.address()) + pc), str, sizeof(str)); - m_recompilation_engine.Log() << fmt::Format("0x%08X: ", (u64)(((u8 *)mci.address()) + pc)) << str << '\n'; - pc += size; - } + auto compilation_end = std::chrono::high_resolution_clock::now(); + m_stats.total_time += std::chrono::duration_cast(compilation_end - compilation_start); + delete fpm; - LLVMDisasmDispose(disassembler);*/ - - auto compilation_end = std::chrono::high_resolution_clock::now(); - m_stats.total_time += std::chrono::duration_cast(compilation_end - compilation_start); - delete fpm; - - assert(function != nullptr); - return std::make_pair((Executable)function, execution_engine); + assert(function != nullptr); + return std::make_pair((Executable)function, execution_engine); } Compiler::Stats Compiler::GetStats() { - return m_stats; + return m_stats; } void Compiler::Decode(const u32 code) { - (*PPU_instr::main_list)(this, code); + 
(*PPU_instr::main_list)(this, code); } std::mutex RecompilationEngine::s_mutex; std::shared_ptr RecompilationEngine::s_the_instance = nullptr; RecompilationEngine::RecompilationEngine() - : m_log(nullptr) - , m_last_cache_clear_time(std::chrono::high_resolution_clock::now()) - , m_compiler(*this, CPUHybridDecoderRecompiler::ExecuteFunction, CPUHybridDecoderRecompiler::ExecuteTillReturn, CPUHybridDecoderRecompiler::PollStatus) { - m_compiler.RunAllTests(); + : m_log(nullptr) + , m_last_cache_clear_time(std::chrono::high_resolution_clock::now()) + , m_compiler(*this, CPUHybridDecoderRecompiler::ExecuteFunction, CPUHybridDecoderRecompiler::ExecuteTillReturn, CPUHybridDecoderRecompiler::PollStatus) { + m_compiler.RunAllTests(); } RecompilationEngine::~RecompilationEngine() { - m_address_to_function.clear(); - join(); + m_address_to_function.clear(); + join(); } Executable executeFunc; Executable executeUntilReturn; const Executable *RecompilationEngine::GetExecutable(u32 address, bool isFunction) { - return isFunction ? &executeFunc : &executeUntilReturn; + return isFunction ? 
&executeFunc : &executeUntilReturn; } const Executable *RecompilationEngine::GetCompiledExecutableIfAvailable(u32 address, std::mutex *mut) { - std::lock_guard lock(m_address_to_function_lock); - std::unordered_map::iterator It = m_address_to_function.find(address); - if (It == m_address_to_function.end()) - return nullptr; - if(std::get<1>(It->second) == nullptr) - return nullptr; - mut = &(std::get<3>(It->second)); - return &(std::get<0>(It->second)); + std::lock_guard lock(m_address_to_function_lock); + std::unordered_map::iterator It = m_address_to_function.find(address); + if (It == m_address_to_function.end()) + return nullptr; + if (std::get<1>(It->second) == nullptr) + return nullptr; + mut = &(std::get<3>(It->second)); + return &(std::get<0>(It->second)); } void RecompilationEngine::RemoveUnusedEntriesFromCache() { - auto now = std::chrono::high_resolution_clock::now(); - if (std::chrono::duration_cast(now - m_last_cache_clear_time).count() > 10000) { - for (auto i = m_address_to_function.begin(); i != m_address_to_function.end();) { - auto tmp = i; - i++; - if (std::get<2>(tmp->second) == 0) - m_address_to_function.erase(tmp); - else - std::get<2>(tmp->second) = 0; - } + auto now = std::chrono::high_resolution_clock::now(); + if (std::chrono::duration_cast(now - m_last_cache_clear_time).count() > 10000) { + for (auto i = m_address_to_function.begin(); i != m_address_to_function.end();) { + auto tmp = i; + i++; + if (std::get<2>(tmp->second) == 0) + m_address_to_function.erase(tmp); + else + std::get<2>(tmp->second) = 0; + } - m_last_cache_clear_time = now; - } + m_last_cache_clear_time = now; + } } void RecompilationEngine::NotifyTrace(ExecutionTrace * execution_trace) { - { - std::lock_guard lock(m_pending_execution_traces_lock); - m_pending_execution_traces.push_back(execution_trace); - } + { + std::lock_guard lock(m_pending_execution_traces_lock); + m_pending_execution_traces.push_back(execution_trace); + } - if (!joinable()) { - start(WRAP_EXPR("PPU 
Recompilation Engine"), WRAP_EXPR(Task())); - } + if (!joinable()) { + start(WRAP_EXPR("PPU Recompilation Engine"), WRAP_EXPR(Task())); + } - cv.notify_one(); - // TODO: Increase the priority of the recompilation engine thread + cv.notify_one(); + // TODO: Increase the priority of the recompilation engine thread } raw_fd_ostream & RecompilationEngine::Log() { - if (!m_log) { - std::error_code error; - m_log = new raw_fd_ostream("PPULLVMRecompiler.log", error, sys::fs::F_Text); - m_log->SetUnbuffered(); - } + if (!m_log) { + std::error_code error; + m_log = new raw_fd_ostream("PPULLVMRecompiler.log", error, sys::fs::F_Text); + m_log->SetUnbuffered(); + } - return *m_log; + return *m_log; } void RecompilationEngine::Task() { - bool is_idling = false; - std::chrono::nanoseconds idling_time(0); - std::chrono::nanoseconds recompiling_time(0); + bool is_idling = false; + std::chrono::nanoseconds idling_time(0); + std::chrono::nanoseconds recompiling_time(0); - auto start = std::chrono::high_resolution_clock::now(); - while (joinable() && !Emu.IsStopped()) { - bool work_done_this_iteration = false; - ExecutionTrace * execution_trace = nullptr; + auto start = std::chrono::high_resolution_clock::now(); + while (joinable() && !Emu.IsStopped()) { + bool work_done_this_iteration = false; + ExecutionTrace * execution_trace = nullptr; - { - std::lock_guard lock(m_pending_execution_traces_lock); + { + std::lock_guard lock(m_pending_execution_traces_lock); - auto i = m_pending_execution_traces.begin(); - if (i != m_pending_execution_traces.end()) { - execution_trace = *i; - m_pending_execution_traces.erase(i); - } - } + auto i = m_pending_execution_traces.begin(); + if (i != m_pending_execution_traces.end()) { + execution_trace = *i; + m_pending_execution_traces.erase(i); + } + } - if (execution_trace) { - ProcessExecutionTrace(*execution_trace); - delete execution_trace; - work_done_this_iteration = true; - } + if (execution_trace) { + ProcessExecutionTrace(*execution_trace); + 
delete execution_trace; + work_done_this_iteration = true; + } - if (!work_done_this_iteration) { - // TODO: Reduce the priority of the recompilation engine thread if its set to high priority - } else { - is_idling = false; - } + if (!work_done_this_iteration) { + // TODO: Reduce the priority of the recompilation engine thread if its set to high priority + } + else { + is_idling = false; + } - if (is_idling) { - auto recompiling_start = std::chrono::high_resolution_clock::now(); + if (is_idling) { + auto recompiling_start = std::chrono::high_resolution_clock::now(); - // Recompile the function whose CFG has changed the most since the last time it was compiled - auto candidate = (BlockEntry *)nullptr; - size_t max_diff = 0; - for (auto block : m_block_table) { - if (block->IsFunction() && block->is_compiled) { - auto diff = block->cfg.GetSize() - block->last_compiled_cfg_size; - if (diff > max_diff) { - candidate = block; - max_diff = diff; - } - } - } + // Recompile the function whose CFG has changed the most since the last time it was compiled + auto candidate = (BlockEntry *)nullptr; + size_t max_diff = 0; + for (auto block : m_block_table) { + if (block->IsFunction() && block->is_compiled) { + auto diff = block->cfg.GetSize() - block->last_compiled_cfg_size; + if (diff > max_diff) { + candidate = block; + max_diff = diff; + } + } + } - if (candidate != nullptr) { - Log() << "Recompiling: " << candidate->ToString() << "\n"; - CompileBlock(*candidate); - work_done_this_iteration = true; - } + if (candidate != nullptr) { + Log() << "Recompiling: " << candidate->ToString() << "\n"; + CompileBlock(*candidate); + work_done_this_iteration = true; + } - auto recompiling_end = std::chrono::high_resolution_clock::now(); - recompiling_time += std::chrono::duration_cast(recompiling_end - recompiling_start); - } + auto recompiling_end = std::chrono::high_resolution_clock::now(); + recompiling_time += std::chrono::duration_cast(recompiling_end - recompiling_start); + } - if 
(!work_done_this_iteration) { - is_idling = true; + if (!work_done_this_iteration) { + is_idling = true; - // Wait a few ms for something to happen - auto idling_start = std::chrono::high_resolution_clock::now(); - std::unique_lock lock(mutex); - cv.wait_for(lock, std::chrono::milliseconds(250)); - auto idling_end = std::chrono::high_resolution_clock::now(); - idling_time += std::chrono::duration_cast(idling_end - idling_start); - } - } + // Wait a few ms for something to happen + auto idling_start = std::chrono::high_resolution_clock::now(); + std::unique_lock lock(mutex); + cv.wait_for(lock, std::chrono::milliseconds(250)); + auto idling_end = std::chrono::high_resolution_clock::now(); + idling_time += std::chrono::duration_cast(idling_end - idling_start); + } + } - std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now(); - auto total_time = std::chrono::duration_cast(end - start); - auto compiler_stats = m_compiler.GetStats(); + std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now(); + auto total_time = std::chrono::duration_cast(end - start); + auto compiler_stats = m_compiler.GetStats(); - Log() << "Total time = " << total_time.count() / 1000000 << "ms\n"; - Log() << " Time spent compiling = " << compiler_stats.total_time.count() / 1000000 << "ms\n"; - Log() << " Time spent building IR = " << compiler_stats.ir_build_time.count() / 1000000 << "ms\n"; - Log() << " Time spent optimizing = " << compiler_stats.optimization_time.count() / 1000000 << "ms\n"; - Log() << " Time spent translating = " << compiler_stats.translation_time.count() / 1000000 << "ms\n"; - Log() << " Time spent recompiling = " << recompiling_time.count() / 1000000 << "ms\n"; - Log() << " Time spent idling = " << idling_time.count() / 1000000 << "ms\n"; - Log() << " Time spent doing misc tasks = " << (total_time.count() - idling_time.count() - compiler_stats.total_time.count()) / 1000000 << "ms\n"; + Log() << "Total 
time = " << total_time.count() / 1000000 << "ms\n"; + Log() << " Time spent compiling = " << compiler_stats.total_time.count() / 1000000 << "ms\n"; + Log() << " Time spent building IR = " << compiler_stats.ir_build_time.count() / 1000000 << "ms\n"; + Log() << " Time spent optimizing = " << compiler_stats.optimization_time.count() / 1000000 << "ms\n"; + Log() << " Time spent translating = " << compiler_stats.translation_time.count() / 1000000 << "ms\n"; + Log() << " Time spent recompiling = " << recompiling_time.count() / 1000000 << "ms\n"; + Log() << " Time spent idling = " << idling_time.count() / 1000000 << "ms\n"; + Log() << " Time spent doing misc tasks = " << (total_time.count() - idling_time.count() - compiler_stats.total_time.count()) / 1000000 << "ms\n"; - LOG_NOTICE(PPU, "PPU LLVM Recompilation thread exiting."); - s_the_instance = nullptr; // Can cause deadlock if this is the last instance. Need to fix this. + LOG_NOTICE(PPU, "PPU LLVM Recompilation thread exiting."); + s_the_instance = nullptr; // Can cause deadlock if this is the last instance. Need to fix this. 
} void RecompilationEngine::ProcessExecutionTrace(const ExecutionTrace & execution_trace) { - auto execution_trace_id = execution_trace.GetId(); - auto processed_execution_trace_i = m_processed_execution_traces.find(execution_trace_id); - if (processed_execution_trace_i == m_processed_execution_traces.end()) { - Log() << "Trace: " << execution_trace.ToString() << "\n"; - // Find the function block - BlockEntry key(execution_trace.function_address, execution_trace.function_address); - auto block_i = m_block_table.find(&key); - if (block_i == m_block_table.end()) { - block_i = m_block_table.insert(m_block_table.end(), new BlockEntry(key.cfg.start_address, key.cfg.function_address)); - } + auto execution_trace_id = execution_trace.GetId(); + auto processed_execution_trace_i = m_processed_execution_traces.find(execution_trace_id); + if (processed_execution_trace_i == m_processed_execution_traces.end()) { + Log() << "Trace: " << execution_trace.ToString() << "\n"; + // Find the function block + BlockEntry key(execution_trace.function_address, execution_trace.function_address); + auto block_i = m_block_table.find(&key); + if (block_i == m_block_table.end()) { + block_i = m_block_table.insert(m_block_table.end(), new BlockEntry(key.cfg.start_address, key.cfg.function_address)); + } - auto function_block = *block_i; - block_i = m_block_table.end(); - auto split_trace = false; - std::vector tmp_block_list; - for (auto trace_i = execution_trace.entries.begin(); trace_i != execution_trace.entries.end(); trace_i++) { - if (trace_i->type == ExecutionTraceEntry::Type::CompiledBlock) { - block_i = m_block_table.end(); - split_trace = true; - } + auto function_block = *block_i; + block_i = m_block_table.end(); + auto split_trace = false; + std::vector tmp_block_list; + for (auto trace_i = execution_trace.entries.begin(); trace_i != execution_trace.entries.end(); trace_i++) { + if (trace_i->type == ExecutionTraceEntry::Type::CompiledBlock) { + block_i = m_block_table.end(); + 
split_trace = true; + } - if (block_i == m_block_table.end()) { - BlockEntry key(trace_i->GetPrimaryAddress(), execution_trace.function_address); - block_i = m_block_table.find(&key); - if (block_i == m_block_table.end()) { - block_i = m_block_table.insert(m_block_table.end(), new BlockEntry(key.cfg.start_address, key.cfg.function_address)); - } + if (block_i == m_block_table.end()) { + BlockEntry key(trace_i->GetPrimaryAddress(), execution_trace.function_address); + block_i = m_block_table.find(&key); + if (block_i == m_block_table.end()) { + block_i = m_block_table.insert(m_block_table.end(), new BlockEntry(key.cfg.start_address, key.cfg.function_address)); + } - tmp_block_list.push_back(*block_i); - } + tmp_block_list.push_back(*block_i); + } - const ExecutionTraceEntry * next_trace = nullptr; - if (trace_i + 1 != execution_trace.entries.end()) { - next_trace = &(*(trace_i + 1)); - } else if (!split_trace && execution_trace.type == ExecutionTrace::Type::Loop) { - next_trace = &(*(execution_trace.entries.begin())); - } + const ExecutionTraceEntry * next_trace = nullptr; + if (trace_i + 1 != execution_trace.entries.end()) { + next_trace = &(*(trace_i + 1)); + } + else if (!split_trace && execution_trace.type == ExecutionTrace::Type::Loop) { + next_trace = &(*(execution_trace.entries.begin())); + } - UpdateControlFlowGraph((*block_i)->cfg, *trace_i, next_trace); - if (*block_i != function_block) { - UpdateControlFlowGraph(function_block->cfg, *trace_i, next_trace); - } - } + UpdateControlFlowGraph((*block_i)->cfg, *trace_i, next_trace); + if (*block_i != function_block) { + UpdateControlFlowGraph(function_block->cfg, *trace_i, next_trace); + } + } - processed_execution_trace_i = m_processed_execution_traces.insert(m_processed_execution_traces.end(), std::make_pair(execution_trace_id, std::move(tmp_block_list))); - } + processed_execution_trace_i = m_processed_execution_traces.insert(m_processed_execution_traces.end(), std::make_pair(execution_trace_id, 
std::move(tmp_block_list))); + } - for (auto i = processed_execution_trace_i->second.begin(); i != processed_execution_trace_i->second.end(); i++) { - if (!(*i)->is_compiled) { - (*i)->num_hits++; - if ((*i)->num_hits >= 1000) { // TODO: Make this configurable - CompileBlock(*(*i)); - } - } - } - // TODO:: Syphurith: It is said that just remove_if would cause some troubles.. I don't know if that would cause Memleak. From CppCheck: - // The return value of std::remove_if() is ignored. This function returns an iterator to the end of the range containing those elements that should be kept. - // Elements past new end remain valid but with unspecified values. Use the erase method of the container to delete them. - std::remove_if(processed_execution_trace_i->second.begin(), processed_execution_trace_i->second.end(), [](const BlockEntry * b)->bool { return b->is_compiled; }); + for (auto i = processed_execution_trace_i->second.begin(); i != processed_execution_trace_i->second.end(); i++) { + if (!(*i)->is_compiled) { + (*i)->num_hits++; + if ((*i)->num_hits >= 1000) { // TODO: Make this configurable + CompileBlock(*(*i)); + } + } + } + // TODO:: Syphurith: It is said that just remove_if would cause some troubles.. I don't know if that would cause Memleak. From CppCheck: + // The return value of std::remove_if() is ignored. This function returns an iterator to the end of the range containing those elements that should be kept. + // Elements past new end remain valid but with unspecified values. Use the erase method of the container to delete them. 
+ std::remove_if(processed_execution_trace_i->second.begin(), processed_execution_trace_i->second.end(), [](const BlockEntry * b)->bool { return b->is_compiled; }); } void RecompilationEngine::UpdateControlFlowGraph(ControlFlowGraph & cfg, const ExecutionTraceEntry & this_entry, const ExecutionTraceEntry * next_entry) { - if (this_entry.type == ExecutionTraceEntry::Type::Instruction) { - cfg.instruction_addresses.insert(this_entry.GetPrimaryAddress()); + if (this_entry.type == ExecutionTraceEntry::Type::Instruction) { + cfg.instruction_addresses.insert(this_entry.GetPrimaryAddress()); - if (next_entry) { - if (next_entry->type == ExecutionTraceEntry::Type::Instruction || next_entry->type == ExecutionTraceEntry::Type::CompiledBlock) { - if (next_entry->GetPrimaryAddress() != (this_entry.GetPrimaryAddress() + 4)) { - cfg.branches[this_entry.GetPrimaryAddress()].insert(next_entry->GetPrimaryAddress()); - } - } else if (next_entry->type == ExecutionTraceEntry::Type::FunctionCall) { - cfg.calls[this_entry.data.instruction.address].insert(next_entry->GetPrimaryAddress()); - } - } - } else if (this_entry.type == ExecutionTraceEntry::Type::CompiledBlock) { - if (next_entry) { - if (next_entry->type == ExecutionTraceEntry::Type::Instruction || next_entry->type == ExecutionTraceEntry::Type::CompiledBlock) { - cfg.branches[this_entry.data.compiled_block.exit_address].insert(next_entry->GetPrimaryAddress()); - } else if (next_entry->type == ExecutionTraceEntry::Type::FunctionCall) { - cfg.calls[this_entry.data.compiled_block.exit_address].insert(next_entry->GetPrimaryAddress()); - } - } - } + if (next_entry) { + if (next_entry->type == ExecutionTraceEntry::Type::Instruction || next_entry->type == ExecutionTraceEntry::Type::CompiledBlock) { + if (next_entry->GetPrimaryAddress() != (this_entry.GetPrimaryAddress() + 4)) { + cfg.branches[this_entry.GetPrimaryAddress()].insert(next_entry->GetPrimaryAddress()); + } + } + else if (next_entry->type == 
ExecutionTraceEntry::Type::FunctionCall) { + cfg.calls[this_entry.data.instruction.address].insert(next_entry->GetPrimaryAddress()); + } + } + } + else if (this_entry.type == ExecutionTraceEntry::Type::CompiledBlock) { + if (next_entry) { + if (next_entry->type == ExecutionTraceEntry::Type::Instruction || next_entry->type == ExecutionTraceEntry::Type::CompiledBlock) { + cfg.branches[this_entry.data.compiled_block.exit_address].insert(next_entry->GetPrimaryAddress()); + } + else if (next_entry->type == ExecutionTraceEntry::Type::FunctionCall) { + cfg.calls[this_entry.data.compiled_block.exit_address].insert(next_entry->GetPrimaryAddress()); + } + } + } } void RecompilationEngine::CompileBlock(BlockEntry & block_entry) { - Log() << "Compile: " << block_entry.ToString() << "\n"; - Log() << "CFG: " << block_entry.cfg.ToString() << "\n"; + Log() << "Compile: " << block_entry.ToString() << "\n"; + Log() << "CFG: " << block_entry.cfg.ToString() << "\n"; - const std::pair &compileResult = - m_compiler.Compile(fmt::Format("fn_0x%08X_%u", block_entry.cfg.start_address, block_entry.revision++), block_entry.cfg, - block_entry.IsFunction() ? true : false /*generate_linkable_exits*/); + const std::pair &compileResult = + m_compiler.Compile(fmt::Format("fn_0x%08X_%u", block_entry.cfg.start_address, block_entry.revision++), block_entry.cfg, + block_entry.IsFunction() ? 
true : false /*generate_linkable_exits*/); - // If entry doesn't exist, create it (using lock) - std::unordered_map::iterator It = m_address_to_function.find(block_entry.cfg.start_address); - if (It == m_address_to_function.end()) - { - std::lock_guard lock(m_address_to_function_lock); - std::get<1>(m_address_to_function[block_entry.cfg.start_address]) = nullptr; - } - - // Prevent access on this block - std::lock_guard lock(std::get<3>(m_address_to_function[block_entry.cfg.start_address])); - - std::get<1>(m_address_to_function[block_entry.cfg.start_address]) = std::unique_ptr(compileResult.second); - std::get<0>(m_address_to_function[block_entry.cfg.start_address]) = compileResult.first; - block_entry.last_compiled_cfg_size = block_entry.cfg.GetSize(); - block_entry.is_compiled = true; + // If entry doesn't exist, create it (using lock) + std::unordered_map::iterator It = m_address_to_function.find(block_entry.cfg.start_address); + if (It == m_address_to_function.end()) + { + std::lock_guard lock(m_address_to_function_lock); + std::get<1>(m_address_to_function[block_entry.cfg.start_address]) = nullptr; + } + // Prevent access on this block + std::lock_guard lock(std::get<3>(m_address_to_function[block_entry.cfg.start_address])); + std::get<1>(m_address_to_function[block_entry.cfg.start_address]) = std::unique_ptr(compileResult.second); + std::get<0>(m_address_to_function[block_entry.cfg.start_address]) = compileResult.first; + block_entry.last_compiled_cfg_size = block_entry.cfg.GetSize(); + block_entry.is_compiled = true; } std::shared_ptr RecompilationEngine::GetInstance() { - std::lock_guard lock(s_mutex); + std::lock_guard lock(s_mutex); - if (s_the_instance == nullptr) { - s_the_instance = std::shared_ptr(new RecompilationEngine()); - } + if (s_the_instance == nullptr) { + s_the_instance = std::shared_ptr(new RecompilationEngine()); + } - return s_the_instance; + return s_the_instance; } Tracer::Tracer() - : 
m_recompilation_engine(RecompilationEngine::GetInstance()) { - m_stack.reserve(100); + : m_recompilation_engine(RecompilationEngine::GetInstance()) { + m_stack.reserve(100); } Tracer::~Tracer() { - Terminate(); + Terminate(); } void Tracer::Trace(TraceType trace_type, u32 arg1, u32 arg2) { - ExecutionTrace * execution_trace = nullptr; + ExecutionTrace * execution_trace = nullptr; - switch (trace_type) { - case TraceType::CallFunction: - // arg1 is address of the function - m_stack.back()->entries.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::FunctionCall, arg1)); - break; - case TraceType::EnterFunction: - // arg1 is address of the function - m_stack.push_back(new ExecutionTrace(arg1)); - break; - case TraceType::ExitFromCompiledFunction: - // arg1 is address of function. - // arg2 is the address of the exit instruction. - if (arg2) { - m_stack.push_back(new ExecutionTrace(arg1)); - m_stack.back()->entries.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::CompiledBlock, arg1, arg2)); - } - break; - case TraceType::Return: - // No args used - execution_trace = m_stack.back(); - execution_trace->type = ExecutionTrace::Type::Linear; - m_stack.pop_back(); - break; - case TraceType::Instruction: - // arg1 is the address of the instruction - for (int i = (int)m_stack.back()->entries.size() - 1; i >= 0; i--) { - if ((m_stack.back()->entries[i].type == ExecutionTraceEntry::Type::Instruction && m_stack.back()->entries[i].data.instruction.address == arg1) || - (m_stack.back()->entries[i].type == ExecutionTraceEntry::Type::CompiledBlock && m_stack.back()->entries[i].data.compiled_block.entry_address == arg1)) { - // Found a loop - execution_trace = new ExecutionTrace(m_stack.back()->function_address); - execution_trace->type = ExecutionTrace::Type::Loop; - std::copy(m_stack.back()->entries.begin() + i, m_stack.back()->entries.end(), std::back_inserter(execution_trace->entries)); - m_stack.back()->entries.erase(m_stack.back()->entries.begin() + i + 1, 
m_stack.back()->entries.end()); - break; - } - } + switch (trace_type) { + case TraceType::CallFunction: + // arg1 is address of the function + m_stack.back()->entries.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::FunctionCall, arg1)); + break; + case TraceType::EnterFunction: + // arg1 is address of the function + m_stack.push_back(new ExecutionTrace(arg1)); + break; + case TraceType::ExitFromCompiledFunction: + // arg1 is address of function. + // arg2 is the address of the exit instruction. + if (arg2) { + m_stack.push_back(new ExecutionTrace(arg1)); + m_stack.back()->entries.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::CompiledBlock, arg1, arg2)); + } + break; + case TraceType::Return: + // No args used + execution_trace = m_stack.back(); + execution_trace->type = ExecutionTrace::Type::Linear; + m_stack.pop_back(); + break; + case TraceType::Instruction: + // arg1 is the address of the instruction + for (int i = (int)m_stack.back()->entries.size() - 1; i >= 0; i--) { + if ((m_stack.back()->entries[i].type == ExecutionTraceEntry::Type::Instruction && m_stack.back()->entries[i].data.instruction.address == arg1) || + (m_stack.back()->entries[i].type == ExecutionTraceEntry::Type::CompiledBlock && m_stack.back()->entries[i].data.compiled_block.entry_address == arg1)) { + // Found a loop + execution_trace = new ExecutionTrace(m_stack.back()->function_address); + execution_trace->type = ExecutionTrace::Type::Loop; + std::copy(m_stack.back()->entries.begin() + i, m_stack.back()->entries.end(), std::back_inserter(execution_trace->entries)); + m_stack.back()->entries.erase(m_stack.back()->entries.begin() + i + 1, m_stack.back()->entries.end()); + break; + } + } - if (!execution_trace) { - // A loop was not found - m_stack.back()->entries.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::Instruction, arg1)); - } - break; - case TraceType::ExitFromCompiledBlock: - // arg1 is address of the compiled block. 
- // arg2 is the address of the exit instruction. - m_stack.back()->entries.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::CompiledBlock, arg1, arg2)); + if (!execution_trace) { + // A loop was not found + m_stack.back()->entries.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::Instruction, arg1)); + } + break; + case TraceType::ExitFromCompiledBlock: + // arg1 is address of the compiled block. + // arg2 is the address of the exit instruction. + m_stack.back()->entries.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::CompiledBlock, arg1, arg2)); - if (arg2 == 0) { - // Return from function - execution_trace = m_stack.back(); - execution_trace->type = ExecutionTrace::Type::Linear; - m_stack.pop_back(); - } - break; - default: - assert(0); - break; - } + if (arg2 == 0) { + // Return from function + execution_trace = m_stack.back(); + execution_trace->type = ExecutionTrace::Type::Linear; + m_stack.pop_back(); + } + break; + default: + assert(0); + break; + } - if (execution_trace) { - m_recompilation_engine->NotifyTrace(execution_trace); - } + if (execution_trace) { + m_recompilation_engine->NotifyTrace(execution_trace); + } } void Tracer::Terminate() { - // TODO: Notify recompilation engine + // TODO: Notify recompilation engine } ppu_recompiler_llvm::CPUHybridDecoderRecompiler::CPUHybridDecoderRecompiler(PPUThread & ppu) - : m_ppu(ppu) - , m_interpreter(new PPUInterpreter(ppu)) - , m_decoder(m_interpreter) - , m_recompilation_engine(RecompilationEngine::GetInstance()) { - executeFunc = CPUHybridDecoderRecompiler::ExecuteFunction; - executeUntilReturn = CPUHybridDecoderRecompiler::ExecuteTillReturn; + : m_ppu(ppu) + , m_interpreter(new PPUInterpreter(ppu)) + , m_decoder(m_interpreter) + , m_recompilation_engine(RecompilationEngine::GetInstance()) { + executeFunc = CPUHybridDecoderRecompiler::ExecuteFunction; + executeUntilReturn = CPUHybridDecoderRecompiler::ExecuteTillReturn; } 
ppu_recompiler_llvm::CPUHybridDecoderRecompiler::~CPUHybridDecoderRecompiler() { - } u32 ppu_recompiler_llvm::CPUHybridDecoderRecompiler::DecodeMemory(const u32 address) { - - ExecuteFunction(&m_ppu, 0); - return 0; + ExecuteFunction(&m_ppu, 0); + return 0; } u32 ppu_recompiler_llvm::CPUHybridDecoderRecompiler::ExecuteFunction(PPUThread * ppu_state, u64 context) { - auto execution_engine = (CPUHybridDecoderRecompiler *)ppu_state->GetDecoder(); - execution_engine->m_tracer.Trace(Tracer::TraceType::EnterFunction, ppu_state->PC, 0); - return ExecuteTillReturn(ppu_state, 0); + auto execution_engine = (CPUHybridDecoderRecompiler *)ppu_state->GetDecoder(); + execution_engine->m_tracer.Trace(Tracer::TraceType::EnterFunction, ppu_state->PC, 0); + return ExecuteTillReturn(ppu_state, 0); } /// Get the branch type from a branch instruction static BranchType GetBranchTypeFromInstruction(u32 instruction) { - u32 field1 = instruction >> 26; - u32 lk = instruction & 1; + u32 field1 = instruction >> 26; + u32 lk = instruction & 1; - if (field1 == 16 || field1 == 18) - return lk ? BranchType::FunctionCall : BranchType::LocalBranch; - if (field1 == 19) { - u32 field2 = (instruction >> 1) & 0x3FF; - if (field2 == 16) - return lk ? BranchType::FunctionCall : BranchType::Return; - if (field2 == 528) - return lk ? BranchType::FunctionCall : BranchType::LocalBranch; - return BranchType::NonBranch; - } - if (field1 == 1 && (instruction & EIF_PERFORM_BLR)) // classify HACK instruction - return instruction & EIF_USE_BRANCH ? BranchType::FunctionCall : BranchType::Return; - if (field1 == 1 && (instruction & EIF_USE_BRANCH)) - return BranchType::LocalBranch; - return BranchType::NonBranch; + if (field1 == 16 || field1 == 18) + return lk ? BranchType::FunctionCall : BranchType::LocalBranch; + if (field1 == 19) { + u32 field2 = (instruction >> 1) & 0x3FF; + if (field2 == 16) + return lk ? BranchType::FunctionCall : BranchType::Return; + if (field2 == 528) + return lk ? 
BranchType::FunctionCall : BranchType::LocalBranch; + return BranchType::NonBranch; + } + if (field1 == 1 && (instruction & EIF_PERFORM_BLR)) // classify HACK instruction + return instruction & EIF_USE_BRANCH ? BranchType::FunctionCall : BranchType::Return; + if (field1 == 1 && (instruction & EIF_USE_BRANCH)) + return BranchType::LocalBranch; + return BranchType::NonBranch; } u32 ppu_recompiler_llvm::CPUHybridDecoderRecompiler::ExecuteTillReturn(PPUThread * ppu_state, u64 context) { - CPUHybridDecoderRecompiler *execution_engine = (CPUHybridDecoderRecompiler *)ppu_state->GetDecoder(); + CPUHybridDecoderRecompiler *execution_engine = (CPUHybridDecoderRecompiler *)ppu_state->GetDecoder(); - if (context) - execution_engine->m_tracer.Trace(Tracer::TraceType::ExitFromCompiledFunction, context >> 32, context & 0xFFFFFFFF); + if (context) + execution_engine->m_tracer.Trace(Tracer::TraceType::ExitFromCompiledFunction, context >> 32, context & 0xFFFFFFFF); - while (PollStatus(ppu_state) == false) { - std::mutex mut; - const Executable *executable = execution_engine->m_recompilation_engine->GetCompiledExecutableIfAvailable(ppu_state->PC, &mut); - if (executable) { - std::lock_guard lock(mut); - auto entry = ppu_state->PC; - u32 exit = (u32)(*executable)(ppu_state, 0); - execution_engine->m_tracer.Trace(Tracer::TraceType::ExitFromCompiledBlock, entry, exit); - if (exit == 0) - return 0; - } else { - execution_engine->m_tracer.Trace(Tracer::TraceType::Instruction, ppu_state->PC, 0); - u32 instruction = vm::ps3::read32(ppu_state->PC); - u32 oldPC = ppu_state->PC; - execution_engine->m_decoder.Decode(instruction); - auto branch_type = ppu_state->PC != oldPC ? 
GetBranchTypeFromInstruction(instruction) : BranchType::NonBranch; - ppu_state->PC += 4; + while (PollStatus(ppu_state) == false) { + std::mutex mut; + const Executable *executable = execution_engine->m_recompilation_engine->GetCompiledExecutableIfAvailable(ppu_state->PC, &mut); + if (executable) { + std::lock_guard lock(mut); + auto entry = ppu_state->PC; + u32 exit = (u32)(*executable)(ppu_state, 0); + execution_engine->m_tracer.Trace(Tracer::TraceType::ExitFromCompiledBlock, entry, exit); + if (exit == 0) + return 0; + } + else { + execution_engine->m_tracer.Trace(Tracer::TraceType::Instruction, ppu_state->PC, 0); + u32 instruction = vm::ps3::read32(ppu_state->PC); + u32 oldPC = ppu_state->PC; + execution_engine->m_decoder.Decode(instruction); + auto branch_type = ppu_state->PC != oldPC ? GetBranchTypeFromInstruction(instruction) : BranchType::NonBranch; + ppu_state->PC += 4; - switch (branch_type) { - case BranchType::Return: - execution_engine->m_tracer.Trace(Tracer::TraceType::Return, 0, 0); - if (Emu.GetCPUThreadStop() == ppu_state->PC) ppu_state->fast_stop(); - return 0; - case BranchType::FunctionCall: - execution_engine->m_tracer.Trace(Tracer::TraceType::CallFunction, ppu_state->PC, 0); - executable = execution_engine->m_recompilation_engine->GetExecutable(ppu_state->PC, true); - (*executable)(ppu_state, 0); - break; - case BranchType::LocalBranch: - break; - case BranchType::NonBranch: - break; - default: - assert(0); - break; - } - } - } + switch (branch_type) { + case BranchType::Return: + execution_engine->m_tracer.Trace(Tracer::TraceType::Return, 0, 0); + if (Emu.GetCPUThreadStop() == ppu_state->PC) ppu_state->fast_stop(); + return 0; + case BranchType::FunctionCall: + execution_engine->m_tracer.Trace(Tracer::TraceType::CallFunction, ppu_state->PC, 0); + executable = execution_engine->m_recompilation_engine->GetExecutable(ppu_state->PC, true); + (*executable)(ppu_state, 0); + break; + case BranchType::LocalBranch: + break; + case 
BranchType::NonBranch: + break; + default: + assert(0); + break; + } + } + } - return 0; + return 0; } bool ppu_recompiler_llvm::CPUHybridDecoderRecompiler::PollStatus(PPUThread * ppu_state) { - return ppu_state->check_status(); -} + return ppu_state->check_status(); +} \ No newline at end of file diff --git a/rpcs3/Emu/Cell/PPULLVMRecompiler.h b/rpcs3/Emu/Cell/PPULLVMRecompiler.h index 4800743f30..5f924c2d76 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompiler.h +++ b/rpcs3/Emu/Cell/PPULLVMRecompiler.h @@ -17,1209 +17,1209 @@ #include "llvm/PassManager.h" namespace ppu_recompiler_llvm { - class Compiler; - class RecompilationEngine; - class Tracer; - class ExecutionEngine; - struct PPUState; - - /// An entry in an execution trace - struct ExecutionTraceEntry { - /// Data associated with the entry. This is discriminated by type. - union { - struct Instruction { - u32 address; - } instruction; - - struct FunctionCall { - u32 address; - } function_call; - - struct CompiledBlock { - u32 entry_address; - u32 exit_address; - } compiled_block; - } data; - - /// The type of the entry - enum class Type { - FunctionCall, - Instruction, - CompiledBlock, - } type; - - ExecutionTraceEntry(Type type, u32 arg1, u32 arg2 = 0) - : type(type) { - switch (type) { - case Type::Instruction: - data.instruction.address = arg1; - break; - case Type::FunctionCall: - data.function_call.address = arg1; - break; - case Type::CompiledBlock: - data.compiled_block.entry_address = arg1; - data.compiled_block.exit_address = arg2; - break; - default: - assert(0); - break; - } - } - - u32 GetPrimaryAddress() const { - switch (type) { - case Type::Instruction: - return data.instruction.address; - case Type::FunctionCall: - return data.function_call.address; - case Type::CompiledBlock: - return data.compiled_block.entry_address; - default: - assert(0); - return 0; - } - } - - std::string ToString() const { - switch (type) { - case Type::Instruction: - return fmt::Format("I:0x%08X", 
data.instruction.address); - case Type::FunctionCall: - return fmt::Format("F:0x%08X", data.function_call.address); - case Type::CompiledBlock: - return fmt::Format("C:0x%08X-0x%08X", data.compiled_block.entry_address, data.compiled_block.exit_address); - default: - assert(0); - return ""; - } - } - - u64 hash() const { - u64 hash = ((u64)type << 32); - switch (type) { - case Type::Instruction: - hash |= data.instruction.address; - break; - case Type::FunctionCall: - hash |= data.function_call.address; - break; - case Type::CompiledBlock: - hash = data.compiled_block.exit_address; - hash <<= 32; - hash |= data.compiled_block.entry_address; - break; - default: - assert(0); - break; - } - - return hash; - } - }; - - /// An execution trace. - struct ExecutionTrace { - /// Unique id of an execution trace; - typedef u64 Id; - - /// The function to which this trace belongs - u32 function_address; - - /// Execution trace type - enum class Type { - Linear, - Loop, - } type; - - /// entries in the trace - std::vector entries; - - ExecutionTrace(u32 address) - : function_address(address) { - } - - std::string ToString() const { - auto s = fmt::Format("0x%08X %s ->", function_address, type == ExecutionTrace::Type::Loop ? "Loop" : "Linear"); - for (auto i = 0; i < entries.size(); i++) { - s += " " + entries[i].ToString(); - } - - return s; - } - - Id GetId() const { - Id id = 0; - - for (auto i = entries.begin(); i != entries.end(); i++) { - id ^= i->hash(); - id <<= 1; - } - - return id; - } - }; - - /// A control flow graph - struct ControlFlowGraph { - /// Address of the first instruction - u32 start_address; - - /// Address of the function to which this CFG belongs to - u32 function_address; - - /// Set of addresses of the instructions in the CFG - std::set instruction_addresses; - - /// Branches in the CFG. - /// Key is the address of an instruction - /// Data is the set of all instructions to which this instruction branches to. 
- std::map> branches; - - /// Function calls in the CFG - /// Key is the address of an instruction - /// Data is the set of all functions which this instruction invokes. - std::map> calls; - - ControlFlowGraph(u32 start_address, u32 function_address) - : start_address(start_address) - , function_address(function_address) { - } - - void operator += (const ControlFlowGraph & other) { - for (auto i = other.instruction_addresses.begin(); i != other.instruction_addresses.end(); i++) { - instruction_addresses.insert(*i); - } - - for (auto i = other.branches.begin(); i != other.branches.end(); i++) { - auto j = branches.find(i->first); - if (j == branches.end()) { - j = branches.insert(branches.begin(), std::make_pair(i->first, std::set())); - } - - for (auto k = i->second.begin(); k != i->second.end(); k++) { - j->second.insert(*k); - } - } - - for (auto i = other.calls.begin(); i != other.calls.end(); i++) { - auto j = calls.find(i->first); - if (j == calls.end()) { - j = calls.insert(calls.begin(), std::make_pair(i->first, std::set())); - } - - for (auto k = i->second.begin(); k != i->second.end(); k++) { - j->second.insert(*k); - } - } - } - - std::string ToString() const { - auto s = fmt::Format("0x%08X (0x%08X): Size=%u ->", start_address, function_address, GetSize()); - for (auto i = instruction_addresses.begin(); i != instruction_addresses.end(); i++) { - s += fmt::Format(" 0x%08X", *i); - } - - s += "\nBranches:"; - for (auto i = branches.begin(); i != branches.end(); i++) { - s += fmt::Format("\n0x%08X ->", i->first); - for (auto j = i->second.begin(); j != i->second.end(); j++) { - s += fmt::Format(" 0x%08X", *j); - } - } - - s += "\nCalls:"; - for (auto i = calls.begin(); i != calls.end(); i++) { - s += fmt::Format("\n0x%08X ->", i->first); - for (auto j = i->second.begin(); j != i->second.end(); j++) { - s += fmt::Format(" 0x%08X", *j); - } - } - - return s; - } - - /// Get the size of the CFG. 
The size is a score of how large the CFG is and increases everytime - /// a node or an edge is added to the CFG. - size_t GetSize() const { - return instruction_addresses.size() + branches.size() + calls.size(); - } - }; - - enum class BranchType { - NonBranch, - LocalBranch, - FunctionCall, - Return, - }; - - /// Pointer to an executable - typedef u32(*Executable)(PPUThread * ppu_state, u64 context); - - /// PPU compiler that uses LLVM for code generation and optimization - class Compiler : protected PPUOpcodes, protected PPCDecoder { - public: - struct Stats { - /// Time spent building the LLVM IR - std::chrono::nanoseconds ir_build_time; - - /// Time spent optimizing - std::chrono::nanoseconds optimization_time; - - /// Time spent translating LLVM IR to machine code - std::chrono::nanoseconds translation_time; - - /// Total time - std::chrono::nanoseconds total_time; - }; - - Compiler(RecompilationEngine & recompilation_engine, const Executable execute_unknown_function, - const Executable execute_unknown_block, bool (*poll_status_function)(PPUThread * ppu_state)); - - Compiler(const Compiler & other) = delete; - Compiler(Compiler && other) = delete; - - virtual ~Compiler(); - - Compiler & operator = (const Compiler & other) = delete; - Compiler & operator = (Compiler && other) = delete; - - /** - * Compile a code fragment described by a cfg and return an executable and the ExecutionEngine storing it - * Pointer to function can be retrieved with getPointerToFunction - */ - std::pair Compile(const std::string & name, const ControlFlowGraph & cfg, bool generate_linkable_exits); - - /// Retrieve compiler stats - Stats GetStats(); - - /// Execute all tests - void RunAllTests(); - - protected: - void Decode(const u32 code) override; - - void NULL_OP() override; - void NOP() override; - - void TDI(u32 to, u32 ra, s32 simm16) override; - void TWI(u32 to, u32 ra, s32 simm16) override; - - void MFVSCR(u32 vd) override; - void MTVSCR(u32 vb) override; - void VADDCUW(u32 
vd, u32 va, u32 vb) override; - void VADDFP(u32 vd, u32 va, u32 vb) override; - void VADDSBS(u32 vd, u32 va, u32 vb) override; - void VADDSHS(u32 vd, u32 va, u32 vb) override; - void VADDSWS(u32 vd, u32 va, u32 vb) override; - void VADDUBM(u32 vd, u32 va, u32 vb) override; - void VADDUBS(u32 vd, u32 va, u32 vb) override; - void VADDUHM(u32 vd, u32 va, u32 vb) override; - void VADDUHS(u32 vd, u32 va, u32 vb) override; - void VADDUWM(u32 vd, u32 va, u32 vb) override; - void VADDUWS(u32 vd, u32 va, u32 vb) override; - void VAND(u32 vd, u32 va, u32 vb) override; - void VANDC(u32 vd, u32 va, u32 vb) override; - void VAVGSB(u32 vd, u32 va, u32 vb) override; - void VAVGSH(u32 vd, u32 va, u32 vb) override; - void VAVGSW(u32 vd, u32 va, u32 vb) override; - void VAVGUB(u32 vd, u32 va, u32 vb) override; - void VAVGUH(u32 vd, u32 va, u32 vb) override; - void VAVGUW(u32 vd, u32 va, u32 vb) override; - void VCFSX(u32 vd, u32 uimm5, u32 vb) override; - void VCFUX(u32 vd, u32 uimm5, u32 vb) override; - void VCMPBFP(u32 vd, u32 va, u32 vb) override; - void VCMPBFP_(u32 vd, u32 va, u32 vb) override; - void VCMPEQFP(u32 vd, u32 va, u32 vb) override; - void VCMPEQFP_(u32 vd, u32 va, u32 vb) override; - void VCMPEQUB(u32 vd, u32 va, u32 vb) override; - void VCMPEQUB_(u32 vd, u32 va, u32 vb) override; - void VCMPEQUH(u32 vd, u32 va, u32 vb) override; - void VCMPEQUH_(u32 vd, u32 va, u32 vb) override; - void VCMPEQUW(u32 vd, u32 va, u32 vb) override; - void VCMPEQUW_(u32 vd, u32 va, u32 vb) override; - void VCMPGEFP(u32 vd, u32 va, u32 vb) override; - void VCMPGEFP_(u32 vd, u32 va, u32 vb) override; - void VCMPGTFP(u32 vd, u32 va, u32 vb) override; - void VCMPGTFP_(u32 vd, u32 va, u32 vb) override; - void VCMPGTSB(u32 vd, u32 va, u32 vb) override; - void VCMPGTSB_(u32 vd, u32 va, u32 vb) override; - void VCMPGTSH(u32 vd, u32 va, u32 vb) override; - void VCMPGTSH_(u32 vd, u32 va, u32 vb) override; - void VCMPGTSW(u32 vd, u32 va, u32 vb) override; - void VCMPGTSW_(u32 vd, u32 va, u32 vb) 
override; - void VCMPGTUB(u32 vd, u32 va, u32 vb) override; - void VCMPGTUB_(u32 vd, u32 va, u32 vb) override; - void VCMPGTUH(u32 vd, u32 va, u32 vb) override; - void VCMPGTUH_(u32 vd, u32 va, u32 vb) override; - void VCMPGTUW(u32 vd, u32 va, u32 vb) override; - void VCMPGTUW_(u32 vd, u32 va, u32 vb) override; - void VCTSXS(u32 vd, u32 uimm5, u32 vb) override; - void VCTUXS(u32 vd, u32 uimm5, u32 vb) override; - void VEXPTEFP(u32 vd, u32 vb) override; - void VLOGEFP(u32 vd, u32 vb) override; - void VMADDFP(u32 vd, u32 va, u32 vc, u32 vb) override; - void VMAXFP(u32 vd, u32 va, u32 vb) override; - void VMAXSB(u32 vd, u32 va, u32 vb) override; - void VMAXSH(u32 vd, u32 va, u32 vb) override; - void VMAXSW(u32 vd, u32 va, u32 vb) override; - void VMAXUB(u32 vd, u32 va, u32 vb) override; - void VMAXUH(u32 vd, u32 va, u32 vb) override; - void VMAXUW(u32 vd, u32 va, u32 vb) override; - void VMHADDSHS(u32 vd, u32 va, u32 vb, u32 vc) override; - void VMHRADDSHS(u32 vd, u32 va, u32 vb, u32 vc) override; - void VMINFP(u32 vd, u32 va, u32 vb) override; - void VMINSB(u32 vd, u32 va, u32 vb) override; - void VMINSH(u32 vd, u32 va, u32 vb) override; - void VMINSW(u32 vd, u32 va, u32 vb) override; - void VMINUB(u32 vd, u32 va, u32 vb) override; - void VMINUH(u32 vd, u32 va, u32 vb) override; - void VMINUW(u32 vd, u32 va, u32 vb) override; - void VMLADDUHM(u32 vd, u32 va, u32 vb, u32 vc) override; - void VMRGHB(u32 vd, u32 va, u32 vb) override; - void VMRGHH(u32 vd, u32 va, u32 vb) override; - void VMRGHW(u32 vd, u32 va, u32 vb) override; - void VMRGLB(u32 vd, u32 va, u32 vb) override; - void VMRGLH(u32 vd, u32 va, u32 vb) override; - void VMRGLW(u32 vd, u32 va, u32 vb) override; - void VMSUMMBM(u32 vd, u32 va, u32 vb, u32 vc) override; - void VMSUMSHM(u32 vd, u32 va, u32 vb, u32 vc) override; - void VMSUMSHS(u32 vd, u32 va, u32 vb, u32 vc) override; - void VMSUMUBM(u32 vd, u32 va, u32 vb, u32 vc) override; - void VMSUMUHM(u32 vd, u32 va, u32 vb, u32 vc) override; - void 
VMSUMUHS(u32 vd, u32 va, u32 vb, u32 vc) override; - void VMULESB(u32 vd, u32 va, u32 vb) override; - void VMULESH(u32 vd, u32 va, u32 vb) override; - void VMULEUB(u32 vd, u32 va, u32 vb) override; - void VMULEUH(u32 vd, u32 va, u32 vb) override; - void VMULOSB(u32 vd, u32 va, u32 vb) override; - void VMULOSH(u32 vd, u32 va, u32 vb) override; - void VMULOUB(u32 vd, u32 va, u32 vb) override; - void VMULOUH(u32 vd, u32 va, u32 vb) override; - void VNMSUBFP(u32 vd, u32 va, u32 vc, u32 vb) override; - void VNOR(u32 vd, u32 va, u32 vb) override; - void VOR(u32 vd, u32 va, u32 vb) override; - void VPERM(u32 vd, u32 va, u32 vb, u32 vc) override; - void VPKPX(u32 vd, u32 va, u32 vb) override; - void VPKSHSS(u32 vd, u32 va, u32 vb) override; - void VPKSHUS(u32 vd, u32 va, u32 vb) override; - void VPKSWSS(u32 vd, u32 va, u32 vb) override; - void VPKSWUS(u32 vd, u32 va, u32 vb) override; - void VPKUHUM(u32 vd, u32 va, u32 vb) override; - void VPKUHUS(u32 vd, u32 va, u32 vb) override; - void VPKUWUM(u32 vd, u32 va, u32 vb) override; - void VPKUWUS(u32 vd, u32 va, u32 vb) override; - void VREFP(u32 vd, u32 vb) override; - void VRFIM(u32 vd, u32 vb) override; - void VRFIN(u32 vd, u32 vb) override; - void VRFIP(u32 vd, u32 vb) override; - void VRFIZ(u32 vd, u32 vb) override; - void VRLB(u32 vd, u32 va, u32 vb) override; - void VRLH(u32 vd, u32 va, u32 vb) override; - void VRLW(u32 vd, u32 va, u32 vb) override; - void VRSQRTEFP(u32 vd, u32 vb) override; - void VSEL(u32 vd, u32 va, u32 vb, u32 vc) override; - void VSL(u32 vd, u32 va, u32 vb) override; - void VSLB(u32 vd, u32 va, u32 vb) override; - void VSLDOI(u32 vd, u32 va, u32 vb, u32 sh) override; - void VSLH(u32 vd, u32 va, u32 vb) override; - void VSLO(u32 vd, u32 va, u32 vb) override; - void VSLW(u32 vd, u32 va, u32 vb) override; - void VSPLTB(u32 vd, u32 uimm5, u32 vb) override; - void VSPLTH(u32 vd, u32 uimm5, u32 vb) override; - void VSPLTISB(u32 vd, s32 simm5) override; - void VSPLTISH(u32 vd, s32 simm5) override; - void 
VSPLTISW(u32 vd, s32 simm5) override; - void VSPLTW(u32 vd, u32 uimm5, u32 vb) override; - void VSR(u32 vd, u32 va, u32 vb) override; - void VSRAB(u32 vd, u32 va, u32 vb) override; - void VSRAH(u32 vd, u32 va, u32 vb) override; - void VSRAW(u32 vd, u32 va, u32 vb) override; - void VSRB(u32 vd, u32 va, u32 vb) override; - void VSRH(u32 vd, u32 va, u32 vb) override; - void VSRO(u32 vd, u32 va, u32 vb) override; - void VSRW(u32 vd, u32 va, u32 vb) override; - void VSUBCUW(u32 vd, u32 va, u32 vb) override; - void VSUBFP(u32 vd, u32 va, u32 vb) override; - void VSUBSBS(u32 vd, u32 va, u32 vb) override; - void VSUBSHS(u32 vd, u32 va, u32 vb) override; - void VSUBSWS(u32 vd, u32 va, u32 vb) override; - void VSUBUBM(u32 vd, u32 va, u32 vb) override; - void VSUBUBS(u32 vd, u32 va, u32 vb) override; - void VSUBUHM(u32 vd, u32 va, u32 vb) override; - void VSUBUHS(u32 vd, u32 va, u32 vb) override; - void VSUBUWM(u32 vd, u32 va, u32 vb) override; - void VSUBUWS(u32 vd, u32 va, u32 vb) override; - void VSUMSWS(u32 vd, u32 va, u32 vb) override; - void VSUM2SWS(u32 vd, u32 va, u32 vb) override; - void VSUM4SBS(u32 vd, u32 va, u32 vb) override; - void VSUM4SHS(u32 vd, u32 va, u32 vb) override; - void VSUM4UBS(u32 vd, u32 va, u32 vb) override; - void VUPKHPX(u32 vd, u32 vb) override; - void VUPKHSB(u32 vd, u32 vb) override; - void VUPKHSH(u32 vd, u32 vb) override; - void VUPKLPX(u32 vd, u32 vb) override; - void VUPKLSB(u32 vd, u32 vb) override; - void VUPKLSH(u32 vd, u32 vb) override; - void VXOR(u32 vd, u32 va, u32 vb) override; - void MULLI(u32 rd, u32 ra, s32 simm16) override; - void SUBFIC(u32 rd, u32 ra, s32 simm16) override; - void CMPLI(u32 bf, u32 l, u32 ra, u32 uimm16) override; - void CMPI(u32 bf, u32 l, u32 ra, s32 simm16) override; - void ADDIC(u32 rd, u32 ra, s32 simm16) override; - void ADDIC_(u32 rd, u32 ra, s32 simm16) override; - void ADDI(u32 rd, u32 ra, s32 simm16) override; - void ADDIS(u32 rd, u32 ra, s32 simm16) override; - void BC(u32 bo, u32 bi, s32 bd, u32 
aa, u32 lk) override; - void HACK(u32 id) override; - void SC(u32 sc_code) override; - void B(s32 ll, u32 aa, u32 lk) override; - void MCRF(u32 crfd, u32 crfs) override; - void BCLR(u32 bo, u32 bi, u32 bh, u32 lk) override; - void CRNOR(u32 bt, u32 ba, u32 bb) override; - void CRANDC(u32 bt, u32 ba, u32 bb) override; - void ISYNC() override; - void CRXOR(u32 bt, u32 ba, u32 bb) override; - void CRNAND(u32 bt, u32 ba, u32 bb) override; - void CRAND(u32 bt, u32 ba, u32 bb) override; - void CREQV(u32 bt, u32 ba, u32 bb) override; - void CRORC(u32 bt, u32 ba, u32 bb) override; - void CROR(u32 bt, u32 ba, u32 bb) override; - void BCCTR(u32 bo, u32 bi, u32 bh, u32 lk) override; - void RLWIMI(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, u32 rc) override; - void RLWINM(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, u32 rc) override; - void RLWNM(u32 ra, u32 rs, u32 rb, u32 MB, u32 ME, u32 rc) override; - void ORI(u32 rs, u32 ra, u32 uimm16) override; - void ORIS(u32 rs, u32 ra, u32 uimm16) override; - void XORI(u32 ra, u32 rs, u32 uimm16) override; - void XORIS(u32 ra, u32 rs, u32 uimm16) override; - void ANDI_(u32 ra, u32 rs, u32 uimm16) override; - void ANDIS_(u32 ra, u32 rs, u32 uimm16) override; - void RLDICL(u32 ra, u32 rs, u32 sh, u32 mb, u32 rc) override; - void RLDICR(u32 ra, u32 rs, u32 sh, u32 me, u32 rc) override; - void RLDIC(u32 ra, u32 rs, u32 sh, u32 mb, u32 rc) override; - void RLDIMI(u32 ra, u32 rs, u32 sh, u32 mb, u32 rc) override; - void RLDC_LR(u32 ra, u32 rs, u32 rb, u32 m_eb, u32 is_r, u32 rc) override; - void CMP(u32 crfd, u32 l, u32 ra, u32 rb) override; - void TW(u32 to, u32 ra, u32 rb) override; - void LVSL(u32 vd, u32 ra, u32 rb) override; - void LVEBX(u32 vd, u32 ra, u32 rb) override; - void SUBFC(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) override; - void MULHDU(u32 rd, u32 ra, u32 rb, u32 rc) override; - void ADDC(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) override; - void MULHWU(u32 rd, u32 ra, u32 rb, u32 rc) override; - void MFOCRF(u32 a, u32 rd, u32 crm) 
override; - void LWARX(u32 rd, u32 ra, u32 rb) override; - void LDX(u32 ra, u32 rs, u32 rb) override; - void LWZX(u32 rd, u32 ra, u32 rb) override; - void SLW(u32 ra, u32 rs, u32 rb, u32 rc) override; - void CNTLZW(u32 ra, u32 rs, u32 rc) override; - void SLD(u32 ra, u32 rs, u32 rb, u32 rc) override; - void AND(u32 ra, u32 rs, u32 rb, u32 rc) override; - void CMPL(u32 bf, u32 l, u32 ra, u32 rb) override; - void LVSR(u32 vd, u32 ra, u32 rb) override; - void LVEHX(u32 vd, u32 ra, u32 rb) override; - void SUBF(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) override; - void LDUX(u32 rd, u32 ra, u32 rb) override; - void DCBST(u32 ra, u32 rb) override; - void LWZUX(u32 rd, u32 ra, u32 rb) override; - void CNTLZD(u32 ra, u32 rs, u32 rc) override; - void ANDC(u32 ra, u32 rs, u32 rb, u32 rc) override; - void TD(u32 to, u32 ra, u32 rb) override; - void LVEWX(u32 vd, u32 ra, u32 rb) override; - void MULHD(u32 rd, u32 ra, u32 rb, u32 rc) override; - void MULHW(u32 rd, u32 ra, u32 rb, u32 rc) override; - void LDARX(u32 rd, u32 ra, u32 rb) override; - void DCBF(u32 ra, u32 rb) override; - void LBZX(u32 rd, u32 ra, u32 rb) override; - void LVX(u32 vd, u32 ra, u32 rb) override; - void NEG(u32 rd, u32 ra, u32 oe, u32 rc) override; - void LBZUX(u32 rd, u32 ra, u32 rb) override; - void NOR(u32 ra, u32 rs, u32 rb, u32 rc) override; - void STVEBX(u32 vs, u32 ra, u32 rb) override; - void SUBFE(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) override; - void ADDE(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) override; - void MTOCRF(u32 l, u32 crm, u32 rs) override; - void STDX(u32 rs, u32 ra, u32 rb) override; - void STWCX_(u32 rs, u32 ra, u32 rb) override; - void STWX(u32 rs, u32 ra, u32 rb) override; - void STVEHX(u32 vs, u32 ra, u32 rb) override; - void STDUX(u32 rs, u32 ra, u32 rb) override; - void STWUX(u32 rs, u32 ra, u32 rb) override; - void STVEWX(u32 vs, u32 ra, u32 rb) override; - void SUBFZE(u32 rd, u32 ra, u32 oe, u32 rc) override; - void ADDZE(u32 rd, u32 ra, u32 oe, u32 rc) override; - void 
STDCX_(u32 rs, u32 ra, u32 rb) override; - void STBX(u32 rs, u32 ra, u32 rb) override; - void STVX(u32 vs, u32 ra, u32 rb) override; - void MULLD(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) override; - void SUBFME(u32 rd, u32 ra, u32 oe, u32 rc) override; - void ADDME(u32 rd, u32 ra, u32 oe, u32 rc) override; - void MULLW(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) override; - void DCBTST(u32 ra, u32 rb, u32 th) override; - void STBUX(u32 rs, u32 ra, u32 rb) override; - void ADD(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) override; - void DCBT(u32 ra, u32 rb, u32 th) override; - void LHZX(u32 rd, u32 ra, u32 rb) override; - void EQV(u32 ra, u32 rs, u32 rb, u32 rc) override; - void ECIWX(u32 rd, u32 ra, u32 rb) override; - void LHZUX(u32 rd, u32 ra, u32 rb) override; - void XOR(u32 rs, u32 ra, u32 rb, u32 rc) override; - void MFSPR(u32 rd, u32 spr) override; - void LWAX(u32 rd, u32 ra, u32 rb) override; - void DST(u32 ra, u32 rb, u32 strm, u32 t) override; - void LHAX(u32 rd, u32 ra, u32 rb) override; - void LVXL(u32 vd, u32 ra, u32 rb) override; - void MFTB(u32 rd, u32 spr) override; - void LWAUX(u32 rd, u32 ra, u32 rb) override; - void DSTST(u32 ra, u32 rb, u32 strm, u32 t) override; - void LHAUX(u32 rd, u32 ra, u32 rb) override; - void STHX(u32 rs, u32 ra, u32 rb) override; - void ORC(u32 rs, u32 ra, u32 rb, u32 rc) override; - void ECOWX(u32 rs, u32 ra, u32 rb) override; - void STHUX(u32 rs, u32 ra, u32 rb) override; - void OR(u32 ra, u32 rs, u32 rb, u32 rc) override; - void DIVDU(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) override; - void DIVWU(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) override; - void MTSPR(u32 spr, u32 rs) override; - void DCBI(u32 ra, u32 rb) override; - void NAND(u32 ra, u32 rs, u32 rb, u32 rc) override; - void STVXL(u32 vs, u32 ra, u32 rb) override; - void DIVD(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) override; - void DIVW(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) override; - void LVLX(u32 vd, u32 ra, u32 rb) override; - void LDBRX(u32 rd, u32 ra, u32 rb) 
override; - void LSWX(u32 rd, u32 ra, u32 rb) override; - void LWBRX(u32 rd, u32 ra, u32 rb) override; - void LFSX(u32 frd, u32 ra, u32 rb) override; - void SRW(u32 ra, u32 rs, u32 rb, u32 rc) override; - void SRD(u32 ra, u32 rs, u32 rb, u32 rc) override; - void LVRX(u32 vd, u32 ra, u32 rb) override; - void LSWI(u32 rd, u32 ra, u32 nb) override; - void LFSUX(u32 frd, u32 ra, u32 rb) override; - void SYNC(u32 l) override; - void LFDX(u32 frd, u32 ra, u32 rb) override; - void LFDUX(u32 frd, u32 ra, u32 rb) override; - void STVLX(u32 vs, u32 ra, u32 rb) override; - void STDBRX(u32 rd, u32 ra, u32 rb) override; - void STSWX(u32 rs, u32 ra, u32 rb) override; - void STWBRX(u32 rs, u32 ra, u32 rb) override; - void STFSX(u32 frs, u32 ra, u32 rb) override; - void STVRX(u32 vs, u32 ra, u32 rb) override; - void STFSUX(u32 frs, u32 ra, u32 rb) override; - void STSWI(u32 rd, u32 ra, u32 nb) override; - void STFDX(u32 frs, u32 ra, u32 rb) override; - void STFDUX(u32 frs, u32 ra, u32 rb) override; - void LVLXL(u32 vd, u32 ra, u32 rb) override; - void LHBRX(u32 rd, u32 ra, u32 rb) override; - void SRAW(u32 ra, u32 rs, u32 rb, u32 rc) override; - void SRAD(u32 ra, u32 rs, u32 rb, u32 rc) override; - void LVRXL(u32 vd, u32 ra, u32 rb) override; - void DSS(u32 strm, u32 a) override; - void SRAWI(u32 ra, u32 rs, u32 sh, u32 rc) override; - void SRADI1(u32 ra, u32 rs, u32 sh, u32 rc) override; - void SRADI2(u32 ra, u32 rs, u32 sh, u32 rc) override; - void EIEIO() override; - void STVLXL(u32 vs, u32 ra, u32 rb) override; - void STHBRX(u32 rs, u32 ra, u32 rb) override; - void EXTSH(u32 ra, u32 rs, u32 rc) override; - void STVRXL(u32 sd, u32 ra, u32 rb) override; - void EXTSB(u32 ra, u32 rs, u32 rc) override; - void STFIWX(u32 frs, u32 ra, u32 rb) override; - void EXTSW(u32 ra, u32 rs, u32 rc) override; - void ICBI(u32 ra, u32 rb) override; - void DCBZ(u32 ra, u32 rb) override; - void LWZ(u32 rd, u32 ra, s32 d) override; - void LWZU(u32 rd, u32 ra, s32 d) override; - void LBZ(u32 rd, u32 
ra, s32 d) override; - void LBZU(u32 rd, u32 ra, s32 d) override; - void STW(u32 rs, u32 ra, s32 d) override; - void STWU(u32 rs, u32 ra, s32 d) override; - void STB(u32 rs, u32 ra, s32 d) override; - void STBU(u32 rs, u32 ra, s32 d) override; - void LHZ(u32 rd, u32 ra, s32 d) override; - void LHZU(u32 rd, u32 ra, s32 d) override; - void LHA(u32 rs, u32 ra, s32 d) override; - void LHAU(u32 rs, u32 ra, s32 d) override; - void STH(u32 rs, u32 ra, s32 d) override; - void STHU(u32 rs, u32 ra, s32 d) override; - void LMW(u32 rd, u32 ra, s32 d) override; - void STMW(u32 rs, u32 ra, s32 d) override; - void LFS(u32 frd, u32 ra, s32 d) override; - void LFSU(u32 frd, u32 ra, s32 d) override; - void LFD(u32 frd, u32 ra, s32 d) override; - void LFDU(u32 frd, u32 ra, s32 d) override; - void STFS(u32 frs, u32 ra, s32 d) override; - void STFSU(u32 frs, u32 ra, s32 d) override; - void STFD(u32 frs, u32 ra, s32 d) override; - void STFDU(u32 frs, u32 ra, s32 d) override; - void LD(u32 rd, u32 ra, s32 ds) override; - void LDU(u32 rd, u32 ra, s32 ds) override; - void LWA(u32 rd, u32 ra, s32 ds) override; - void FDIVS(u32 frd, u32 fra, u32 frb, u32 rc) override; - void FSUBS(u32 frd, u32 fra, u32 frb, u32 rc) override; - void FADDS(u32 frd, u32 fra, u32 frb, u32 rc) override; - void FSQRTS(u32 frd, u32 frb, u32 rc) override; - void FRES(u32 frd, u32 frb, u32 rc) override; - void FMULS(u32 frd, u32 fra, u32 frc, u32 rc) override; - void FMADDS(u32 frd, u32 fra, u32 frc, u32 frb, u32 rc) override; - void FMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, u32 rc) override; - void FNMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, u32 rc) override; - void FNMADDS(u32 frd, u32 fra, u32 frc, u32 frb, u32 rc) override; - void STD(u32 rs, u32 ra, s32 ds) override; - void STDU(u32 rs, u32 ra, s32 ds) override; - void MTFSB1(u32 bt, u32 rc) override; - void MCRFS(u32 bf, u32 bfa) override; - void MTFSB0(u32 bt, u32 rc) override; - void MTFSFI(u32 crfd, u32 i, u32 rc) override; - void MFFS(u32 frd, u32 rc) 
override; - void MTFSF(u32 flm, u32 frb, u32 rc) override; - - void FCMPU(u32 bf, u32 fra, u32 frb) override; - void FRSP(u32 frd, u32 frb, u32 rc) override; - void FCTIW(u32 frd, u32 frb, u32 rc) override; - void FCTIWZ(u32 frd, u32 frb, u32 rc) override; - void FDIV(u32 frd, u32 fra, u32 frb, u32 rc) override; - void FSUB(u32 frd, u32 fra, u32 frb, u32 rc) override; - void FADD(u32 frd, u32 fra, u32 frb, u32 rc) override; - void FSQRT(u32 frd, u32 frb, u32 rc) override; - void FSEL(u32 frd, u32 fra, u32 frc, u32 frb, u32 rc) override; - void FMUL(u32 frd, u32 fra, u32 frc, u32 rc) override; - void FRSQRTE(u32 frd, u32 frb, u32 rc) override; - void FMSUB(u32 frd, u32 fra, u32 frc, u32 frb, u32 rc) override; - void FMADD(u32 frd, u32 fra, u32 frc, u32 frb, u32 rc) override; - void FNMSUB(u32 frd, u32 fra, u32 frc, u32 frb, u32 rc) override; - void FNMADD(u32 frd, u32 fra, u32 frc, u32 frb, u32 rc) override; - void FCMPO(u32 crfd, u32 fra, u32 frb) override; - void FNEG(u32 frd, u32 frb, u32 rc) override; - void FMR(u32 frd, u32 frb, u32 rc) override; - void FNABS(u32 frd, u32 frb, u32 rc) override; - void FABS(u32 frd, u32 frb, u32 rc) override; - void FCTID(u32 frd, u32 frb, u32 rc) override; - void FCTIDZ(u32 frd, u32 frb, u32 rc) override; - void FCFID(u32 frd, u32 frb, u32 rc) override; - - void UNK(const u32 code, const u32 opcode, const u32 gcode) override; - - private: - /// State of a compilation task - struct CompileTaskState { - enum Args { - State, - Context, - MaxArgs, - }; - - /// The LLVM function for the compilation task - llvm::Function * function; - - /// Args of the LLVM function - llvm::Value * args[MaxArgs]; - - /// The CFG being compiled - const ControlFlowGraph * cfg; - - /// Address of the current instruction being compiled - u32 current_instruction_address; - - /// A flag used to detect branch instructions. - /// This is set to false at the start of compilation of an instruction. 
- /// If a branch instruction is encountered, this is set to true by the decode function. - bool hit_branch_instruction; - - /// Create code such that exit points can be linked to other blocks - bool generate_linkable_exits; - }; - - /// Recompilation engine - RecompilationEngine & m_recompilation_engine; - - /// The function that should be called to check the status of the thread - bool (*m_poll_status_function)(PPUThread * ppu_state); - - /// The function that will be called to execute unknown functions - llvm::Function * m_execute_unknown_function; - - /// The executable that will be called to execute unknown blocks - llvm::Function * m_execute_unknown_block; - - /// Maps function name to executable memory pointer - std::unordered_map m_executableMap; - - /// LLVM context - llvm::LLVMContext * m_llvm_context; - - /// LLVM IR builder - llvm::IRBuilder<> * m_ir_builder; + class Compiler; + class RecompilationEngine; + class Tracer; + class ExecutionEngine; + struct PPUState; + + /// An entry in an execution trace + struct ExecutionTraceEntry { + /// Data associated with the entry. This is discriminated by type. 
+ union { + struct Instruction { + u32 address; + } instruction; + + struct FunctionCall { + u32 address; + } function_call; + + struct CompiledBlock { + u32 entry_address; + u32 exit_address; + } compiled_block; + } data; + + /// The type of the entry + enum class Type { + FunctionCall, + Instruction, + CompiledBlock, + } type; + + ExecutionTraceEntry(Type type, u32 arg1, u32 arg2 = 0) + : type(type) { + switch (type) { + case Type::Instruction: + data.instruction.address = arg1; + break; + case Type::FunctionCall: + data.function_call.address = arg1; + break; + case Type::CompiledBlock: + data.compiled_block.entry_address = arg1; + data.compiled_block.exit_address = arg2; + break; + default: + assert(0); + break; + } + } + + u32 GetPrimaryAddress() const { + switch (type) { + case Type::Instruction: + return data.instruction.address; + case Type::FunctionCall: + return data.function_call.address; + case Type::CompiledBlock: + return data.compiled_block.entry_address; + default: + assert(0); + return 0; + } + } + + std::string ToString() const { + switch (type) { + case Type::Instruction: + return fmt::Format("I:0x%08X", data.instruction.address); + case Type::FunctionCall: + return fmt::Format("F:0x%08X", data.function_call.address); + case Type::CompiledBlock: + return fmt::Format("C:0x%08X-0x%08X", data.compiled_block.entry_address, data.compiled_block.exit_address); + default: + assert(0); + return ""; + } + } + + u64 hash() const { + u64 hash = ((u64)type << 32); + switch (type) { + case Type::Instruction: + hash |= data.instruction.address; + break; + case Type::FunctionCall: + hash |= data.function_call.address; + break; + case Type::CompiledBlock: + hash = data.compiled_block.exit_address; + hash <<= 32; + hash |= data.compiled_block.entry_address; + break; + default: + assert(0); + break; + } + + return hash; + } + }; + + /// An execution trace. 
+ struct ExecutionTrace { + /// Unique id of an execution trace; + typedef u64 Id; + + /// The function to which this trace belongs + u32 function_address; + + /// Execution trace type + enum class Type { + Linear, + Loop, + } type; + + /// entries in the trace + std::vector entries; + + ExecutionTrace(u32 address) + : function_address(address) { + } + + std::string ToString() const { + auto s = fmt::Format("0x%08X %s ->", function_address, type == ExecutionTrace::Type::Loop ? "Loop" : "Linear"); + for (auto i = 0; i < entries.size(); i++) { + s += " " + entries[i].ToString(); + } + + return s; + } + + Id GetId() const { + Id id = 0; + + for (auto i = entries.begin(); i != entries.end(); i++) { + id ^= i->hash(); + id <<= 1; + } + + return id; + } + }; + + /// A control flow graph + struct ControlFlowGraph { + /// Address of the first instruction + u32 start_address; + + /// Address of the function to which this CFG belongs to + u32 function_address; + + /// Set of addresses of the instructions in the CFG + std::set instruction_addresses; + + /// Branches in the CFG. + /// Key is the address of an instruction + /// Data is the set of all instructions to which this instruction branches to. + std::map> branches; + + /// Function calls in the CFG + /// Key is the address of an instruction + /// Data is the set of all functions which this instruction invokes. 
+ std::map> calls; + + ControlFlowGraph(u32 start_address, u32 function_address) + : start_address(start_address) + , function_address(function_address) { + } + + void operator += (const ControlFlowGraph & other) { + for (auto i = other.instruction_addresses.begin(); i != other.instruction_addresses.end(); i++) { + instruction_addresses.insert(*i); + } + + for (auto i = other.branches.begin(); i != other.branches.end(); i++) { + auto j = branches.find(i->first); + if (j == branches.end()) { + j = branches.insert(branches.begin(), std::make_pair(i->first, std::set())); + } + + for (auto k = i->second.begin(); k != i->second.end(); k++) { + j->second.insert(*k); + } + } + + for (auto i = other.calls.begin(); i != other.calls.end(); i++) { + auto j = calls.find(i->first); + if (j == calls.end()) { + j = calls.insert(calls.begin(), std::make_pair(i->first, std::set())); + } + + for (auto k = i->second.begin(); k != i->second.end(); k++) { + j->second.insert(*k); + } + } + } + + std::string ToString() const { + auto s = fmt::Format("0x%08X (0x%08X): Size=%u ->", start_address, function_address, GetSize()); + for (auto i = instruction_addresses.begin(); i != instruction_addresses.end(); i++) { + s += fmt::Format(" 0x%08X", *i); + } + + s += "\nBranches:"; + for (auto i = branches.begin(); i != branches.end(); i++) { + s += fmt::Format("\n0x%08X ->", i->first); + for (auto j = i->second.begin(); j != i->second.end(); j++) { + s += fmt::Format(" 0x%08X", *j); + } + } + + s += "\nCalls:"; + for (auto i = calls.begin(); i != calls.end(); i++) { + s += fmt::Format("\n0x%08X ->", i->first); + for (auto j = i->second.begin(); j != i->second.end(); j++) { + s += fmt::Format(" 0x%08X", *j); + } + } + + return s; + } + + /// Get the size of the CFG. The size is a score of how large the CFG is and increases everytime + /// a node or an edge is added to the CFG. 
+ size_t GetSize() const { + return instruction_addresses.size() + branches.size() + calls.size(); + } + }; + + enum class BranchType { + NonBranch, + LocalBranch, + FunctionCall, + Return, + }; + + /// Pointer to an executable + typedef u32(*Executable)(PPUThread * ppu_state, u64 context); + + /// PPU compiler that uses LLVM for code generation and optimization + class Compiler : protected PPUOpcodes, protected PPCDecoder { + public: + struct Stats { + /// Time spent building the LLVM IR + std::chrono::nanoseconds ir_build_time; + + /// Time spent optimizing + std::chrono::nanoseconds optimization_time; + + /// Time spent translating LLVM IR to machine code + std::chrono::nanoseconds translation_time; + + /// Total time + std::chrono::nanoseconds total_time; + }; + + Compiler(RecompilationEngine & recompilation_engine, const Executable execute_unknown_function, + const Executable execute_unknown_block, bool(*poll_status_function)(PPUThread * ppu_state)); + + Compiler(const Compiler & other) = delete; + Compiler(Compiler && other) = delete; + + virtual ~Compiler(); + + Compiler & operator = (const Compiler & other) = delete; + Compiler & operator = (Compiler && other) = delete; + + /** + * Compile a code fragment described by a cfg and return an executable and the ExecutionEngine storing it + * Pointer to function can be retrieved with getPointerToFunction + */ + std::pair Compile(const std::string & name, const ControlFlowGraph & cfg, bool generate_linkable_exits); + + /// Retrieve compiler stats + Stats GetStats(); + + /// Execute all tests + void RunAllTests(); + + protected: + void Decode(const u32 code) override; + + void NULL_OP() override; + void NOP() override; + + void TDI(u32 to, u32 ra, s32 simm16) override; + void TWI(u32 to, u32 ra, s32 simm16) override; + + void MFVSCR(u32 vd) override; + void MTVSCR(u32 vb) override; + void VADDCUW(u32 vd, u32 va, u32 vb) override; + void VADDFP(u32 vd, u32 va, u32 vb) override; + void VADDSBS(u32 vd, u32 va, u32 vb) 
override; + void VADDSHS(u32 vd, u32 va, u32 vb) override; + void VADDSWS(u32 vd, u32 va, u32 vb) override; + void VADDUBM(u32 vd, u32 va, u32 vb) override; + void VADDUBS(u32 vd, u32 va, u32 vb) override; + void VADDUHM(u32 vd, u32 va, u32 vb) override; + void VADDUHS(u32 vd, u32 va, u32 vb) override; + void VADDUWM(u32 vd, u32 va, u32 vb) override; + void VADDUWS(u32 vd, u32 va, u32 vb) override; + void VAND(u32 vd, u32 va, u32 vb) override; + void VANDC(u32 vd, u32 va, u32 vb) override; + void VAVGSB(u32 vd, u32 va, u32 vb) override; + void VAVGSH(u32 vd, u32 va, u32 vb) override; + void VAVGSW(u32 vd, u32 va, u32 vb) override; + void VAVGUB(u32 vd, u32 va, u32 vb) override; + void VAVGUH(u32 vd, u32 va, u32 vb) override; + void VAVGUW(u32 vd, u32 va, u32 vb) override; + void VCFSX(u32 vd, u32 uimm5, u32 vb) override; + void VCFUX(u32 vd, u32 uimm5, u32 vb) override; + void VCMPBFP(u32 vd, u32 va, u32 vb) override; + void VCMPBFP_(u32 vd, u32 va, u32 vb) override; + void VCMPEQFP(u32 vd, u32 va, u32 vb) override; + void VCMPEQFP_(u32 vd, u32 va, u32 vb) override; + void VCMPEQUB(u32 vd, u32 va, u32 vb) override; + void VCMPEQUB_(u32 vd, u32 va, u32 vb) override; + void VCMPEQUH(u32 vd, u32 va, u32 vb) override; + void VCMPEQUH_(u32 vd, u32 va, u32 vb) override; + void VCMPEQUW(u32 vd, u32 va, u32 vb) override; + void VCMPEQUW_(u32 vd, u32 va, u32 vb) override; + void VCMPGEFP(u32 vd, u32 va, u32 vb) override; + void VCMPGEFP_(u32 vd, u32 va, u32 vb) override; + void VCMPGTFP(u32 vd, u32 va, u32 vb) override; + void VCMPGTFP_(u32 vd, u32 va, u32 vb) override; + void VCMPGTSB(u32 vd, u32 va, u32 vb) override; + void VCMPGTSB_(u32 vd, u32 va, u32 vb) override; + void VCMPGTSH(u32 vd, u32 va, u32 vb) override; + void VCMPGTSH_(u32 vd, u32 va, u32 vb) override; + void VCMPGTSW(u32 vd, u32 va, u32 vb) override; + void VCMPGTSW_(u32 vd, u32 va, u32 vb) override; + void VCMPGTUB(u32 vd, u32 va, u32 vb) override; + void VCMPGTUB_(u32 vd, u32 va, u32 vb) override; + void 
VCMPGTUH(u32 vd, u32 va, u32 vb) override; + void VCMPGTUH_(u32 vd, u32 va, u32 vb) override; + void VCMPGTUW(u32 vd, u32 va, u32 vb) override; + void VCMPGTUW_(u32 vd, u32 va, u32 vb) override; + void VCTSXS(u32 vd, u32 uimm5, u32 vb) override; + void VCTUXS(u32 vd, u32 uimm5, u32 vb) override; + void VEXPTEFP(u32 vd, u32 vb) override; + void VLOGEFP(u32 vd, u32 vb) override; + void VMADDFP(u32 vd, u32 va, u32 vc, u32 vb) override; + void VMAXFP(u32 vd, u32 va, u32 vb) override; + void VMAXSB(u32 vd, u32 va, u32 vb) override; + void VMAXSH(u32 vd, u32 va, u32 vb) override; + void VMAXSW(u32 vd, u32 va, u32 vb) override; + void VMAXUB(u32 vd, u32 va, u32 vb) override; + void VMAXUH(u32 vd, u32 va, u32 vb) override; + void VMAXUW(u32 vd, u32 va, u32 vb) override; + void VMHADDSHS(u32 vd, u32 va, u32 vb, u32 vc) override; + void VMHRADDSHS(u32 vd, u32 va, u32 vb, u32 vc) override; + void VMINFP(u32 vd, u32 va, u32 vb) override; + void VMINSB(u32 vd, u32 va, u32 vb) override; + void VMINSH(u32 vd, u32 va, u32 vb) override; + void VMINSW(u32 vd, u32 va, u32 vb) override; + void VMINUB(u32 vd, u32 va, u32 vb) override; + void VMINUH(u32 vd, u32 va, u32 vb) override; + void VMINUW(u32 vd, u32 va, u32 vb) override; + void VMLADDUHM(u32 vd, u32 va, u32 vb, u32 vc) override; + void VMRGHB(u32 vd, u32 va, u32 vb) override; + void VMRGHH(u32 vd, u32 va, u32 vb) override; + void VMRGHW(u32 vd, u32 va, u32 vb) override; + void VMRGLB(u32 vd, u32 va, u32 vb) override; + void VMRGLH(u32 vd, u32 va, u32 vb) override; + void VMRGLW(u32 vd, u32 va, u32 vb) override; + void VMSUMMBM(u32 vd, u32 va, u32 vb, u32 vc) override; + void VMSUMSHM(u32 vd, u32 va, u32 vb, u32 vc) override; + void VMSUMSHS(u32 vd, u32 va, u32 vb, u32 vc) override; + void VMSUMUBM(u32 vd, u32 va, u32 vb, u32 vc) override; + void VMSUMUHM(u32 vd, u32 va, u32 vb, u32 vc) override; + void VMSUMUHS(u32 vd, u32 va, u32 vb, u32 vc) override; + void VMULESB(u32 vd, u32 va, u32 vb) override; + void VMULESH(u32 vd, u32 
va, u32 vb) override; + void VMULEUB(u32 vd, u32 va, u32 vb) override; + void VMULEUH(u32 vd, u32 va, u32 vb) override; + void VMULOSB(u32 vd, u32 va, u32 vb) override; + void VMULOSH(u32 vd, u32 va, u32 vb) override; + void VMULOUB(u32 vd, u32 va, u32 vb) override; + void VMULOUH(u32 vd, u32 va, u32 vb) override; + void VNMSUBFP(u32 vd, u32 va, u32 vc, u32 vb) override; + void VNOR(u32 vd, u32 va, u32 vb) override; + void VOR(u32 vd, u32 va, u32 vb) override; + void VPERM(u32 vd, u32 va, u32 vb, u32 vc) override; + void VPKPX(u32 vd, u32 va, u32 vb) override; + void VPKSHSS(u32 vd, u32 va, u32 vb) override; + void VPKSHUS(u32 vd, u32 va, u32 vb) override; + void VPKSWSS(u32 vd, u32 va, u32 vb) override; + void VPKSWUS(u32 vd, u32 va, u32 vb) override; + void VPKUHUM(u32 vd, u32 va, u32 vb) override; + void VPKUHUS(u32 vd, u32 va, u32 vb) override; + void VPKUWUM(u32 vd, u32 va, u32 vb) override; + void VPKUWUS(u32 vd, u32 va, u32 vb) override; + void VREFP(u32 vd, u32 vb) override; + void VRFIM(u32 vd, u32 vb) override; + void VRFIN(u32 vd, u32 vb) override; + void VRFIP(u32 vd, u32 vb) override; + void VRFIZ(u32 vd, u32 vb) override; + void VRLB(u32 vd, u32 va, u32 vb) override; + void VRLH(u32 vd, u32 va, u32 vb) override; + void VRLW(u32 vd, u32 va, u32 vb) override; + void VRSQRTEFP(u32 vd, u32 vb) override; + void VSEL(u32 vd, u32 va, u32 vb, u32 vc) override; + void VSL(u32 vd, u32 va, u32 vb) override; + void VSLB(u32 vd, u32 va, u32 vb) override; + void VSLDOI(u32 vd, u32 va, u32 vb, u32 sh) override; + void VSLH(u32 vd, u32 va, u32 vb) override; + void VSLO(u32 vd, u32 va, u32 vb) override; + void VSLW(u32 vd, u32 va, u32 vb) override; + void VSPLTB(u32 vd, u32 uimm5, u32 vb) override; + void VSPLTH(u32 vd, u32 uimm5, u32 vb) override; + void VSPLTISB(u32 vd, s32 simm5) override; + void VSPLTISH(u32 vd, s32 simm5) override; + void VSPLTISW(u32 vd, s32 simm5) override; + void VSPLTW(u32 vd, u32 uimm5, u32 vb) override; + void VSR(u32 vd, u32 va, u32 vb) 
override; + void VSRAB(u32 vd, u32 va, u32 vb) override; + void VSRAH(u32 vd, u32 va, u32 vb) override; + void VSRAW(u32 vd, u32 va, u32 vb) override; + void VSRB(u32 vd, u32 va, u32 vb) override; + void VSRH(u32 vd, u32 va, u32 vb) override; + void VSRO(u32 vd, u32 va, u32 vb) override; + void VSRW(u32 vd, u32 va, u32 vb) override; + void VSUBCUW(u32 vd, u32 va, u32 vb) override; + void VSUBFP(u32 vd, u32 va, u32 vb) override; + void VSUBSBS(u32 vd, u32 va, u32 vb) override; + void VSUBSHS(u32 vd, u32 va, u32 vb) override; + void VSUBSWS(u32 vd, u32 va, u32 vb) override; + void VSUBUBM(u32 vd, u32 va, u32 vb) override; + void VSUBUBS(u32 vd, u32 va, u32 vb) override; + void VSUBUHM(u32 vd, u32 va, u32 vb) override; + void VSUBUHS(u32 vd, u32 va, u32 vb) override; + void VSUBUWM(u32 vd, u32 va, u32 vb) override; + void VSUBUWS(u32 vd, u32 va, u32 vb) override; + void VSUMSWS(u32 vd, u32 va, u32 vb) override; + void VSUM2SWS(u32 vd, u32 va, u32 vb) override; + void VSUM4SBS(u32 vd, u32 va, u32 vb) override; + void VSUM4SHS(u32 vd, u32 va, u32 vb) override; + void VSUM4UBS(u32 vd, u32 va, u32 vb) override; + void VUPKHPX(u32 vd, u32 vb) override; + void VUPKHSB(u32 vd, u32 vb) override; + void VUPKHSH(u32 vd, u32 vb) override; + void VUPKLPX(u32 vd, u32 vb) override; + void VUPKLSB(u32 vd, u32 vb) override; + void VUPKLSH(u32 vd, u32 vb) override; + void VXOR(u32 vd, u32 va, u32 vb) override; + void MULLI(u32 rd, u32 ra, s32 simm16) override; + void SUBFIC(u32 rd, u32 ra, s32 simm16) override; + void CMPLI(u32 bf, u32 l, u32 ra, u32 uimm16) override; + void CMPI(u32 bf, u32 l, u32 ra, s32 simm16) override; + void ADDIC(u32 rd, u32 ra, s32 simm16) override; + void ADDIC_(u32 rd, u32 ra, s32 simm16) override; + void ADDI(u32 rd, u32 ra, s32 simm16) override; + void ADDIS(u32 rd, u32 ra, s32 simm16) override; + void BC(u32 bo, u32 bi, s32 bd, u32 aa, u32 lk) override; + void HACK(u32 id) override; + void SC(u32 sc_code) override; + void B(s32 ll, u32 aa, u32 lk) 
override; + void MCRF(u32 crfd, u32 crfs) override; + void BCLR(u32 bo, u32 bi, u32 bh, u32 lk) override; + void CRNOR(u32 bt, u32 ba, u32 bb) override; + void CRANDC(u32 bt, u32 ba, u32 bb) override; + void ISYNC() override; + void CRXOR(u32 bt, u32 ba, u32 bb) override; + void CRNAND(u32 bt, u32 ba, u32 bb) override; + void CRAND(u32 bt, u32 ba, u32 bb) override; + void CREQV(u32 bt, u32 ba, u32 bb) override; + void CRORC(u32 bt, u32 ba, u32 bb) override; + void CROR(u32 bt, u32 ba, u32 bb) override; + void BCCTR(u32 bo, u32 bi, u32 bh, u32 lk) override; + void RLWIMI(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, u32 rc) override; + void RLWINM(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, u32 rc) override; + void RLWNM(u32 ra, u32 rs, u32 rb, u32 MB, u32 ME, u32 rc) override; + void ORI(u32 rs, u32 ra, u32 uimm16) override; + void ORIS(u32 rs, u32 ra, u32 uimm16) override; + void XORI(u32 ra, u32 rs, u32 uimm16) override; + void XORIS(u32 ra, u32 rs, u32 uimm16) override; + void ANDI_(u32 ra, u32 rs, u32 uimm16) override; + void ANDIS_(u32 ra, u32 rs, u32 uimm16) override; + void RLDICL(u32 ra, u32 rs, u32 sh, u32 mb, u32 rc) override; + void RLDICR(u32 ra, u32 rs, u32 sh, u32 me, u32 rc) override; + void RLDIC(u32 ra, u32 rs, u32 sh, u32 mb, u32 rc) override; + void RLDIMI(u32 ra, u32 rs, u32 sh, u32 mb, u32 rc) override; + void RLDC_LR(u32 ra, u32 rs, u32 rb, u32 m_eb, u32 is_r, u32 rc) override; + void CMP(u32 crfd, u32 l, u32 ra, u32 rb) override; + void TW(u32 to, u32 ra, u32 rb) override; + void LVSL(u32 vd, u32 ra, u32 rb) override; + void LVEBX(u32 vd, u32 ra, u32 rb) override; + void SUBFC(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) override; + void MULHDU(u32 rd, u32 ra, u32 rb, u32 rc) override; + void ADDC(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) override; + void MULHWU(u32 rd, u32 ra, u32 rb, u32 rc) override; + void MFOCRF(u32 a, u32 rd, u32 crm) override; + void LWARX(u32 rd, u32 ra, u32 rb) override; + void LDX(u32 ra, u32 rs, u32 rb) override; + void LWZX(u32 
rd, u32 ra, u32 rb) override; + void SLW(u32 ra, u32 rs, u32 rb, u32 rc) override; + void CNTLZW(u32 ra, u32 rs, u32 rc) override; + void SLD(u32 ra, u32 rs, u32 rb, u32 rc) override; + void AND(u32 ra, u32 rs, u32 rb, u32 rc) override; + void CMPL(u32 bf, u32 l, u32 ra, u32 rb) override; + void LVSR(u32 vd, u32 ra, u32 rb) override; + void LVEHX(u32 vd, u32 ra, u32 rb) override; + void SUBF(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) override; + void LDUX(u32 rd, u32 ra, u32 rb) override; + void DCBST(u32 ra, u32 rb) override; + void LWZUX(u32 rd, u32 ra, u32 rb) override; + void CNTLZD(u32 ra, u32 rs, u32 rc) override; + void ANDC(u32 ra, u32 rs, u32 rb, u32 rc) override; + void TD(u32 to, u32 ra, u32 rb) override; + void LVEWX(u32 vd, u32 ra, u32 rb) override; + void MULHD(u32 rd, u32 ra, u32 rb, u32 rc) override; + void MULHW(u32 rd, u32 ra, u32 rb, u32 rc) override; + void LDARX(u32 rd, u32 ra, u32 rb) override; + void DCBF(u32 ra, u32 rb) override; + void LBZX(u32 rd, u32 ra, u32 rb) override; + void LVX(u32 vd, u32 ra, u32 rb) override; + void NEG(u32 rd, u32 ra, u32 oe, u32 rc) override; + void LBZUX(u32 rd, u32 ra, u32 rb) override; + void NOR(u32 ra, u32 rs, u32 rb, u32 rc) override; + void STVEBX(u32 vs, u32 ra, u32 rb) override; + void SUBFE(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) override; + void ADDE(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) override; + void MTOCRF(u32 l, u32 crm, u32 rs) override; + void STDX(u32 rs, u32 ra, u32 rb) override; + void STWCX_(u32 rs, u32 ra, u32 rb) override; + void STWX(u32 rs, u32 ra, u32 rb) override; + void STVEHX(u32 vs, u32 ra, u32 rb) override; + void STDUX(u32 rs, u32 ra, u32 rb) override; + void STWUX(u32 rs, u32 ra, u32 rb) override; + void STVEWX(u32 vs, u32 ra, u32 rb) override; + void SUBFZE(u32 rd, u32 ra, u32 oe, u32 rc) override; + void ADDZE(u32 rd, u32 ra, u32 oe, u32 rc) override; + void STDCX_(u32 rs, u32 ra, u32 rb) override; + void STBX(u32 rs, u32 ra, u32 rb) override; + void STVX(u32 vs, u32 ra, u32 
rb) override; + void MULLD(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) override; + void SUBFME(u32 rd, u32 ra, u32 oe, u32 rc) override; + void ADDME(u32 rd, u32 ra, u32 oe, u32 rc) override; + void MULLW(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) override; + void DCBTST(u32 ra, u32 rb, u32 th) override; + void STBUX(u32 rs, u32 ra, u32 rb) override; + void ADD(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) override; + void DCBT(u32 ra, u32 rb, u32 th) override; + void LHZX(u32 rd, u32 ra, u32 rb) override; + void EQV(u32 ra, u32 rs, u32 rb, u32 rc) override; + void ECIWX(u32 rd, u32 ra, u32 rb) override; + void LHZUX(u32 rd, u32 ra, u32 rb) override; + void XOR(u32 rs, u32 ra, u32 rb, u32 rc) override; + void MFSPR(u32 rd, u32 spr) override; + void LWAX(u32 rd, u32 ra, u32 rb) override; + void DST(u32 ra, u32 rb, u32 strm, u32 t) override; + void LHAX(u32 rd, u32 ra, u32 rb) override; + void LVXL(u32 vd, u32 ra, u32 rb) override; + void MFTB(u32 rd, u32 spr) override; + void LWAUX(u32 rd, u32 ra, u32 rb) override; + void DSTST(u32 ra, u32 rb, u32 strm, u32 t) override; + void LHAUX(u32 rd, u32 ra, u32 rb) override; + void STHX(u32 rs, u32 ra, u32 rb) override; + void ORC(u32 rs, u32 ra, u32 rb, u32 rc) override; + void ECOWX(u32 rs, u32 ra, u32 rb) override; + void STHUX(u32 rs, u32 ra, u32 rb) override; + void OR(u32 ra, u32 rs, u32 rb, u32 rc) override; + void DIVDU(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) override; + void DIVWU(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) override; + void MTSPR(u32 spr, u32 rs) override; + void DCBI(u32 ra, u32 rb) override; + void NAND(u32 ra, u32 rs, u32 rb, u32 rc) override; + void STVXL(u32 vs, u32 ra, u32 rb) override; + void DIVD(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) override; + void DIVW(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) override; + void LVLX(u32 vd, u32 ra, u32 rb) override; + void LDBRX(u32 rd, u32 ra, u32 rb) override; + void LSWX(u32 rd, u32 ra, u32 rb) override; + void LWBRX(u32 rd, u32 ra, u32 rb) override; + void LFSX(u32 
frd, u32 ra, u32 rb) override; + void SRW(u32 ra, u32 rs, u32 rb, u32 rc) override; + void SRD(u32 ra, u32 rs, u32 rb, u32 rc) override; + void LVRX(u32 vd, u32 ra, u32 rb) override; + void LSWI(u32 rd, u32 ra, u32 nb) override; + void LFSUX(u32 frd, u32 ra, u32 rb) override; + void SYNC(u32 l) override; + void LFDX(u32 frd, u32 ra, u32 rb) override; + void LFDUX(u32 frd, u32 ra, u32 rb) override; + void STVLX(u32 vs, u32 ra, u32 rb) override; + void STDBRX(u32 rd, u32 ra, u32 rb) override; + void STSWX(u32 rs, u32 ra, u32 rb) override; + void STWBRX(u32 rs, u32 ra, u32 rb) override; + void STFSX(u32 frs, u32 ra, u32 rb) override; + void STVRX(u32 vs, u32 ra, u32 rb) override; + void STFSUX(u32 frs, u32 ra, u32 rb) override; + void STSWI(u32 rd, u32 ra, u32 nb) override; + void STFDX(u32 frs, u32 ra, u32 rb) override; + void STFDUX(u32 frs, u32 ra, u32 rb) override; + void LVLXL(u32 vd, u32 ra, u32 rb) override; + void LHBRX(u32 rd, u32 ra, u32 rb) override; + void SRAW(u32 ra, u32 rs, u32 rb, u32 rc) override; + void SRAD(u32 ra, u32 rs, u32 rb, u32 rc) override; + void LVRXL(u32 vd, u32 ra, u32 rb) override; + void DSS(u32 strm, u32 a) override; + void SRAWI(u32 ra, u32 rs, u32 sh, u32 rc) override; + void SRADI1(u32 ra, u32 rs, u32 sh, u32 rc) override; + void SRADI2(u32 ra, u32 rs, u32 sh, u32 rc) override; + void EIEIO() override; + void STVLXL(u32 vs, u32 ra, u32 rb) override; + void STHBRX(u32 rs, u32 ra, u32 rb) override; + void EXTSH(u32 ra, u32 rs, u32 rc) override; + void STVRXL(u32 sd, u32 ra, u32 rb) override; + void EXTSB(u32 ra, u32 rs, u32 rc) override; + void STFIWX(u32 frs, u32 ra, u32 rb) override; + void EXTSW(u32 ra, u32 rs, u32 rc) override; + void ICBI(u32 ra, u32 rb) override; + void DCBZ(u32 ra, u32 rb) override; + void LWZ(u32 rd, u32 ra, s32 d) override; + void LWZU(u32 rd, u32 ra, s32 d) override; + void LBZ(u32 rd, u32 ra, s32 d) override; + void LBZU(u32 rd, u32 ra, s32 d) override; + void STW(u32 rs, u32 ra, s32 d) override; + void 
STWU(u32 rs, u32 ra, s32 d) override; + void STB(u32 rs, u32 ra, s32 d) override; + void STBU(u32 rs, u32 ra, s32 d) override; + void LHZ(u32 rd, u32 ra, s32 d) override; + void LHZU(u32 rd, u32 ra, s32 d) override; + void LHA(u32 rs, u32 ra, s32 d) override; + void LHAU(u32 rs, u32 ra, s32 d) override; + void STH(u32 rs, u32 ra, s32 d) override; + void STHU(u32 rs, u32 ra, s32 d) override; + void LMW(u32 rd, u32 ra, s32 d) override; + void STMW(u32 rs, u32 ra, s32 d) override; + void LFS(u32 frd, u32 ra, s32 d) override; + void LFSU(u32 frd, u32 ra, s32 d) override; + void LFD(u32 frd, u32 ra, s32 d) override; + void LFDU(u32 frd, u32 ra, s32 d) override; + void STFS(u32 frs, u32 ra, s32 d) override; + void STFSU(u32 frs, u32 ra, s32 d) override; + void STFD(u32 frs, u32 ra, s32 d) override; + void STFDU(u32 frs, u32 ra, s32 d) override; + void LD(u32 rd, u32 ra, s32 ds) override; + void LDU(u32 rd, u32 ra, s32 ds) override; + void LWA(u32 rd, u32 ra, s32 ds) override; + void FDIVS(u32 frd, u32 fra, u32 frb, u32 rc) override; + void FSUBS(u32 frd, u32 fra, u32 frb, u32 rc) override; + void FADDS(u32 frd, u32 fra, u32 frb, u32 rc) override; + void FSQRTS(u32 frd, u32 frb, u32 rc) override; + void FRES(u32 frd, u32 frb, u32 rc) override; + void FMULS(u32 frd, u32 fra, u32 frc, u32 rc) override; + void FMADDS(u32 frd, u32 fra, u32 frc, u32 frb, u32 rc) override; + void FMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, u32 rc) override; + void FNMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, u32 rc) override; + void FNMADDS(u32 frd, u32 fra, u32 frc, u32 frb, u32 rc) override; + void STD(u32 rs, u32 ra, s32 ds) override; + void STDU(u32 rs, u32 ra, s32 ds) override; + void MTFSB1(u32 bt, u32 rc) override; + void MCRFS(u32 bf, u32 bfa) override; + void MTFSB0(u32 bt, u32 rc) override; + void MTFSFI(u32 crfd, u32 i, u32 rc) override; + void MFFS(u32 frd, u32 rc) override; + void MTFSF(u32 flm, u32 frb, u32 rc) override; + + void FCMPU(u32 bf, u32 fra, u32 frb) override; + void 
FRSP(u32 frd, u32 frb, u32 rc) override; + void FCTIW(u32 frd, u32 frb, u32 rc) override; + void FCTIWZ(u32 frd, u32 frb, u32 rc) override; + void FDIV(u32 frd, u32 fra, u32 frb, u32 rc) override; + void FSUB(u32 frd, u32 fra, u32 frb, u32 rc) override; + void FADD(u32 frd, u32 fra, u32 frb, u32 rc) override; + void FSQRT(u32 frd, u32 frb, u32 rc) override; + void FSEL(u32 frd, u32 fra, u32 frc, u32 frb, u32 rc) override; + void FMUL(u32 frd, u32 fra, u32 frc, u32 rc) override; + void FRSQRTE(u32 frd, u32 frb, u32 rc) override; + void FMSUB(u32 frd, u32 fra, u32 frc, u32 frb, u32 rc) override; + void FMADD(u32 frd, u32 fra, u32 frc, u32 frb, u32 rc) override; + void FNMSUB(u32 frd, u32 fra, u32 frc, u32 frb, u32 rc) override; + void FNMADD(u32 frd, u32 fra, u32 frc, u32 frb, u32 rc) override; + void FCMPO(u32 crfd, u32 fra, u32 frb) override; + void FNEG(u32 frd, u32 frb, u32 rc) override; + void FMR(u32 frd, u32 frb, u32 rc) override; + void FNABS(u32 frd, u32 frb, u32 rc) override; + void FABS(u32 frd, u32 frb, u32 rc) override; + void FCTID(u32 frd, u32 frb, u32 rc) override; + void FCTIDZ(u32 frd, u32 frb, u32 rc) override; + void FCFID(u32 frd, u32 frb, u32 rc) override; + + void UNK(const u32 code, const u32 opcode, const u32 gcode) override; + + private: + /// State of a compilation task + struct CompileTaskState { + enum Args { + State, + Context, + MaxArgs, + }; + + /// The LLVM function for the compilation task + llvm::Function * function; + + /// Args of the LLVM function + llvm::Value * args[MaxArgs]; + + /// The CFG being compiled + const ControlFlowGraph * cfg; + + /// Address of the current instruction being compiled + u32 current_instruction_address; + + /// A flag used to detect branch instructions. + /// This is set to false at the start of compilation of an instruction. + /// If a branch instruction is encountered, this is set to true by the decode function. 
+ bool hit_branch_instruction; + + /// Create code such that exit points can be linked to other blocks + bool generate_linkable_exits; + }; + + /// Recompilation engine + RecompilationEngine & m_recompilation_engine; + + /// The function that should be called to check the status of the thread + bool(*m_poll_status_function)(PPUThread * ppu_state); + + /// The function that will be called to execute unknown functions + llvm::Function * m_execute_unknown_function; + + /// The executable that will be called to execute unknown blocks + llvm::Function * m_execute_unknown_block; + + /// Maps function name to executable memory pointer + std::unordered_map m_executableMap; + + /// LLVM context + llvm::LLVMContext * m_llvm_context; + + /// LLVM IR builder + llvm::IRBuilder<> * m_ir_builder; - /// Module to which all generated code is output to - llvm::Module * m_module; + /// Module to which all generated code is output to + llvm::Module * m_module; - /// LLVM type of the functions genreated by the compiler - llvm::FunctionType * m_compiled_function_type; + /// LLVM type of the functions genreated by the compiler + llvm::FunctionType * m_compiled_function_type; - /// State of the current compilation task - CompileTaskState m_state; + /// State of the current compilation task + CompileTaskState m_state; - /// Compiler stats - Stats m_stats; + /// Compiler stats + Stats m_stats; - /// Get the name of the basic block for the specified address - std::string GetBasicBlockNameFromAddress(u32 address, const std::string & suffix = "") const; + /// Get the name of the basic block for the specified address + std::string GetBasicBlockNameFromAddress(u32 address, const std::string & suffix = "") const; - /// Get the address of a basic block from its name - u32 GetAddressFromBasicBlockName(const std::string & name) const; + /// Get the address of a basic block from its name + u32 GetAddressFromBasicBlockName(const std::string & name) const; - /// Get the basic block in for the specified 
address. - llvm::BasicBlock * GetBasicBlockFromAddress(u32 address, const std::string & suffix = "", bool create_if_not_exist = true); + /// Get the basic block in for the specified address. + llvm::BasicBlock * GetBasicBlockFromAddress(u32 address, const std::string & suffix = "", bool create_if_not_exist = true); - /// Get a bit - llvm::Value * GetBit(llvm::Value * val, u32 n); + /// Get a bit + llvm::Value * GetBit(llvm::Value * val, u32 n); - /// Clear a bit - llvm::Value * ClrBit(llvm::Value * val, u32 n); + /// Clear a bit + llvm::Value * ClrBit(llvm::Value * val, u32 n); - /// Set a bit - llvm::Value * SetBit(llvm::Value * val, u32 n, llvm::Value * bit, bool doClear = true); + /// Set a bit + llvm::Value * SetBit(llvm::Value * val, u32 n, llvm::Value * bit, bool doClear = true); - /// Get a nibble - llvm::Value * GetNibble(llvm::Value * val, u32 n); + /// Get a nibble + llvm::Value * GetNibble(llvm::Value * val, u32 n); - /// Clear a nibble - llvm::Value * ClrNibble(llvm::Value * val, u32 n); + /// Clear a nibble + llvm::Value * ClrNibble(llvm::Value * val, u32 n); - /// Set a nibble - llvm::Value * SetNibble(llvm::Value * val, u32 n, llvm::Value * nibble, bool doClear = true); + /// Set a nibble + llvm::Value * SetNibble(llvm::Value * val, u32 n, llvm::Value * nibble, bool doClear = true); - /// Set a nibble - llvm::Value * SetNibble(llvm::Value * val, u32 n, llvm::Value * b0, llvm::Value * b1, llvm::Value * b2, llvm::Value * b3, bool doClear = true); + /// Set a nibble + llvm::Value * SetNibble(llvm::Value * val, u32 n, llvm::Value * b0, llvm::Value * b1, llvm::Value * b2, llvm::Value * b3, bool doClear = true); - /// Load PC - llvm::Value * GetPc(); + /// Load PC + llvm::Value * GetPc(); - /// Set PC - void SetPc(llvm::Value * val_ix); + /// Set PC + void SetPc(llvm::Value * val_ix); - /// Load GPR - llvm::Value * GetGpr(u32 r, u32 num_bits = 64); + /// Load GPR + llvm::Value * GetGpr(u32 r, u32 num_bits = 64); - /// Set GPR - void SetGpr(u32 r, 
llvm::Value * val_x64); + /// Set GPR + void SetGpr(u32 r, llvm::Value * val_x64); - /// Load CR - llvm::Value * GetCr(); + /// Load CR + llvm::Value * GetCr(); - /// Load CR and get field CRn - llvm::Value * GetCrField(u32 n); + /// Load CR and get field CRn + llvm::Value * GetCrField(u32 n); - /// Set CR - void SetCr(llvm::Value * val_x32); + /// Set CR + void SetCr(llvm::Value * val_x32); - /// Set CR field - void SetCrField(u32 n, llvm::Value * field); + /// Set CR field + void SetCrField(u32 n, llvm::Value * field); - /// Set CR field - void SetCrField(u32 n, llvm::Value * b0, llvm::Value * b1, llvm::Value * b2, llvm::Value * b3); + /// Set CR field + void SetCrField(u32 n, llvm::Value * b0, llvm::Value * b1, llvm::Value * b2, llvm::Value * b3); - /// Set CR field based on signed comparison - void SetCrFieldSignedCmp(u32 n, llvm::Value * a, llvm::Value * b); + /// Set CR field based on signed comparison + void SetCrFieldSignedCmp(u32 n, llvm::Value * a, llvm::Value * b); - /// Set CR field based on unsigned comparison - void SetCrFieldUnsignedCmp(u32 n, llvm::Value * a, llvm::Value * b); + /// Set CR field based on unsigned comparison + void SetCrFieldUnsignedCmp(u32 n, llvm::Value * a, llvm::Value * b); - /// Set CR6 based on the result of the vector compare instruction - void SetCr6AfterVectorCompare(u32 vr); + /// Set CR6 based on the result of the vector compare instruction + void SetCr6AfterVectorCompare(u32 vr); - /// Get LR - llvm::Value * GetLr(); + /// Get LR + llvm::Value * GetLr(); - /// Set LR - void SetLr(llvm::Value * val_x64); + /// Set LR + void SetLr(llvm::Value * val_x64); - /// Get CTR - llvm::Value * GetCtr(); + /// Get CTR + llvm::Value * GetCtr(); - /// Set CTR - void SetCtr(llvm::Value * val_x64); + /// Set CTR + void SetCtr(llvm::Value * val_x64); - /// Load XER and convert it to an i64 - llvm::Value * GetXer(); + /// Load XER and convert it to an i64 + llvm::Value * GetXer(); - /// Load XER and return the CA bit - llvm::Value * 
GetXerCa(); + /// Load XER and return the CA bit + llvm::Value * GetXerCa(); - /// Load XER and return the SO bit - llvm::Value * GetXerSo(); + /// Load XER and return the SO bit + llvm::Value * GetXerSo(); - /// Set XER - void SetXer(llvm::Value * val_x64); + /// Set XER + void SetXer(llvm::Value * val_x64); - /// Set the CA bit of XER - void SetXerCa(llvm::Value * ca); + /// Set the CA bit of XER + void SetXerCa(llvm::Value * ca); - /// Set the SO bit of XER - void SetXerSo(llvm::Value * so); + /// Set the SO bit of XER + void SetXerSo(llvm::Value * so); - /// Get VRSAVE - llvm::Value * GetVrsave(); + /// Get VRSAVE + llvm::Value * GetVrsave(); - /// Set VRSAVE - void SetVrsave(llvm::Value * val_x64); + /// Set VRSAVE + void SetVrsave(llvm::Value * val_x64); - /// Load FPSCR - llvm::Value * GetFpscr(); + /// Load FPSCR + llvm::Value * GetFpscr(); - /// Set FPSCR - void SetFpscr(llvm::Value * val_x32); + /// Set FPSCR + void SetFpscr(llvm::Value * val_x32); - /// Get FPR - llvm::Value * GetFpr(u32 r, u32 bits = 64, bool as_int = false); + /// Get FPR + llvm::Value * GetFpr(u32 r, u32 bits = 64, bool as_int = false); - /// Set FPR - void SetFpr(u32 r, llvm::Value * val); + /// Set FPR + void SetFpr(u32 r, llvm::Value * val); - /// Load VSCR - llvm::Value * GetVscr(); + /// Load VSCR + llvm::Value * GetVscr(); - /// Set VSCR - void SetVscr(llvm::Value * val_x32); + /// Set VSCR + void SetVscr(llvm::Value * val_x32); - /// Load VR - llvm::Value * GetVr(u32 vr); + /// Load VR + llvm::Value * GetVr(u32 vr); - /// Load VR and convert it to an integer vector - llvm::Value * GetVrAsIntVec(u32 vr, u32 vec_elt_num_bits); + /// Load VR and convert it to an integer vector + llvm::Value * GetVrAsIntVec(u32 vr, u32 vec_elt_num_bits); - /// Load VR and convert it to a float vector with 4 elements - llvm::Value * GetVrAsFloatVec(u32 vr); + /// Load VR and convert it to a float vector with 4 elements + llvm::Value * GetVrAsFloatVec(u32 vr); - /// Load VR and convert it to a double 
vector with 2 elements - llvm::Value * GetVrAsDoubleVec(u32 vr); + /// Load VR and convert it to a double vector with 2 elements + llvm::Value * GetVrAsDoubleVec(u32 vr); - /// Set VR to the specified value - void SetVr(u32 vr, llvm::Value * val_x128); + /// Set VR to the specified value + void SetVr(u32 vr, llvm::Value * val_x128); - /// Check condition for branch instructions - llvm::Value * CheckBranchCondition(u32 bo, u32 bi); + /// Check condition for branch instructions + llvm::Value * CheckBranchCondition(u32 bo, u32 bi); - /// Create IR for a branch instruction - void CreateBranch(llvm::Value * cmp_i1, llvm::Value * target_i32, bool lk, bool target_is_lr = false); + /// Create IR for a branch instruction + void CreateBranch(llvm::Value * cmp_i1, llvm::Value * target_i32, bool lk, bool target_is_lr = false); - /// Read from memory - llvm::Value * ReadMemory(llvm::Value * addr_i64, u32 bits, u32 alignment = 0, bool bswap = true, bool could_be_mmio = true); + /// Read from memory + llvm::Value * ReadMemory(llvm::Value * addr_i64, u32 bits, u32 alignment = 0, bool bswap = true, bool could_be_mmio = true); - /// Write to memory - void WriteMemory(llvm::Value * addr_i64, llvm::Value * val_ix, u32 alignment = 0, bool bswap = true, bool could_be_mmio = true); + /// Write to memory + void WriteMemory(llvm::Value * addr_i64, llvm::Value * val_ix, u32 alignment = 0, bool bswap = true, bool could_be_mmio = true); - /// Convert a C++ type to an LLVM type - template - llvm::Type * CppToLlvmType() { - if (std::is_void::value) { - return m_ir_builder->getVoidTy(); - } - else if (std::is_same::value || std::is_same::value) { - return m_ir_builder->getInt64Ty(); - } - else if (std::is_same::value || std::is_same::value) { - return m_ir_builder->getInt32Ty(); - } - else if (std::is_same::value || std::is_same::value) { - return m_ir_builder->getInt16Ty(); - } - else if (std::is_same::value || std::is_same::value) { - return m_ir_builder->getInt8Ty(); - } - else if 
(std::is_same::value) { - return m_ir_builder->getFloatTy(); - } - else if (std::is_same::value) { - return m_ir_builder->getDoubleTy(); - } - else if (std::is_same::value) { - return m_ir_builder->getInt1Ty(); - } - else if (std::is_pointer::value) { - return m_ir_builder->getInt8PtrTy(); - } - else { - assert(0); - } - - return nullptr; - } - - /// Call a function - template - llvm::Value * Call(const char * name, Func function, Args... args) { - auto fn = m_module->getFunction(name); - if (!fn) { - std::vector fn_args_type = { args->getType()... }; - auto fn_type = llvm::FunctionType::get(CppToLlvmType(), fn_args_type, false); - fn = llvm::cast(m_module->getOrInsertFunction(name, fn_type)); - fn->setCallingConv(llvm::CallingConv::X86_64_Win64); - // Note: not threadsafe - m_executableMap[name] = (Executable)(void *&)function; - } - - std::vector fn_args = { args... }; - return m_ir_builder->CreateCall(fn, fn_args); - } - - /// Indirect call - llvm::Value * IndirectCall(u32 address, llvm::Value * context_i64, bool is_function); - - /// Test an instruction against the interpreter - template - void VerifyInstructionAgainstInterpreter(const char * name, void (Compiler::*recomp_fn)(Args...), void (PPUInterpreter::*interp_fn)(Args...), PPUState & input_state, Args... 
args); - - /// Excute a test - void RunTest(const char * name, std::function test_case, std::function input, std::function check_result); - - /// Handle compilation errors - void CompilationError(const std::string & error); - - /// A mask used in rotate instructions - static u64 s_rotate_mask[64][64]; - - /// A flag indicating whether s_rotate_mask has been initialised or not - static bool s_rotate_mask_inited; - - /// Initialse s_rotate_mask - static void InitRotateMask(); + /// Convert a C++ type to an LLVM type + template + llvm::Type * CppToLlvmType() { + if (std::is_void::value) { + return m_ir_builder->getVoidTy(); + } + else if (std::is_same::value || std::is_same::value) { + return m_ir_builder->getInt64Ty(); + } + else if (std::is_same::value || std::is_same::value) { + return m_ir_builder->getInt32Ty(); + } + else if (std::is_same::value || std::is_same::value) { + return m_ir_builder->getInt16Ty(); + } + else if (std::is_same::value || std::is_same::value) { + return m_ir_builder->getInt8Ty(); + } + else if (std::is_same::value) { + return m_ir_builder->getFloatTy(); + } + else if (std::is_same::value) { + return m_ir_builder->getDoubleTy(); + } + else if (std::is_same::value) { + return m_ir_builder->getInt1Ty(); + } + else if (std::is_pointer::value) { + return m_ir_builder->getInt8PtrTy(); + } + else { + assert(0); + } + + return nullptr; + } + + /// Call a function + template + llvm::Value * Call(const char * name, Func function, Args... args) { + auto fn = m_module->getFunction(name); + if (!fn) { + std::vector fn_args_type = { args->getType()... }; + auto fn_type = llvm::FunctionType::get(CppToLlvmType(), fn_args_type, false); + fn = llvm::cast(m_module->getOrInsertFunction(name, fn_type)); + fn->setCallingConv(llvm::CallingConv::X86_64_Win64); + // Note: not threadsafe + m_executableMap[name] = (Executable)(void *&)function; + } + + std::vector fn_args = { args... 
}; + return m_ir_builder->CreateCall(fn, fn_args); + } + + /// Indirect call + llvm::Value * IndirectCall(u32 address, llvm::Value * context_i64, bool is_function); + + /// Test an instruction against the interpreter + template + void VerifyInstructionAgainstInterpreter(const char * name, void (Compiler::*recomp_fn)(Args...), void (PPUInterpreter::*interp_fn)(Args...), PPUState & input_state, Args... args); + + /// Excute a test + void RunTest(const char * name, std::function test_case, std::function input, std::function check_result); + + /// Handle compilation errors + void CompilationError(const std::string & error); + + /// A mask used in rotate instructions + static u64 s_rotate_mask[64][64]; + + /// A flag indicating whether s_rotate_mask has been initialised or not + static bool s_rotate_mask_inited; + + /// Initialse s_rotate_mask + static void InitRotateMask(); //ugly std::pair getFpmAndExec(); - }; + }; - /** - * Manages block compilation. - * PPUInterpreter1 execution is traced (using Tracer class) - * Periodically RecompilationEngine process traces result to find blocks - * whose compilation can improve performances. - * It then builds them asynchroneously and update the executable mapping - * using atomic based locks to avoid undefined behavior. - **/ - class RecompilationEngine final : protected thread_t { - public: - virtual ~RecompilationEngine() override; + /** + * Manages block compilation. + * PPUInterpreter1 execution is traced (using Tracer class) + * Periodically RecompilationEngine process traces result to find blocks + * whose compilation can improve performances. + * It then builds them asynchroneously and update the executable mapping + * using atomic based locks to avoid undefined behavior. 
+ **/ + class RecompilationEngine final : protected thread_t { + public: + virtual ~RecompilationEngine() override; - /** - * Get the executable for the specified address - * The pointer is always valid during the lifetime of RecompilationEngine - * but the function pointed to can be updated. - **/ - const Executable *GetExecutable(u32 address, bool isFunction); + /** + * Get the executable for the specified address + * The pointer is always valid during the lifetime of RecompilationEngine + * but the function pointed to can be updated. + **/ + const Executable *GetExecutable(u32 address, bool isFunction); - /** - * Get the executable for the specified address if a compiled version is - * available, otherwise returns nullptr. - **/ - const Executable *GetCompiledExecutableIfAvailable(u32 address, std::mutex*); + /** + * Get the executable for the specified address if a compiled version is + * available, otherwise returns nullptr. + **/ + const Executable *GetCompiledExecutableIfAvailable(u32 address, std::mutex*); - /// Notify the recompilation engine about a newly detected trace. It takes ownership of the trace. - void NotifyTrace(ExecutionTrace * execution_trace); + /// Notify the recompilation engine about a newly detected trace. It takes ownership of the trace. 
+ void NotifyTrace(ExecutionTrace * execution_trace); - /// Log - llvm::raw_fd_ostream & Log(); + /// Log + llvm::raw_fd_ostream & Log(); - void Task(); + void Task(); - /// Get a pointer to the instance of this class - static std::shared_ptr GetInstance(); + /// Get a pointer to the instance of this class + static std::shared_ptr GetInstance(); - private: - /// An entry in the block table - struct BlockEntry { - /// Number of times this block was hit - u32 num_hits; + private: + /// An entry in the block table + struct BlockEntry { + /// Number of times this block was hit + u32 num_hits; - /// The current revision number of this function - u32 revision; + /// The current revision number of this function + u32 revision; - /// Size of the CFG when it was last compiled - size_t last_compiled_cfg_size; + /// Size of the CFG when it was last compiled + size_t last_compiled_cfg_size; - /// The CFG for this block - ControlFlowGraph cfg; + /// The CFG for this block + ControlFlowGraph cfg; - /// Indicates whether the block has been compiled or not - bool is_compiled; + /// Indicates whether the block has been compiled or not + bool is_compiled; - BlockEntry(u32 start_address, u32 function_address) - : num_hits(0) - , revision(0) - , last_compiled_cfg_size(0) - , is_compiled(false) - , cfg(start_address, function_address) { - } + BlockEntry(u32 start_address, u32 function_address) + : num_hits(0) + , revision(0) + , last_compiled_cfg_size(0) + , is_compiled(false) + , cfg(start_address, function_address) { + } - std::string ToString() const { - return fmt::Format("0x%08X (0x%08X): NumHits=%u, Revision=%u, LastCompiledCfgSize=%u, IsCompiled=%c", - cfg.start_address, cfg.function_address, num_hits, revision, last_compiled_cfg_size, is_compiled ? 
'Y' : 'N'); - } + std::string ToString() const { + return fmt::Format("0x%08X (0x%08X): NumHits=%u, Revision=%u, LastCompiledCfgSize=%u, IsCompiled=%c", + cfg.start_address, cfg.function_address, num_hits, revision, last_compiled_cfg_size, is_compiled ? 'Y' : 'N'); + } - bool operator == (const BlockEntry & other) const { - return cfg.start_address == other.cfg.start_address; - } + bool operator == (const BlockEntry & other) const { + return cfg.start_address == other.cfg.start_address; + } - bool IsFunction() const { - return cfg.function_address == cfg.start_address; - } + bool IsFunction() const { + return cfg.function_address == cfg.start_address; + } - struct hash { - size_t operator()(const BlockEntry * e) const { - return e->cfg.start_address; - } - }; + struct hash { + size_t operator()(const BlockEntry * e) const { + return e->cfg.start_address; + } + }; - struct equal_to { - bool operator()(const BlockEntry * lhs, const BlockEntry * rhs) const { - return *lhs == *rhs; - } - }; - }; + struct equal_to { + bool operator()(const BlockEntry * lhs, const BlockEntry * rhs) const { + return *lhs == *rhs; + } + }; + }; - /// Log - llvm::raw_fd_ostream * m_log; + /// Log + llvm::raw_fd_ostream * m_log; - /// Lock for accessing m_pending_execution_traces. TODO: Eliminate this and use a lock-free queue. - std::mutex m_pending_execution_traces_lock; + /// Lock for accessing m_pending_execution_traces. TODO: Eliminate this and use a lock-free queue. + std::mutex m_pending_execution_traces_lock; - /// Queue of execution traces pending processing - std::list m_pending_execution_traces; + /// Queue of execution traces pending processing + std::list m_pending_execution_traces; - /// Block table - std::unordered_set m_block_table; + /// Block table + std::unordered_set m_block_table; - /// Execution traces that have been already encountered. Data is the list of all blocks that this trace includes. 
- std::unordered_map> m_processed_execution_traces; + /// Execution traces that have been already encountered. Data is the list of all blocks that this trace includes. + std::unordered_map> m_processed_execution_traces; - /// Lock for accessing m_address_to_function. - std::mutex m_address_to_function_lock; + /// Lock for accessing m_address_to_function. + std::mutex m_address_to_function_lock; - /// (function, module containing function, times hit, mutex for access). - typedef std::tuple, u32, std::mutex> ExecutableStorage; - /// Address to ordinal cahce. Key is address. - std::unordered_map m_address_to_function; + /// (function, module containing function, times hit, mutex for access). + typedef std::tuple, u32, std::mutex> ExecutableStorage; + /// Address to ordinal cahce. Key is address. + std::unordered_map m_address_to_function; - /// The time at which the m_address_to_ordinal cache was last cleared - std::chrono::high_resolution_clock::time_point m_last_cache_clear_time; + /// The time at which the m_address_to_ordinal cache was last cleared + std::chrono::high_resolution_clock::time_point m_last_cache_clear_time; - /// Remove unused entries from the m_address_to_ordinal cache - void RemoveUnusedEntriesFromCache(); + /// Remove unused entries from the m_address_to_ordinal cache + void RemoveUnusedEntriesFromCache(); - /// PPU Compiler - Compiler m_compiler; + /// PPU Compiler + Compiler m_compiler; - RecompilationEngine(); + RecompilationEngine(); - RecompilationEngine(const RecompilationEngine & other) = delete; - RecompilationEngine(RecompilationEngine && other) = delete; + RecompilationEngine(const RecompilationEngine & other) = delete; + RecompilationEngine(RecompilationEngine && other) = delete; - RecompilationEngine & operator = (const RecompilationEngine & other) = delete; - RecompilationEngine & operator = (RecompilationEngine && other) = delete; + RecompilationEngine & operator = (const RecompilationEngine & other) = delete; + RecompilationEngine & 
operator = (RecompilationEngine && other) = delete; - /// Process an execution trace. - void ProcessExecutionTrace(const ExecutionTrace & execution_trace); + /// Process an execution trace. + void ProcessExecutionTrace(const ExecutionTrace & execution_trace); - /// Update a CFG - void UpdateControlFlowGraph(ControlFlowGraph & cfg, const ExecutionTraceEntry & this_entry, const ExecutionTraceEntry * next_entry); + /// Update a CFG + void UpdateControlFlowGraph(ControlFlowGraph & cfg, const ExecutionTraceEntry & this_entry, const ExecutionTraceEntry * next_entry); - /// Compile a block - void CompileBlock(BlockEntry & block_entry); + /// Compile a block + void CompileBlock(BlockEntry & block_entry); - /// Mutex used to prevent multiple creation - static std::mutex s_mutex; + /// Mutex used to prevent multiple creation + static std::mutex s_mutex; - /// The instance - static std::shared_ptr s_the_instance; - }; + /// The instance + static std::shared_ptr s_the_instance; + }; - /// Finds interesting execution sequences - class Tracer { - public: - /// Trace type - enum class TraceType : u32 { - CallFunction, - EnterFunction, - ExitFromCompiledFunction, - Return, - Instruction, - ExitFromCompiledBlock, - }; + /// Finds interesting execution sequences + class Tracer { + public: + /// Trace type + enum class TraceType : u32 { + CallFunction, + EnterFunction, + ExitFromCompiledFunction, + Return, + Instruction, + ExitFromCompiledBlock, + }; - Tracer(); + Tracer(); - Tracer(const Tracer & other) = delete; - Tracer(Tracer && other) = delete; + Tracer(const Tracer & other) = delete; + Tracer(Tracer && other) = delete; - virtual ~Tracer(); + virtual ~Tracer(); - Tracer & operator = (const Tracer & other) = delete; - Tracer & operator = (Tracer && other) = delete; + Tracer & operator = (const Tracer & other) = delete; + Tracer & operator = (Tracer && other) = delete; - /// Notify the tracer - void Trace(TraceType trace_type, u32 arg1, u32 arg2); + /// Notify the tracer + void 
Trace(TraceType trace_type, u32 arg1, u32 arg2); - /// Notify the tracer that the execution sequence is being terminated. - void Terminate(); + /// Notify the tracer that the execution sequence is being terminated. + void Terminate(); - private: - /// Call stack - std::vector m_stack; + private: + /// Call stack + std::vector m_stack; - /// Recompilation engine - std::shared_ptr m_recompilation_engine; - }; + /// Recompilation engine + std::shared_ptr m_recompilation_engine; + }; - /** - * PPU execution engine - * Relies on PPUInterpreter1 to execute uncompiled code. - * Traces execution to determine which block to compile. - * Use LLVM to compile block into native code. - */ - class CPUHybridDecoderRecompiler : public CPUDecoder { - friend class RecompilationEngine; - friend class Compiler; - public: - CPUHybridDecoderRecompiler(PPUThread & ppu); - CPUHybridDecoderRecompiler() = delete; + /** + * PPU execution engine + * Relies on PPUInterpreter1 to execute uncompiled code. + * Traces execution to determine which block to compile. + * Use LLVM to compile block into native code. 
+ */ + class CPUHybridDecoderRecompiler : public CPUDecoder { + friend class RecompilationEngine; + friend class Compiler; + public: + CPUHybridDecoderRecompiler(PPUThread & ppu); + CPUHybridDecoderRecompiler() = delete; - CPUHybridDecoderRecompiler(const CPUHybridDecoderRecompiler & other) = delete; - CPUHybridDecoderRecompiler(CPUHybridDecoderRecompiler && other) = delete; + CPUHybridDecoderRecompiler(const CPUHybridDecoderRecompiler & other) = delete; + CPUHybridDecoderRecompiler(CPUHybridDecoderRecompiler && other) = delete; - virtual ~CPUHybridDecoderRecompiler(); + virtual ~CPUHybridDecoderRecompiler(); - CPUHybridDecoderRecompiler & operator = (const ExecutionEngine & other) = delete; - CPUHybridDecoderRecompiler & operator = (ExecutionEngine && other) = delete; + CPUHybridDecoderRecompiler & operator = (const ExecutionEngine & other) = delete; + CPUHybridDecoderRecompiler & operator = (ExecutionEngine && other) = delete; - u32 DecodeMemory(const u32 address) override; + u32 DecodeMemory(const u32 address) override; - private: - /// PPU processor context - PPUThread & m_ppu; + private: + /// PPU processor context + PPUThread & m_ppu; - /// PPU Interpreter - PPUInterpreter * m_interpreter; + /// PPU Interpreter + PPUInterpreter * m_interpreter; - /// PPU instruction Decoder - PPUDecoder m_decoder; + /// PPU instruction Decoder + PPUDecoder m_decoder; - /// Execution tracer - Tracer m_tracer; + /// Execution tracer + Tracer m_tracer; - /// Recompilation engine - std::shared_ptr m_recompilation_engine; + /// Recompilation engine + std::shared_ptr m_recompilation_engine; - /// Execute a function - static u32 ExecuteFunction(PPUThread * ppu_state, u64 context); + /// Execute a function + static u32 ExecuteFunction(PPUThread * ppu_state, u64 context); - /// Execute till the current function returns - static u32 ExecuteTillReturn(PPUThread * ppu_state, u64 context); + /// Execute till the current function returns + static u32 ExecuteTillReturn(PPUThread * 
ppu_state, u64 context); - /// Check thread status. Returns true if the thread must exit. - static bool PollStatus(PPUThread * ppu_state); - }; + /// Check thread status. Returns true if the thread must exit. + static bool PollStatus(PPUThread * ppu_state); + }; } #endif // LLVM_AVAILABLE diff --git a/rpcs3/Emu/Cell/PPULLVMRecompilerCore.cpp b/rpcs3/Emu/Cell/PPULLVMRecompilerCore.cpp index 205cbba030..42858493f8 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompilerCore.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompilerCore.cpp @@ -27,5292 +27,5291 @@ using namespace llvm; using namespace ppu_recompiler_llvm; void Compiler::NULL_OP() { - CompilationError("NULL_OP"); + CompilationError("NULL_OP"); } void Compiler::NOP() { - m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); } void Compiler::TDI(u32 to, u32 ra, s32 simm16) { - CompilationError("TDI"); + CompilationError("TDI"); } void Compiler::TWI(u32 to, u32 ra, s32 simm16) { - CompilationError("TWI"); + CompilationError("TWI"); } void Compiler::MFVSCR(u32 vd) { - auto vscr_i32 = GetVscr(); - auto vscr_i128 = m_ir_builder->CreateZExt(vscr_i32, m_ir_builder->getIntNTy(128)); - SetVr(vd, vscr_i128); + auto vscr_i32 = GetVscr(); + auto vscr_i128 = m_ir_builder->CreateZExt(vscr_i32, m_ir_builder->getIntNTy(128)); + SetVr(vd, vscr_i128); } void Compiler::MTVSCR(u32 vb) { - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto vscr_i32 = m_ir_builder->CreateExtractElement(vb_v4i32, m_ir_builder->getInt32(0)); - vscr_i32 = m_ir_builder->CreateAnd(vscr_i32, 0x00010001); - SetVscr(vscr_i32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto vscr_i32 = m_ir_builder->CreateExtractElement(vb_v4i32, m_ir_builder->getInt32(0)); + vscr_i32 = m_ir_builder->CreateAnd(vscr_i32, 0x00010001); + SetVscr(vscr_i32); } void Compiler::VADDCUW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 
32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); - va_v4i32 = m_ir_builder->CreateNot(va_v4i32); - auto cmpv4i1 = m_ir_builder->CreateICmpULT(va_v4i32, vb_v4i32); - auto cmpv4i32 = m_ir_builder->CreateZExt(cmpv4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, cmpv4i32); + va_v4i32 = m_ir_builder->CreateNot(va_v4i32); + auto cmpv4i1 = m_ir_builder->CreateICmpULT(va_v4i32, vb_v4i32); + auto cmpv4i32 = m_ir_builder->CreateZExt(cmpv4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, cmpv4i32); } void Compiler::VADDFP(u32 vd, u32 va, u32 vb) { - auto va_v4f32 = GetVrAsFloatVec(va); - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto sum_v4f32 = m_ir_builder->CreateFAdd(va_v4f32, vb_v4f32); - SetVr(vd, sum_v4f32); + auto va_v4f32 = GetVrAsFloatVec(va); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto sum_v4f32 = m_ir_builder->CreateFAdd(va_v4f32, vb_v4f32); + SetVr(vd, sum_v4f32); } void Compiler::VADDSBS(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto sum_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_padds_b), va_v16i8, vb_v16i8); - SetVr(vd, sum_v16i8); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto sum_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_padds_b), va_v16i8, vb_v16i8); + SetVr(vd, sum_v16i8); - // TODO: Set VSCR.SAT + // TODO: Set VSCR.SAT } void Compiler::VADDSHS(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto sum_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_padds_w), va_v8i16, vb_v8i16); - SetVr(vd, sum_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto sum_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_padds_w), 
va_v8i16, vb_v8i16); + SetVr(vd, sum_v8i16); - // TODO: Set VSCR.SAT + // TODO: Set VSCR.SAT } void Compiler::VADDSWS(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); - // It looks like x86 does not have an instruction to add 32 bit intergers with signed/unsigned saturation. - // To implement add with saturation, we first determine what the result would be if the operation were to cause - // an overflow. If two -ve numbers are being added and cause an overflow, the result would be 0x80000000. - // If two +ve numbers are being added and cause an overflow, the result would be 0x7FFFFFFF. Addition of a -ve - // number and a +ve number cannot cause overflow. So the result in case of an overflow is 0x7FFFFFFF + sign bit - // of any one of the operands. - auto tmp1_v4i32 = m_ir_builder->CreateLShr(va_v4i32, 31); - tmp1_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x7FFFFFFF))); - auto tmp1_v16i8 = m_ir_builder->CreateBitCast(tmp1_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + // It looks like x86 does not have an instruction to add 32 bit intergers with signed/unsigned saturation. + // To implement add with saturation, we first determine what the result would be if the operation were to cause + // an overflow. If two -ve numbers are being added and cause an overflow, the result would be 0x80000000. + // If two +ve numbers are being added and cause an overflow, the result would be 0x7FFFFFFF. Addition of a -ve + // number and a +ve number cannot cause overflow. So the result in case of an overflow is 0x7FFFFFFF + sign bit + // of any one of the operands. 
+ auto tmp1_v4i32 = m_ir_builder->CreateLShr(va_v4i32, 31); + tmp1_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x7FFFFFFF))); + auto tmp1_v16i8 = m_ir_builder->CreateBitCast(tmp1_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - // Next, we find if the addition can actually result in an overflow. Since an overflow can only happen if the operands - // have the same sign, we bitwise XOR both the operands. If the sign bit of the result is 0 then the operands have the - // same sign and so may cause an overflow. We invert the result so that the sign bit is 1 when the operands have the - // same sign. - auto tmp2_v4i32 = m_ir_builder->CreateXor(va_v4i32, vb_v4i32); - tmp2_v4i32 = m_ir_builder->CreateNot(tmp2_v4i32); + // Next, we find if the addition can actually result in an overflow. Since an overflow can only happen if the operands + // have the same sign, we bitwise XOR both the operands. If the sign bit of the result is 0 then the operands have the + // same sign and so may cause an overflow. We invert the result so that the sign bit is 1 when the operands have the + // same sign. + auto tmp2_v4i32 = m_ir_builder->CreateXor(va_v4i32, vb_v4i32); + tmp2_v4i32 = m_ir_builder->CreateNot(tmp2_v4i32); - // Perform the sum. - auto sum_v4i32 = m_ir_builder->CreateAdd(va_v4i32, vb_v4i32); - auto sum_v16i8 = m_ir_builder->CreateBitCast(sum_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + // Perform the sum. + auto sum_v4i32 = m_ir_builder->CreateAdd(va_v4i32, vb_v4i32); + auto sum_v16i8 = m_ir_builder->CreateBitCast(sum_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - // If an overflow occurs, then the sign of the sum will be different from the sign of the operands. So, we xor the - // result with one of the operands. The sign bit of the result will be 1 if the sign bit of the sum and the sign bit of the - // result is different. 
This result is again ANDed with tmp3 (the sign bit of tmp3 is 1 only if the operands have the same - // sign and so can cause an overflow). - auto tmp3_v4i32 = m_ir_builder->CreateXor(va_v4i32, sum_v4i32); - tmp3_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, tmp3_v4i32); - tmp3_v4i32 = m_ir_builder->CreateAShr(tmp3_v4i32, 31); - auto tmp3_v16i8 = m_ir_builder->CreateBitCast(tmp3_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + // If an overflow occurs, then the sign of the sum will be different from the sign of the operands. So, we xor the + // result with one of the operands. The sign bit of the result will be 1 if the sign bit of the sum and the sign bit of the + // result is different. This result is again ANDed with tmp3 (the sign bit of tmp3 is 1 only if the operands have the same + // sign and so can cause an overflow). + auto tmp3_v4i32 = m_ir_builder->CreateXor(va_v4i32, sum_v4i32); + tmp3_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, tmp3_v4i32); + tmp3_v4i32 = m_ir_builder->CreateAShr(tmp3_v4i32, 31); + auto tmp3_v16i8 = m_ir_builder->CreateBitCast(tmp3_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - // tmp4 is equal to 0xFFFFFFFF if an overflow occured and 0x00000000 otherwise. - auto res_v16i8 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pblendvb), sum_v16i8, tmp1_v16i8, tmp3_v16i8); - SetVr(vd, res_v16i8); + // tmp4 is equal to 0xFFFFFFFF if an overflow occured and 0x00000000 otherwise. 
+ auto res_v16i8 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pblendvb), sum_v16i8, tmp1_v16i8, tmp3_v16i8); + SetVr(vd, res_v16i8); - // TODO: Set SAT + // TODO: Set SAT } void Compiler::VADDUBM(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto sum_v16i8 = m_ir_builder->CreateAdd(va_v16i8, vb_v16i8); - SetVr(vd, sum_v16i8); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto sum_v16i8 = m_ir_builder->CreateAdd(va_v16i8, vb_v16i8); + SetVr(vd, sum_v16i8); } void Compiler::VADDUBS(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto sum_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_paddus_b), va_v16i8, vb_v16i8); - SetVr(vd, sum_v16i8); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto sum_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_paddus_b), va_v16i8, vb_v16i8); + SetVr(vd, sum_v16i8); - // TODO: Set SAT + // TODO: Set SAT } void Compiler::VADDUHM(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto sum_v8i16 = m_ir_builder->CreateAdd(va_v8i16, vb_v8i16); - SetVr(vd, sum_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto sum_v8i16 = m_ir_builder->CreateAdd(va_v8i16, vb_v8i16); + SetVr(vd, sum_v8i16); } void Compiler::VADDUHS(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto sum_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_paddus_w), va_v8i16, vb_v8i16); - SetVr(vd, sum_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto sum_v8i16 = 
m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_paddus_w), va_v8i16, vb_v8i16); + SetVr(vd, sum_v8i16); - // TODO: Set SAT + // TODO: Set SAT } void Compiler::VADDUWM(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto sum_v4i32 = m_ir_builder->CreateAdd(va_v4i32, vb_v4i32); - SetVr(vd, sum_v4i32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto sum_v4i32 = m_ir_builder->CreateAdd(va_v4i32, vb_v4i32); + SetVr(vd, sum_v4i32); } void Compiler::VADDUWS(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto sum_v4i32 = m_ir_builder->CreateAdd(va_v4i32, vb_v4i32); - auto cmp_v4i1 = m_ir_builder->CreateICmpULT(sum_v4i32, va_v4i32); - auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - auto res_v4i32 = m_ir_builder->CreateOr(sum_v4i32, cmp_v4i32); - SetVr(vd, res_v4i32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto sum_v4i32 = m_ir_builder->CreateAdd(va_v4i32, vb_v4i32); + auto cmp_v4i1 = m_ir_builder->CreateICmpULT(sum_v4i32, va_v4i32); + auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + auto res_v4i32 = m_ir_builder->CreateOr(sum_v4i32, cmp_v4i32); + SetVr(vd, res_v4i32); - // TODO: Set SAT + // TODO: Set SAT } void Compiler::VAND(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto res_v4i32 = m_ir_builder->CreateAnd(va_v4i32, vb_v4i32); - SetVr(vd, res_v4i32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto res_v4i32 = m_ir_builder->CreateAnd(va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); } void Compiler::VANDC(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - vb_v4i32 = 
m_ir_builder->CreateNot(vb_v4i32); - auto res_v4i32 = m_ir_builder->CreateAnd(va_v4i32, vb_v4i32); - SetVr(vd, res_v4i32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + vb_v4i32 = m_ir_builder->CreateNot(vb_v4i32); + auto res_v4i32 = m_ir_builder->CreateAnd(va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); } void Compiler::VAVGSB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto va_v16i16 = m_ir_builder->CreateSExt(va_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16)); - auto vb_v16i16 = m_ir_builder->CreateSExt(vb_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16)); - auto sum_v16i16 = m_ir_builder->CreateAdd(va_v16i16, vb_v16i16); - sum_v16i16 = m_ir_builder->CreateAdd(sum_v16i16, m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt16(1))); - auto avg_v16i16 = m_ir_builder->CreateAShr(sum_v16i16, 1); - auto avg_v16i8 = m_ir_builder->CreateTrunc(avg_v16i16, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - SetVr(vd, avg_v16i8); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto va_v16i16 = m_ir_builder->CreateSExt(va_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16)); + auto vb_v16i16 = m_ir_builder->CreateSExt(vb_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16)); + auto sum_v16i16 = m_ir_builder->CreateAdd(va_v16i16, vb_v16i16); + sum_v16i16 = m_ir_builder->CreateAdd(sum_v16i16, m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt16(1))); + auto avg_v16i16 = m_ir_builder->CreateAShr(sum_v16i16, 1); + auto avg_v16i8 = m_ir_builder->CreateTrunc(avg_v16i16, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + SetVr(vd, avg_v16i8); } void Compiler::VAVGSH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto va_v8i32 = m_ir_builder->CreateSExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); - auto vb_v8i32 = 
m_ir_builder->CreateSExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); - auto sum_v8i32 = m_ir_builder->CreateAdd(va_v8i32, vb_v8i32); - sum_v8i32 = m_ir_builder->CreateAdd(sum_v8i32, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt32(1))); - auto avg_v8i32 = m_ir_builder->CreateAShr(sum_v8i32, 1); - auto avg_v8i16 = m_ir_builder->CreateTrunc(avg_v8i32, VectorType::get(m_ir_builder->getInt16Ty(), 8)); - SetVr(vd, avg_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto va_v8i32 = m_ir_builder->CreateSExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto vb_v8i32 = m_ir_builder->CreateSExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto sum_v8i32 = m_ir_builder->CreateAdd(va_v8i32, vb_v8i32); + sum_v8i32 = m_ir_builder->CreateAdd(sum_v8i32, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt32(1))); + auto avg_v8i32 = m_ir_builder->CreateAShr(sum_v8i32, 1); + auto avg_v8i16 = m_ir_builder->CreateTrunc(avg_v8i32, VectorType::get(m_ir_builder->getInt16Ty(), 8)); + SetVr(vd, avg_v8i16); } void Compiler::VAVGSW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto va_v4i64 = m_ir_builder->CreateSExt(va_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); - auto vb_v4i64 = m_ir_builder->CreateSExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); - auto sum_v4i64 = m_ir_builder->CreateAdd(va_v4i64, vb_v4i64); - sum_v4i64 = m_ir_builder->CreateAdd(sum_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(1))); - auto avg_v4i64 = m_ir_builder->CreateAShr(sum_v4i64, 1); - auto avg_v4i32 = m_ir_builder->CreateTrunc(avg_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, avg_v4i32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto va_v4i64 = m_ir_builder->CreateSExt(va_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); + auto 
vb_v4i64 = m_ir_builder->CreateSExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); + auto sum_v4i64 = m_ir_builder->CreateAdd(va_v4i64, vb_v4i64); + sum_v4i64 = m_ir_builder->CreateAdd(sum_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(1))); + auto avg_v4i64 = m_ir_builder->CreateAShr(sum_v4i64, 1); + auto avg_v4i32 = m_ir_builder->CreateTrunc(avg_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, avg_v4i32); } void Compiler::VAVGUB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto avg_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pavg_b), va_v16i8, vb_v16i8); - SetVr(vd, avg_v16i8); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto avg_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pavg_b), va_v16i8, vb_v16i8); + SetVr(vd, avg_v16i8); } void Compiler::VAVGUH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto avg_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pavg_w), va_v8i16, vb_v8i16); - SetVr(vd, avg_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto avg_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pavg_w), va_v8i16, vb_v8i16); + SetVr(vd, avg_v8i16); } void Compiler::VAVGUW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto va_v4i64 = m_ir_builder->CreateZExt(va_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); - auto vb_v4i64 = m_ir_builder->CreateZExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); - auto sum_v4i64 = m_ir_builder->CreateAdd(va_v4i64, vb_v4i64); - sum_v4i64 = m_ir_builder->CreateAdd(sum_v4i64, m_ir_builder->CreateVectorSplat(4, 
m_ir_builder->getInt64(1))); - auto avg_v4i64 = m_ir_builder->CreateLShr(sum_v4i64, 1); - auto avg_v4i32 = m_ir_builder->CreateTrunc(avg_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, avg_v4i32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto va_v4i64 = m_ir_builder->CreateZExt(va_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); + auto vb_v4i64 = m_ir_builder->CreateZExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); + auto sum_v4i64 = m_ir_builder->CreateAdd(va_v4i64, vb_v4i64); + sum_v4i64 = m_ir_builder->CreateAdd(sum_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(1))); + auto avg_v4i64 = m_ir_builder->CreateLShr(sum_v4i64, 1); + auto avg_v4i32 = m_ir_builder->CreateTrunc(avg_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, avg_v4i32); } void Compiler::VCFSX(u32 vd, u32 uimm5, u32 vb) { - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto res_v4f32 = m_ir_builder->CreateSIToFP(vb_v4i32, VectorType::get(m_ir_builder->getFloatTy(), 4)); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto res_v4f32 = m_ir_builder->CreateSIToFP(vb_v4i32, VectorType::get(m_ir_builder->getFloatTy(), 4)); - if (uimm5) { - float scale = (float)((u64)1 << uimm5); - res_v4f32 = m_ir_builder->CreateFDiv(res_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), scale))); - } + if (uimm5) { + float scale = (float)((u64)1 << uimm5); + res_v4f32 = m_ir_builder->CreateFDiv(res_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), scale))); + } - SetVr(vd, res_v4f32); + SetVr(vd, res_v4f32); } void Compiler::VCFUX(u32 vd, u32 uimm5, u32 vb) { - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto res_v4f32 = m_ir_builder->CreateUIToFP(vb_v4i32, VectorType::get(m_ir_builder->getFloatTy(), 4)); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto res_v4f32 = m_ir_builder->CreateUIToFP(vb_v4i32, VectorType::get(m_ir_builder->getFloatTy(), 
4)); - if (uimm5) { - float scale = (float)((u64)1 << uimm5); - res_v4f32 = m_ir_builder->CreateFDiv(res_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), scale))); - } + if (uimm5) { + float scale = (float)((u64)1 << uimm5); + res_v4f32 = m_ir_builder->CreateFDiv(res_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), scale))); + } - SetVr(vd, res_v4f32); + SetVr(vd, res_v4f32); } void Compiler::VCMPBFP(u32 vd, u32 va, u32 vb) { - auto va_v4f32 = GetVrAsFloatVec(va); - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto cmp_gt_v4i1 = m_ir_builder->CreateFCmpOGT(va_v4f32, vb_v4f32); - vb_v4f32 = m_ir_builder->CreateFNeg(vb_v4f32); - auto cmp_lt_v4i1 = m_ir_builder->CreateFCmpOLT(va_v4f32, vb_v4f32); - auto cmp_gt_v4i32 = m_ir_builder->CreateZExt(cmp_gt_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - auto cmp_lt_v4i32 = m_ir_builder->CreateZExt(cmp_lt_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - cmp_gt_v4i32 = m_ir_builder->CreateShl(cmp_gt_v4i32, 31); - cmp_lt_v4i32 = m_ir_builder->CreateShl(cmp_lt_v4i32, 30); - auto res_v4i32 = m_ir_builder->CreateOr(cmp_gt_v4i32, cmp_lt_v4i32); - SetVr(vd, res_v4i32); + auto va_v4f32 = GetVrAsFloatVec(va); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto cmp_gt_v4i1 = m_ir_builder->CreateFCmpOGT(va_v4f32, vb_v4f32); + vb_v4f32 = m_ir_builder->CreateFNeg(vb_v4f32); + auto cmp_lt_v4i1 = m_ir_builder->CreateFCmpOLT(va_v4f32, vb_v4f32); + auto cmp_gt_v4i32 = m_ir_builder->CreateZExt(cmp_gt_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + auto cmp_lt_v4i32 = m_ir_builder->CreateZExt(cmp_lt_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + cmp_gt_v4i32 = m_ir_builder->CreateShl(cmp_gt_v4i32, 31); + cmp_lt_v4i32 = m_ir_builder->CreateShl(cmp_lt_v4i32, 30); + auto res_v4i32 = m_ir_builder->CreateOr(cmp_gt_v4i32, cmp_lt_v4i32); + SetVr(vd, res_v4i32); - // TODO: Implement NJ mode + // TODO: Implement NJ mode } void Compiler::VCMPBFP_(u32 vd, u32 
va, u32 vb) { - VCMPBFP(vd, va, vb); + VCMPBFP(vd, va, vb); - auto vd_v16i8 = GetVrAsIntVec(vd, 8); - u32 mask_v16i32[16] = { 3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - vd_v16i8 = m_ir_builder->CreateShuffleVector(vd_v16i8, UndefValue::get(VectorType::get(m_ir_builder->getInt8Ty(), 16)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32)); - auto vd_v4i32 = m_ir_builder->CreateBitCast(vd_v16i8, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - auto vd_mask_i32 = m_ir_builder->CreateExtractElement(vd_v4i32, m_ir_builder->getInt32(0)); - auto cmp_i1 = m_ir_builder->CreateICmpEQ(vd_mask_i32, m_ir_builder->getInt32(0)); - SetCrField(6, nullptr, nullptr, cmp_i1, nullptr); + auto vd_v16i8 = GetVrAsIntVec(vd, 8); + u32 mask_v16i32[16] = { 3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + vd_v16i8 = m_ir_builder->CreateShuffleVector(vd_v16i8, UndefValue::get(VectorType::get(m_ir_builder->getInt8Ty(), 16)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32)); + auto vd_v4i32 = m_ir_builder->CreateBitCast(vd_v16i8, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + auto vd_mask_i32 = m_ir_builder->CreateExtractElement(vd_v4i32, m_ir_builder->getInt32(0)); + auto cmp_i1 = m_ir_builder->CreateICmpEQ(vd_mask_i32, m_ir_builder->getInt32(0)); + SetCrField(6, nullptr, nullptr, cmp_i1, nullptr); } void Compiler::VCMPEQFP(u32 vd, u32 va, u32 vb) { - auto va_v4f32 = GetVrAsFloatVec(va); - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto cmp_v4i1 = m_ir_builder->CreateFCmpOEQ(va_v4f32, vb_v4f32); - auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, cmp_v4i32); + auto va_v4f32 = GetVrAsFloatVec(va); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto cmp_v4i1 = m_ir_builder->CreateFCmpOEQ(va_v4f32, vb_v4f32); + auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, cmp_v4i32); } void Compiler::VCMPEQFP_(u32 vd, u32 va, u32 vb) { - 
VCMPEQFP(vd, va, vb); - SetCr6AfterVectorCompare(vd); + VCMPEQFP(vd, va, vb); + SetCr6AfterVectorCompare(vd); } void Compiler::VCMPEQUB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto cmp_v16i1 = m_ir_builder->CreateICmpEQ(va_v16i8, vb_v16i8); - auto cmp_v16i8 = m_ir_builder->CreateSExt(cmp_v16i1, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - SetVr(vd, cmp_v16i8); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto cmp_v16i1 = m_ir_builder->CreateICmpEQ(va_v16i8, vb_v16i8); + auto cmp_v16i8 = m_ir_builder->CreateSExt(cmp_v16i1, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + SetVr(vd, cmp_v16i8); } void Compiler::VCMPEQUB_(u32 vd, u32 va, u32 vb) { - VCMPEQUB(vd, va, vb); - SetCr6AfterVectorCompare(vd); + VCMPEQUB(vd, va, vb); + SetCr6AfterVectorCompare(vd); } void Compiler::VCMPEQUH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto cmp_v8i1 = m_ir_builder->CreateICmpEQ(va_v8i16, vb_v8i16); - auto cmp_v8i16 = m_ir_builder->CreateSExt(cmp_v8i1, VectorType::get(m_ir_builder->getInt16Ty(), 8)); - SetVr(vd, cmp_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto cmp_v8i1 = m_ir_builder->CreateICmpEQ(va_v8i16, vb_v8i16); + auto cmp_v8i16 = m_ir_builder->CreateSExt(cmp_v8i1, VectorType::get(m_ir_builder->getInt16Ty(), 8)); + SetVr(vd, cmp_v8i16); } void Compiler::VCMPEQUH_(u32 vd, u32 va, u32 vb) { - VCMPEQUH(vd, va, vb); - SetCr6AfterVectorCompare(vd); + VCMPEQUH(vd, va, vb); + SetCr6AfterVectorCompare(vd); } void Compiler::VCMPEQUW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto cmp_v4i1 = m_ir_builder->CreateICmpEQ(va_v4i32, vb_v4i32); - auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, cmp_v4i32); + auto va_v4i32 = 
GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto cmp_v4i1 = m_ir_builder->CreateICmpEQ(va_v4i32, vb_v4i32); + auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, cmp_v4i32); } void Compiler::VCMPEQUW_(u32 vd, u32 va, u32 vb) { - VCMPEQUW(vd, va, vb); - SetCr6AfterVectorCompare(vd); + VCMPEQUW(vd, va, vb); + SetCr6AfterVectorCompare(vd); } void Compiler::VCMPGEFP(u32 vd, u32 va, u32 vb) { - auto va_v4f32 = GetVrAsFloatVec(va); - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto cmp_v4i1 = m_ir_builder->CreateFCmpOGE(va_v4f32, vb_v4f32); - auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, cmp_v4i32); + auto va_v4f32 = GetVrAsFloatVec(va); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto cmp_v4i1 = m_ir_builder->CreateFCmpOGE(va_v4f32, vb_v4f32); + auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, cmp_v4i32); } void Compiler::VCMPGEFP_(u32 vd, u32 va, u32 vb) { - VCMPGEFP(vd, va, vb); - SetCr6AfterVectorCompare(vd); + VCMPGEFP(vd, va, vb); + SetCr6AfterVectorCompare(vd); } void Compiler::VCMPGTFP(u32 vd, u32 va, u32 vb) { - auto va_v4f32 = GetVrAsFloatVec(va); - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto cmp_v4i1 = m_ir_builder->CreateFCmpOGT(va_v4f32, vb_v4f32); - auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, cmp_v4i32); + auto va_v4f32 = GetVrAsFloatVec(va); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto cmp_v4i1 = m_ir_builder->CreateFCmpOGT(va_v4f32, vb_v4f32); + auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, cmp_v4i32); } void Compiler::VCMPGTFP_(u32 vd, u32 va, u32 vb) { - VCMPGTFP(vd, va, vb); - SetCr6AfterVectorCompare(vd); + VCMPGTFP(vd, va, vb); + SetCr6AfterVectorCompare(vd); } void Compiler::VCMPGTSB(u32 vd, u32 va, u32 vb) { - 
auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto cmp_v16i1 = m_ir_builder->CreateICmpSGT(va_v16i8, vb_v16i8); - auto cmp_v16i8 = m_ir_builder->CreateSExt(cmp_v16i1, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - SetVr(vd, cmp_v16i8); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto cmp_v16i1 = m_ir_builder->CreateICmpSGT(va_v16i8, vb_v16i8); + auto cmp_v16i8 = m_ir_builder->CreateSExt(cmp_v16i1, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + SetVr(vd, cmp_v16i8); } void Compiler::VCMPGTSB_(u32 vd, u32 va, u32 vb) { - VCMPGTSB(vd, va, vb); - SetCr6AfterVectorCompare(vd); + VCMPGTSB(vd, va, vb); + SetCr6AfterVectorCompare(vd); } void Compiler::VCMPGTSH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto cmp_v8i1 = m_ir_builder->CreateICmpSGT(va_v8i16, vb_v8i16); - auto cmp_v8i16 = m_ir_builder->CreateSExt(cmp_v8i1, VectorType::get(m_ir_builder->getInt16Ty(), 8)); - SetVr(vd, cmp_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto cmp_v8i1 = m_ir_builder->CreateICmpSGT(va_v8i16, vb_v8i16); + auto cmp_v8i16 = m_ir_builder->CreateSExt(cmp_v8i1, VectorType::get(m_ir_builder->getInt16Ty(), 8)); + SetVr(vd, cmp_v8i16); } void Compiler::VCMPGTSH_(u32 vd, u32 va, u32 vb) { - VCMPGTSH(vd, va, vb); - SetCr6AfterVectorCompare(vd); + VCMPGTSH(vd, va, vb); + SetCr6AfterVectorCompare(vd); } void Compiler::VCMPGTSW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto cmp_v4i1 = m_ir_builder->CreateICmpSGT(va_v4i32, vb_v4i32); - auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, cmp_v4i32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto cmp_v4i1 = m_ir_builder->CreateICmpSGT(va_v4i32, vb_v4i32); + auto cmp_v4i32 = 
m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, cmp_v4i32); } void Compiler::VCMPGTSW_(u32 vd, u32 va, u32 vb) { - VCMPGTSW(vd, va, vb); - SetCr6AfterVectorCompare(vd); + VCMPGTSW(vd, va, vb); + SetCr6AfterVectorCompare(vd); } void Compiler::VCMPGTUB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto cmp_v16i1 = m_ir_builder->CreateICmpUGT(va_v16i8, vb_v16i8); - auto cmp_v16i8 = m_ir_builder->CreateSExt(cmp_v16i1, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - SetVr(vd, cmp_v16i8); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto cmp_v16i1 = m_ir_builder->CreateICmpUGT(va_v16i8, vb_v16i8); + auto cmp_v16i8 = m_ir_builder->CreateSExt(cmp_v16i1, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + SetVr(vd, cmp_v16i8); } void Compiler::VCMPGTUB_(u32 vd, u32 va, u32 vb) { - VCMPGTUB(vd, va, vb); - SetCr6AfterVectorCompare(vd); + VCMPGTUB(vd, va, vb); + SetCr6AfterVectorCompare(vd); } void Compiler::VCMPGTUH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto cmp_v8i1 = m_ir_builder->CreateICmpUGT(va_v8i16, vb_v8i16); - auto cmp_v8i16 = m_ir_builder->CreateSExt(cmp_v8i1, VectorType::get(m_ir_builder->getInt16Ty(), 8)); - SetVr(vd, cmp_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto cmp_v8i1 = m_ir_builder->CreateICmpUGT(va_v8i16, vb_v8i16); + auto cmp_v8i16 = m_ir_builder->CreateSExt(cmp_v8i1, VectorType::get(m_ir_builder->getInt16Ty(), 8)); + SetVr(vd, cmp_v8i16); } void Compiler::VCMPGTUH_(u32 vd, u32 va, u32 vb) { - VCMPGTUH(vd, va, vb); - SetCr6AfterVectorCompare(vd); + VCMPGTUH(vd, va, vb); + SetCr6AfterVectorCompare(vd); } void Compiler::VCMPGTUW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto cmp_v4i1 = 
m_ir_builder->CreateICmpUGT(va_v4i32, vb_v4i32); - auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, cmp_v4i32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto cmp_v4i1 = m_ir_builder->CreateICmpUGT(va_v4i32, vb_v4i32); + auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, cmp_v4i32); } void Compiler::VCMPGTUW_(u32 vd, u32 va, u32 vb) { - VCMPGTUW(vd, va, vb); - SetCr6AfterVectorCompare(vd); + VCMPGTUW(vd, va, vb); + SetCr6AfterVectorCompare(vd); } void Compiler::VCTSXS(u32 vd, u32 uimm5, u32 vb) { - auto vb_v4f32 = GetVrAsFloatVec(vb); - if (uimm5) { - vb_v4f32 = m_ir_builder->CreateFMul(vb_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 1 << uimm5))); - } + auto vb_v4f32 = GetVrAsFloatVec(vb); + if (uimm5) { + vb_v4f32 = m_ir_builder->CreateFMul(vb_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 1 << uimm5))); + } - auto res_v4i32 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_cvtps2dq), vb_v4f32); - auto cmp_v4i1 = m_ir_builder->CreateFCmpOGE(vb_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 0x7FFFFFFF))); - auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - res_v4i32 = m_ir_builder->CreateXor(cmp_v4i32, res_v4i32); - SetVr(vd, res_v4i32); + auto res_v4i32 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_cvtps2dq), vb_v4f32); + auto cmp_v4i1 = m_ir_builder->CreateFCmpOGE(vb_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 0x7FFFFFFF))); + auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + res_v4i32 = m_ir_builder->CreateXor(cmp_v4i32, res_v4i32); + SetVr(vd, 
res_v4i32); - // TODO: Set VSCR.SAT + // TODO: Set VSCR.SAT } void Compiler::VCTUXS(u32 vd, u32 uimm5, u32 vb) { - auto vb_v4f32 = GetVrAsFloatVec(vb); - if (uimm5) { - vb_v4f32 = m_ir_builder->CreateFMul(vb_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 1 << uimm5))); - } + auto vb_v4f32 = GetVrAsFloatVec(vb); + if (uimm5) { + vb_v4f32 = m_ir_builder->CreateFMul(vb_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 1 << uimm5))); + } - auto res_v4f32 = (Value *)m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse_max_ps), vb_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 0))); - auto cmp_v4i1 = m_ir_builder->CreateFCmpOGE(res_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 0xFFFFFFFFu))); - auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - auto res_v4i32 = m_ir_builder->CreateFPToUI(res_v4f32, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - res_v4i32 = m_ir_builder->CreateOr(res_v4i32, cmp_v4i32); - SetVr(vd, res_v4i32); + auto res_v4f32 = (Value *)m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse_max_ps), vb_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 0))); + auto cmp_v4i1 = m_ir_builder->CreateFCmpOGE(res_v4f32, m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 0xFFFFFFFFu))); + auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + auto res_v4i32 = m_ir_builder->CreateFPToUI(res_v4f32, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + res_v4i32 = m_ir_builder->CreateOr(res_v4i32, cmp_v4i32); + SetVr(vd, res_v4i32); - // TODO: Set VSCR.SAT + // TODO: Set VSCR.SAT } void Compiler::VEXPTEFP(u32 vd, u32 vb) { - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto res_v4f32 = (Value 
*)m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::pow, VectorType::get(m_ir_builder->getFloatTy(), 4)), - m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 2.0f)), vb_v4f32); - SetVr(vd, res_v4f32); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto res_v4f32 = (Value *)m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::pow, VectorType::get(m_ir_builder->getFloatTy(), 4)), + m_ir_builder->CreateVectorSplat(4, ConstantFP::get(m_ir_builder->getFloatTy(), 2.0f)), vb_v4f32); + SetVr(vd, res_v4f32); } void Compiler::VLOGEFP(u32 vd, u32 vb) { - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto res_v4f32 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::log2, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); - SetVr(vd, res_v4f32); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto res_v4f32 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::log2, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); + SetVr(vd, res_v4f32); } void Compiler::VMADDFP(u32 vd, u32 va, u32 vc, u32 vb) { - auto va_v4f32 = GetVrAsFloatVec(va); - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto vc_v4f32 = GetVrAsFloatVec(vc); - auto res_v4f32 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, VectorType::get(m_ir_builder->getFloatTy(), 4)), va_v4f32, vc_v4f32, vb_v4f32); - SetVr(vd, res_v4f32); + auto va_v4f32 = GetVrAsFloatVec(va); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto vc_v4f32 = GetVrAsFloatVec(vc); + auto res_v4f32 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, VectorType::get(m_ir_builder->getFloatTy(), 4)), va_v4f32, vc_v4f32, vb_v4f32); + SetVr(vd, res_v4f32); } void Compiler::VMAXFP(u32 vd, u32 va, u32 vb) { - auto va_v4f32 = GetVrAsFloatVec(va); - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto res_v4f32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, 
Intrinsic::x86_sse_max_ps), va_v4f32, vb_v4f32); - SetVr(vd, res_v4f32); + auto va_v4f32 = GetVrAsFloatVec(va); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto res_v4f32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse_max_ps), va_v4f32, vb_v4f32); + SetVr(vd, res_v4f32); } void Compiler::VMAXSB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pmaxsb), va_v16i8, vb_v16i8); - SetVr(vd, res_v16i8); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pmaxsb), va_v16i8, vb_v16i8); + SetVr(vd, res_v16i8); } void Compiler::VMAXSH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmaxs_w), va_v8i16, vb_v8i16); - SetVr(vd, res_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmaxs_w), va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); } void Compiler::VMAXSW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto res_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pmaxsd), va_v4i32, vb_v4i32); - SetVr(vd, res_v4i32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto res_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pmaxsd), va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); } void Compiler::VMAXUB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = 
GetVrAsIntVec(vb, 8); - auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmaxu_b), va_v16i8, vb_v16i8); - SetVr(vd, res_v16i8); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmaxu_b), va_v16i8, vb_v16i8); + SetVr(vd, res_v16i8); } void Compiler::VMAXUH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pmaxuw), va_v8i16, vb_v8i16); - SetVr(vd, res_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pmaxuw), va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); } void Compiler::VMAXUW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto res_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pmaxud), va_v4i32, vb_v4i32); - SetVr(vd, res_v4i32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto res_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pmaxud), va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); } void Compiler::VMHADDSHS(u32 vd, u32 va, u32 vb, u32 vc) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto vc_v8i16 = GetVrAsIntVec(vc, 16); - auto va_v8i32 = m_ir_builder->CreateSExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); - auto vb_v8i32 = m_ir_builder->CreateSExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); - auto vc_v8i32 = m_ir_builder->CreateSExt(vc_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); - auto res_v8i32 = 
m_ir_builder->CreateMul(va_v8i32, vb_v8i32); - res_v8i32 = m_ir_builder->CreateAShr(res_v8i32, 15); - res_v8i32 = m_ir_builder->CreateAdd(res_v8i32, vc_v8i32); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto vc_v8i16 = GetVrAsIntVec(vc, 16); + auto va_v8i32 = m_ir_builder->CreateSExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto vb_v8i32 = m_ir_builder->CreateSExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto vc_v8i32 = m_ir_builder->CreateSExt(vc_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto res_v8i32 = m_ir_builder->CreateMul(va_v8i32, vb_v8i32); + res_v8i32 = m_ir_builder->CreateAShr(res_v8i32, 15); + res_v8i32 = m_ir_builder->CreateAdd(res_v8i32, vc_v8i32); - u32 mask1_v4i32[4] = { 0, 1, 2, 3 }; - auto res1_v4i32 = m_ir_builder->CreateShuffleVector(res_v8i32, UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); - u32 mask2_v4i32[4] = { 4, 5, 6, 7 }; - auto res2_v4i32 = m_ir_builder->CreateShuffleVector(res_v8i32, UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); - auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_packssdw_128), res1_v4i32, res2_v4i32); - SetVr(vd, res_v8i16); + u32 mask1_v4i32[4] = { 0, 1, 2, 3 }; + auto res1_v4i32 = m_ir_builder->CreateShuffleVector(res_v8i32, UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); + u32 mask2_v4i32[4] = { 4, 5, 6, 7 }; + auto res2_v4i32 = m_ir_builder->CreateShuffleVector(res_v8i32, UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); + auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_packssdw_128), 
res1_v4i32, res2_v4i32); + SetVr(vd, res_v8i16); - // TODO: Set VSCR.SAT + // TODO: Set VSCR.SAT } void Compiler::VMHRADDSHS(u32 vd, u32 va, u32 vb, u32 vc) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto vc_v8i16 = GetVrAsIntVec(vc, 16); - auto va_v8i32 = m_ir_builder->CreateSExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); - auto vb_v8i32 = m_ir_builder->CreateSExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); - auto vc_v8i32 = m_ir_builder->CreateSExt(vc_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); - auto res_v8i32 = m_ir_builder->CreateMul(va_v8i32, vb_v8i32); - res_v8i32 = m_ir_builder->CreateAdd(res_v8i32, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt32(0x4000))); - res_v8i32 = m_ir_builder->CreateAShr(res_v8i32, 15); - res_v8i32 = m_ir_builder->CreateAdd(res_v8i32, vc_v8i32); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto vc_v8i16 = GetVrAsIntVec(vc, 16); + auto va_v8i32 = m_ir_builder->CreateSExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto vb_v8i32 = m_ir_builder->CreateSExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto vc_v8i32 = m_ir_builder->CreateSExt(vc_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto res_v8i32 = m_ir_builder->CreateMul(va_v8i32, vb_v8i32); + res_v8i32 = m_ir_builder->CreateAdd(res_v8i32, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt32(0x4000))); + res_v8i32 = m_ir_builder->CreateAShr(res_v8i32, 15); + res_v8i32 = m_ir_builder->CreateAdd(res_v8i32, vc_v8i32); - u32 mask1_v4i32[4] = { 0, 1, 2, 3 }; - auto res1_v4i32 = m_ir_builder->CreateShuffleVector(res_v8i32, UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); - u32 mask2_v4i32[4] = { 4, 5, 6, 7 }; - auto res2_v4i32 = m_ir_builder->CreateShuffleVector(res_v8i32, 
UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); - auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_packssdw_128), res1_v4i32, res2_v4i32); - SetVr(vd, res_v8i16); + u32 mask1_v4i32[4] = { 0, 1, 2, 3 }; + auto res1_v4i32 = m_ir_builder->CreateShuffleVector(res_v8i32, UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); + u32 mask2_v4i32[4] = { 4, 5, 6, 7 }; + auto res2_v4i32 = m_ir_builder->CreateShuffleVector(res_v8i32, UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); + auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_packssdw_128), res1_v4i32, res2_v4i32); + SetVr(vd, res_v8i16); - // TODO: Set VSCR.SAT + // TODO: Set VSCR.SAT } void Compiler::VMINFP(u32 vd, u32 va, u32 vb) { - auto va_v4f32 = GetVrAsFloatVec(va); - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto res_v4f32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse_min_ps), va_v4f32, vb_v4f32); - SetVr(vd, res_v4f32); + auto va_v4f32 = GetVrAsFloatVec(va); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto res_v4f32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse_min_ps), va_v4f32, vb_v4f32); + SetVr(vd, res_v4f32); } void Compiler::VMINSB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminsb), va_v16i8, vb_v16i8); - SetVr(vd, res_v16i8); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminsb), va_v16i8, vb_v16i8); + 
SetVr(vd, res_v16i8); } void Compiler::VMINSH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmins_w), va_v8i16, vb_v8i16); - SetVr(vd, res_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmins_w), va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); } void Compiler::VMINSW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto res_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminsd), va_v4i32, vb_v4i32); - SetVr(vd, res_v4i32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto res_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminsd), va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); } void Compiler::VMINUB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pminu_b), va_v16i8, vb_v16i8); - SetVr(vd, res_v16i8); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pminu_b), va_v16i8, vb_v16i8); + SetVr(vd, res_v16i8); } void Compiler::VMINUH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminuw), va_v8i16, vb_v8i16); - SetVr(vd, res_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto res_v8i16 = 
m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminuw), va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); } void Compiler::VMINUW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto res_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminud), va_v4i32, vb_v4i32); - SetVr(vd, res_v4i32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto res_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminud), va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); } void Compiler::VMLADDUHM(u32 vd, u32 va, u32 vb, u32 vc) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto vc_v8i16 = GetVrAsIntVec(vc, 16); - auto res_v8i16 = m_ir_builder->CreateMul(va_v8i16, vb_v8i16); - res_v8i16 = m_ir_builder->CreateAdd(res_v8i16, vc_v8i16); - SetVr(vd, res_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto vc_v8i16 = GetVrAsIntVec(vc, 16); + auto res_v8i16 = m_ir_builder->CreateMul(va_v8i16, vb_v8i16); + res_v8i16 = m_ir_builder->CreateAdd(res_v8i16, vc_v8i16); + SetVr(vd, res_v8i16); } void Compiler::VMRGHB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - u32 mask_v16i32[16] = { 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15 }; - auto vd_v16i8 = m_ir_builder->CreateShuffleVector(va_v16i8, vb_v16i8, ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32)); - SetVr(vd, vd_v16i8); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + u32 mask_v16i32[16] = { 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15 }; + auto vd_v16i8 = m_ir_builder->CreateShuffleVector(va_v16i8, vb_v16i8, ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32)); + SetVr(vd, vd_v16i8); } void 
Compiler::VMRGHH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - u32 mask_v8i32[8] = { 12, 4, 13, 5, 14, 6, 15, 7 }; - auto vd_v8i16 = m_ir_builder->CreateShuffleVector(va_v8i16, vb_v8i16, ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); - SetVr(vd, vd_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + u32 mask_v8i32[8] = { 12, 4, 13, 5, 14, 6, 15, 7 }; + auto vd_v8i16 = m_ir_builder->CreateShuffleVector(va_v8i16, vb_v8i16, ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); + SetVr(vd, vd_v8i16); } void Compiler::VMRGHW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - u32 mask_v4i32[4] = { 6, 2, 7, 3 }; - auto vd_v4i32 = m_ir_builder->CreateShuffleVector(va_v4i32, vb_v4i32, ConstantDataVector::get(m_ir_builder->getContext(), mask_v4i32)); - SetVr(vd, vd_v4i32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + u32 mask_v4i32[4] = { 6, 2, 7, 3 }; + auto vd_v4i32 = m_ir_builder->CreateShuffleVector(va_v4i32, vb_v4i32, ConstantDataVector::get(m_ir_builder->getContext(), mask_v4i32)); + SetVr(vd, vd_v4i32); } void Compiler::VMRGLB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - u32 mask_v16i32[16] = { 16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7 }; - auto vd_v16i8 = m_ir_builder->CreateShuffleVector(va_v16i8, vb_v16i8, ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32)); - SetVr(vd, vd_v16i8); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + u32 mask_v16i32[16] = { 16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7 }; + auto vd_v16i8 = m_ir_builder->CreateShuffleVector(va_v16i8, vb_v16i8, ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32)); + SetVr(vd, vd_v16i8); } void Compiler::VMRGLH(u32 vd, u32 va, u32 vb) 
{ - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - u32 mask_v8i32[8] = { 8, 0, 9, 1, 10, 2, 11, 3 }; - auto vd_v8i16 = m_ir_builder->CreateShuffleVector(va_v8i16, vb_v8i16, ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); - SetVr(vd, vd_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + u32 mask_v8i32[8] = { 8, 0, 9, 1, 10, 2, 11, 3 }; + auto vd_v8i16 = m_ir_builder->CreateShuffleVector(va_v8i16, vb_v8i16, ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); + SetVr(vd, vd_v8i16); } void Compiler::VMRGLW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - u32 mask_v4i32[4] = { 4, 0, 5, 1 }; - auto vd_v4i32 = m_ir_builder->CreateShuffleVector(va_v4i32, vb_v4i32, ConstantDataVector::get(m_ir_builder->getContext(), mask_v4i32)); - SetVr(vd, vd_v4i32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + u32 mask_v4i32[4] = { 4, 0, 5, 1 }; + auto vd_v4i32 = m_ir_builder->CreateShuffleVector(va_v4i32, vb_v4i32, ConstantDataVector::get(m_ir_builder->getContext(), mask_v4i32)); + SetVr(vd, vd_v4i32); } void Compiler::VMSUMMBM(u32 vd, u32 va, u32 vb, u32 vc) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto va_v16i16 = m_ir_builder->CreateSExt(va_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16)); - auto vb_v16i16 = m_ir_builder->CreateZExt(vb_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16)); - auto tmp_v16i16 = m_ir_builder->CreateMul(va_v16i16, vb_v16i16); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto va_v16i16 = m_ir_builder->CreateSExt(va_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16)); + auto vb_v16i16 = m_ir_builder->CreateZExt(vb_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16)); + auto tmp_v16i16 = m_ir_builder->CreateMul(va_v16i16, vb_v16i16); - auto undef_v16i16 = 
UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 16)); - u32 mask1_v4i32[4] = { 0, 4, 8, 12 }; - auto tmp1_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); - auto tmp1_v4i32 = m_ir_builder->CreateSExt(tmp1_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - u32 mask2_v4i32[4] = { 1, 5, 9, 13 }; - auto tmp2_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); - auto tmp2_v4i32 = m_ir_builder->CreateSExt(tmp2_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - u32 mask3_v4i32[4] = { 2, 6, 10, 14 }; - auto tmp3_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask3_v4i32)); - auto tmp3_v4i32 = m_ir_builder->CreateSExt(tmp3_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - u32 mask4_v4i32[4] = { 3, 7, 11, 15 }; - auto tmp4_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask4_v4i32)); - auto tmp4_v4i32 = m_ir_builder->CreateSExt(tmp4_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + auto undef_v16i16 = UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 16)); + u32 mask1_v4i32[4] = { 0, 4, 8, 12 }; + auto tmp1_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); + auto tmp1_v4i32 = m_ir_builder->CreateSExt(tmp1_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + u32 mask2_v4i32[4] = { 1, 5, 9, 13 }; + auto tmp2_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); + auto tmp2_v4i32 = m_ir_builder->CreateSExt(tmp2_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + u32 mask3_v4i32[4] = { 2, 6, 10, 14 }; + auto tmp3_v4i16 = 
m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask3_v4i32)); + auto tmp3_v4i32 = m_ir_builder->CreateSExt(tmp3_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + u32 mask4_v4i32[4] = { 3, 7, 11, 15 }; + auto tmp4_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask4_v4i32)); + auto tmp4_v4i32 = m_ir_builder->CreateSExt(tmp4_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - auto vc_v4i32 = GetVrAsIntVec(vc, 32); - auto res_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, tmp2_v4i32); - res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, tmp3_v4i32); - res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, tmp4_v4i32); - res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vc_v4i32); + auto vc_v4i32 = GetVrAsIntVec(vc, 32); + auto res_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, tmp2_v4i32); + res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, tmp3_v4i32); + res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, tmp4_v4i32); + res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vc_v4i32); - SetVr(vd, res_v4i32); + SetVr(vd, res_v4i32); - // TODO: Try to optimize with horizontal add + // TODO: Try to optimize with horizontal add } void Compiler::VMSUMSHM(u32 vd, u32 va, u32 vb, u32 vc) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto vc_v4i32 = GetVrAsIntVec(vc, 32); - auto res_v4i32 = (Value *)m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmadd_wd), va_v8i16, vb_v8i16); - res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vc_v4i32); - SetVr(vd, res_v4i32); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto vc_v4i32 = GetVrAsIntVec(vc, 32); + auto res_v4i32 = (Value *)m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmadd_wd), va_v8i16, vb_v8i16); + res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vc_v4i32); + 
SetVr(vd, res_v4i32); } void Compiler::VMSUMSHS(u32 vd, u32 va, u32 vb, u32 vc) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto vc_v4i32 = GetVrAsIntVec(vc, 32); - auto res_v4i32 = (Value *)m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmadd_wd), va_v8i16, vb_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto vc_v4i32 = GetVrAsIntVec(vc, 32); + auto res_v4i32 = (Value *)m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmadd_wd), va_v8i16, vb_v8i16); - auto tmp1_v4i32 = m_ir_builder->CreateLShr(vc_v4i32, 31); - tmp1_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x7FFFFFFF))); - auto tmp1_v16i8 = m_ir_builder->CreateBitCast(tmp1_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - auto tmp2_v4i32 = m_ir_builder->CreateXor(vc_v4i32, res_v4i32); - tmp2_v4i32 = m_ir_builder->CreateNot(tmp2_v4i32); - auto sum_v4i32 = m_ir_builder->CreateAdd(vc_v4i32, res_v4i32); - auto sum_v16i8 = m_ir_builder->CreateBitCast(sum_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - auto tmp3_v4i32 = m_ir_builder->CreateXor(vc_v4i32, sum_v4i32); - tmp3_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, tmp3_v4i32); - tmp3_v4i32 = m_ir_builder->CreateAShr(tmp3_v4i32, 31); - auto tmp3_v16i8 = m_ir_builder->CreateBitCast(tmp3_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - auto res_v16i8 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pblendvb), sum_v16i8, tmp1_v16i8, tmp3_v16i8); - SetVr(vd, res_v16i8); + auto tmp1_v4i32 = m_ir_builder->CreateLShr(vc_v4i32, 31); + tmp1_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x7FFFFFFF))); + auto tmp1_v16i8 = m_ir_builder->CreateBitCast(tmp1_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + auto tmp2_v4i32 = 
m_ir_builder->CreateXor(vc_v4i32, res_v4i32); + tmp2_v4i32 = m_ir_builder->CreateNot(tmp2_v4i32); + auto sum_v4i32 = m_ir_builder->CreateAdd(vc_v4i32, res_v4i32); + auto sum_v16i8 = m_ir_builder->CreateBitCast(sum_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + auto tmp3_v4i32 = m_ir_builder->CreateXor(vc_v4i32, sum_v4i32); + tmp3_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, tmp3_v4i32); + tmp3_v4i32 = m_ir_builder->CreateAShr(tmp3_v4i32, 31); + auto tmp3_v16i8 = m_ir_builder->CreateBitCast(tmp3_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + auto res_v16i8 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pblendvb), sum_v16i8, tmp1_v16i8, tmp3_v16i8); + SetVr(vd, res_v16i8); - // TODO: Set VSCR.SAT + // TODO: Set VSCR.SAT } void Compiler::VMSUMUBM(u32 vd, u32 va, u32 vb, u32 vc) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto va_v16i16 = m_ir_builder->CreateZExt(va_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16)); - auto vb_v16i16 = m_ir_builder->CreateZExt(vb_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16)); - auto tmp_v16i16 = m_ir_builder->CreateMul(va_v16i16, vb_v16i16); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto va_v16i16 = m_ir_builder->CreateZExt(va_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16)); + auto vb_v16i16 = m_ir_builder->CreateZExt(vb_v16i8, VectorType::get(m_ir_builder->getInt16Ty(), 16)); + auto tmp_v16i16 = m_ir_builder->CreateMul(va_v16i16, vb_v16i16); - auto undef_v16i16 = UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 16)); - u32 mask1_v4i32[4] = { 0, 4, 8, 12 }; - auto tmp1_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); - auto tmp1_v4i32 = m_ir_builder->CreateZExt(tmp1_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - u32 mask2_v4i32[4] = { 1, 5, 9, 13 }; - auto tmp2_v4i16 = 
m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); - auto tmp2_v4i32 = m_ir_builder->CreateZExt(tmp2_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - u32 mask3_v4i32[4] = { 2, 6, 10, 14 }; - auto tmp3_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask3_v4i32)); - auto tmp3_v4i32 = m_ir_builder->CreateZExt(tmp3_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - u32 mask4_v4i32[4] = { 3, 7, 11, 15 }; - auto tmp4_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask4_v4i32)); - auto tmp4_v4i32 = m_ir_builder->CreateZExt(tmp4_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + auto undef_v16i16 = UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 16)); + u32 mask1_v4i32[4] = { 0, 4, 8, 12 }; + auto tmp1_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); + auto tmp1_v4i32 = m_ir_builder->CreateZExt(tmp1_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + u32 mask2_v4i32[4] = { 1, 5, 9, 13 }; + auto tmp2_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); + auto tmp2_v4i32 = m_ir_builder->CreateZExt(tmp2_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + u32 mask3_v4i32[4] = { 2, 6, 10, 14 }; + auto tmp3_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask3_v4i32)); + auto tmp3_v4i32 = m_ir_builder->CreateZExt(tmp3_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + u32 mask4_v4i32[4] = { 3, 7, 11, 15 }; + auto tmp4_v4i16 = m_ir_builder->CreateShuffleVector(tmp_v16i16, undef_v16i16, ConstantDataVector::get(m_ir_builder->getContext(), mask4_v4i32)); + auto tmp4_v4i32 = 
m_ir_builder->CreateZExt(tmp4_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - auto vc_v4i32 = GetVrAsIntVec(vc, 32); - auto res_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, tmp2_v4i32); - res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, tmp3_v4i32); - res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, tmp4_v4i32); - res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vc_v4i32); + auto vc_v4i32 = GetVrAsIntVec(vc, 32); + auto res_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, tmp2_v4i32); + res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, tmp3_v4i32); + res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, tmp4_v4i32); + res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vc_v4i32); - SetVr(vd, res_v4i32); + SetVr(vd, res_v4i32); - // TODO: Try to optimize with horizontal add + // TODO: Try to optimize with horizontal add } void Compiler::VMSUMUHM(u32 vd, u32 va, u32 vb, u32 vc) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto va_v8i32 = m_ir_builder->CreateZExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); - auto vb_v8i32 = m_ir_builder->CreateZExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); - auto tmp_v8i32 = m_ir_builder->CreateMul(va_v8i32, vb_v8i32); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto va_v8i32 = m_ir_builder->CreateZExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto vb_v8i32 = m_ir_builder->CreateZExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto tmp_v8i32 = m_ir_builder->CreateMul(va_v8i32, vb_v8i32); - auto undef_v8i32 = UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8)); - u32 mask1_v4i32[4] = { 0, 2, 4, 6 }; - auto tmp1_v4i32 = m_ir_builder->CreateShuffleVector(tmp_v8i32, undef_v8i32, ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); - u32 mask2_v4i32[4] = { 1, 3, 5, 7 }; - auto tmp2_v4i32 = m_ir_builder->CreateShuffleVector(tmp_v8i32, undef_v8i32, 
ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); + auto undef_v8i32 = UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 8)); + u32 mask1_v4i32[4] = { 0, 2, 4, 6 }; + auto tmp1_v4i32 = m_ir_builder->CreateShuffleVector(tmp_v8i32, undef_v8i32, ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); + u32 mask2_v4i32[4] = { 1, 3, 5, 7 }; + auto tmp2_v4i32 = m_ir_builder->CreateShuffleVector(tmp_v8i32, undef_v8i32, ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); - auto vc_v4i32 = GetVrAsIntVec(vc, 32); - auto res_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, tmp2_v4i32); - res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vc_v4i32); + auto vc_v4i32 = GetVrAsIntVec(vc, 32); + auto res_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, tmp2_v4i32); + res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vc_v4i32); - SetVr(vd, res_v4i32); + SetVr(vd, res_v4i32); - // TODO: Try to optimize with horizontal add + // TODO: Try to optimize with horizontal add } void Compiler::VMSUMUHS(u32 vd, u32 va, u32 vb, u32 vc) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto va_v8i32 = m_ir_builder->CreateZExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); - auto vb_v8i32 = m_ir_builder->CreateZExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); - auto tmp_v8i32 = m_ir_builder->CreateMul(va_v8i32, vb_v8i32); - auto tmp_v8i64 = m_ir_builder->CreateZExt(tmp_v8i32, VectorType::get(m_ir_builder->getInt64Ty(), 8)); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto va_v8i32 = m_ir_builder->CreateZExt(va_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto vb_v8i32 = m_ir_builder->CreateZExt(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 8)); + auto tmp_v8i32 = m_ir_builder->CreateMul(va_v8i32, vb_v8i32); + auto tmp_v8i64 = m_ir_builder->CreateZExt(tmp_v8i32, VectorType::get(m_ir_builder->getInt64Ty(), 8)); - u32 mask1_v4i32[4] = { 0, 2, 
4, 6 }; - u32 mask2_v4i32[4] = { 1, 3, 5, 7 }; - auto tmp1_v4i64 = m_ir_builder->CreateShuffleVector(tmp_v8i64, UndefValue::get(tmp_v8i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); - auto tmp2_v4i64 = m_ir_builder->CreateShuffleVector(tmp_v8i64, UndefValue::get(tmp_v8i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); + u32 mask1_v4i32[4] = { 0, 2, 4, 6 }; + u32 mask2_v4i32[4] = { 1, 3, 5, 7 }; + auto tmp1_v4i64 = m_ir_builder->CreateShuffleVector(tmp_v8i64, UndefValue::get(tmp_v8i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); + auto tmp2_v4i64 = m_ir_builder->CreateShuffleVector(tmp_v8i64, UndefValue::get(tmp_v8i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); - auto vc_v4i32 = GetVrAsIntVec(vc, 32); - auto vc_v4i64 = m_ir_builder->CreateZExt(vc_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); - auto res_v4i64 = m_ir_builder->CreateAdd(tmp1_v4i64, tmp2_v4i64); - res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, vc_v4i64); - auto gt_v4i1 = m_ir_builder->CreateICmpUGT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0xFFFFFFFF))); - auto gt_v4i64 = m_ir_builder->CreateSExt(gt_v4i1, VectorType::get(m_ir_builder->getInt64Ty(), 4)); - res_v4i64 = m_ir_builder->CreateOr(res_v4i64, gt_v4i64); - auto res_v4i32 = m_ir_builder->CreateTrunc(res_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, res_v4i32); + auto vc_v4i32 = GetVrAsIntVec(vc, 32); + auto vc_v4i64 = m_ir_builder->CreateZExt(vc_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); + auto res_v4i64 = m_ir_builder->CreateAdd(tmp1_v4i64, tmp2_v4i64); + res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, vc_v4i64); + auto gt_v4i1 = m_ir_builder->CreateICmpUGT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0xFFFFFFFF))); + auto gt_v4i64 = m_ir_builder->CreateSExt(gt_v4i1, VectorType::get(m_ir_builder->getInt64Ty(), 4)); 
+ res_v4i64 = m_ir_builder->CreateOr(res_v4i64, gt_v4i64); + auto res_v4i32 = m_ir_builder->CreateTrunc(res_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, res_v4i32); - // TODO: Set VSCR.SAT + // TODO: Set VSCR.SAT } void Compiler::VMULESB(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - va_v8i16 = m_ir_builder->CreateAShr(va_v8i16, 8); - vb_v8i16 = m_ir_builder->CreateAShr(vb_v8i16, 8); - auto res_v8i16 = m_ir_builder->CreateMul(va_v8i16, vb_v8i16); - SetVr(vd, res_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + va_v8i16 = m_ir_builder->CreateAShr(va_v8i16, 8); + vb_v8i16 = m_ir_builder->CreateAShr(vb_v8i16, 8); + auto res_v8i16 = m_ir_builder->CreateMul(va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); } void Compiler::VMULESH(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - va_v4i32 = m_ir_builder->CreateAShr(va_v4i32, 16); - vb_v4i32 = m_ir_builder->CreateAShr(vb_v4i32, 16); - auto res_v4i32 = m_ir_builder->CreateMul(va_v4i32, vb_v4i32); - SetVr(vd, res_v4i32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + va_v4i32 = m_ir_builder->CreateAShr(va_v4i32, 16); + vb_v4i32 = m_ir_builder->CreateAShr(vb_v4i32, 16); + auto res_v4i32 = m_ir_builder->CreateMul(va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); } void Compiler::VMULEUB(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - va_v8i16 = m_ir_builder->CreateLShr(va_v8i16, 8); - vb_v8i16 = m_ir_builder->CreateLShr(vb_v8i16, 8); - auto res_v8i16 = m_ir_builder->CreateMul(va_v8i16, vb_v8i16); - SetVr(vd, res_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + va_v8i16 = m_ir_builder->CreateLShr(va_v8i16, 8); + vb_v8i16 = m_ir_builder->CreateLShr(vb_v8i16, 8); + auto res_v8i16 = 
m_ir_builder->CreateMul(va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); } void Compiler::VMULEUH(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - va_v4i32 = m_ir_builder->CreateLShr(va_v4i32, 16); - vb_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, 16); - auto res_v4i32 = m_ir_builder->CreateMul(va_v4i32, vb_v4i32); - SetVr(vd, res_v4i32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + va_v4i32 = m_ir_builder->CreateLShr(va_v4i32, 16); + vb_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, 16); + auto res_v4i32 = m_ir_builder->CreateMul(va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); } void Compiler::VMULOSB(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - va_v8i16 = m_ir_builder->CreateShl(va_v8i16, 8); - va_v8i16 = m_ir_builder->CreateAShr(va_v8i16, 8); - vb_v8i16 = m_ir_builder->CreateShl(vb_v8i16, 8); - vb_v8i16 = m_ir_builder->CreateAShr(vb_v8i16, 8); - auto res_v8i16 = m_ir_builder->CreateMul(va_v8i16, vb_v8i16); - SetVr(vd, res_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + va_v8i16 = m_ir_builder->CreateShl(va_v8i16, 8); + va_v8i16 = m_ir_builder->CreateAShr(va_v8i16, 8); + vb_v8i16 = m_ir_builder->CreateShl(vb_v8i16, 8); + vb_v8i16 = m_ir_builder->CreateAShr(vb_v8i16, 8); + auto res_v8i16 = m_ir_builder->CreateMul(va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); } void Compiler::VMULOSH(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - va_v4i32 = m_ir_builder->CreateShl(va_v4i32, 16); - va_v4i32 = m_ir_builder->CreateAShr(va_v4i32, 16); - vb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, 16); - vb_v4i32 = m_ir_builder->CreateAShr(vb_v4i32, 16); - auto res_v4i32 = m_ir_builder->CreateMul(va_v4i32, vb_v4i32); - SetVr(vd, res_v4i32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + va_v4i32 
= m_ir_builder->CreateShl(va_v4i32, 16); + va_v4i32 = m_ir_builder->CreateAShr(va_v4i32, 16); + vb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, 16); + vb_v4i32 = m_ir_builder->CreateAShr(vb_v4i32, 16); + auto res_v4i32 = m_ir_builder->CreateMul(va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); } void Compiler::VMULOUB(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - va_v8i16 = m_ir_builder->CreateShl(va_v8i16, 8); - va_v8i16 = m_ir_builder->CreateLShr(va_v8i16, 8); - vb_v8i16 = m_ir_builder->CreateShl(vb_v8i16, 8); - vb_v8i16 = m_ir_builder->CreateLShr(vb_v8i16, 8); - auto res_v8i16 = m_ir_builder->CreateMul(va_v8i16, vb_v8i16); - SetVr(vd, res_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + va_v8i16 = m_ir_builder->CreateShl(va_v8i16, 8); + va_v8i16 = m_ir_builder->CreateLShr(va_v8i16, 8); + vb_v8i16 = m_ir_builder->CreateShl(vb_v8i16, 8); + vb_v8i16 = m_ir_builder->CreateLShr(vb_v8i16, 8); + auto res_v8i16 = m_ir_builder->CreateMul(va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); } void Compiler::VMULOUH(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - va_v4i32 = m_ir_builder->CreateShl(va_v4i32, 16); - va_v4i32 = m_ir_builder->CreateLShr(va_v4i32, 16); - vb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, 16); - vb_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, 16); - auto res_v4i32 = m_ir_builder->CreateMul(va_v4i32, vb_v4i32); - SetVr(vd, res_v4i32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + va_v4i32 = m_ir_builder->CreateShl(va_v4i32, 16); + va_v4i32 = m_ir_builder->CreateLShr(va_v4i32, 16); + vb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, 16); + vb_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, 16); + auto res_v4i32 = m_ir_builder->CreateMul(va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); } void Compiler::VNMSUBFP(u32 vd, u32 va, u32 vc, u32 vb) { - auto va_v4f32 = GetVrAsFloatVec(va); 
- auto vb_v4f32 = GetVrAsFloatVec(vb); - auto vc_v4f32 = GetVrAsFloatVec(vc); - vc_v4f32 = m_ir_builder->CreateFNeg(vc_v4f32); - auto res_v4f32 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, VectorType::get(m_ir_builder->getFloatTy(), 4)), va_v4f32, vc_v4f32, vb_v4f32); - SetVr(vd, res_v4f32); + auto va_v4f32 = GetVrAsFloatVec(va); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto vc_v4f32 = GetVrAsFloatVec(vc); + vc_v4f32 = m_ir_builder->CreateFNeg(vc_v4f32); + auto res_v4f32 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, VectorType::get(m_ir_builder->getFloatTy(), 4)), va_v4f32, vc_v4f32, vb_v4f32); + SetVr(vd, res_v4f32); } void Compiler::VNOR(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto res_v8i16 = m_ir_builder->CreateOr(va_v8i16, vb_v8i16); - res_v8i16 = m_ir_builder->CreateNot(res_v8i16); - SetVr(vd, res_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto res_v8i16 = m_ir_builder->CreateOr(va_v8i16, vb_v8i16); + res_v8i16 = m_ir_builder->CreateNot(res_v8i16); + SetVr(vd, res_v8i16); } void Compiler::VOR(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto res_v8i16 = m_ir_builder->CreateOr(va_v8i16, vb_v8i16); - SetVr(vd, res_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto res_v8i16 = m_ir_builder->CreateOr(va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); } void Compiler::VPERM(u32 vd, u32 va, u32 vb, u32 vc) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto vc_v16i8 = GetVrAsIntVec(vc, 8); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto vc_v16i8 = GetVrAsIntVec(vc, 8); - auto thrity_one_v16i8 = m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(31)); - vc_v16i8 = 
m_ir_builder->CreateAnd(vc_v16i8, thrity_one_v16i8); + auto thrity_one_v16i8 = m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(31)); + vc_v16i8 = m_ir_builder->CreateAnd(vc_v16i8, thrity_one_v16i8); - auto fifteen_v16i8 = m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(15)); - auto vc_le15_v16i8 = m_ir_builder->CreateSub(fifteen_v16i8, vc_v16i8); - auto res_va_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_ssse3_pshuf_b_128), va_v16i8, vc_le15_v16i8); + auto fifteen_v16i8 = m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(15)); + auto vc_le15_v16i8 = m_ir_builder->CreateSub(fifteen_v16i8, vc_v16i8); + auto res_va_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_ssse3_pshuf_b_128), va_v16i8, vc_le15_v16i8); - auto vc_gt15_v16i8 = m_ir_builder->CreateSub(thrity_one_v16i8, vc_v16i8); - auto cmp_i1 = m_ir_builder->CreateICmpUGT(vc_gt15_v16i8, fifteen_v16i8); - auto cmp_i8 = m_ir_builder->CreateSExt(cmp_i1, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - vc_gt15_v16i8 = m_ir_builder->CreateOr(cmp_i8, vc_gt15_v16i8); - auto res_vb_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_ssse3_pshuf_b_128), vb_v16i8, vc_gt15_v16i8); + auto vc_gt15_v16i8 = m_ir_builder->CreateSub(thrity_one_v16i8, vc_v16i8); + auto cmp_i1 = m_ir_builder->CreateICmpUGT(vc_gt15_v16i8, fifteen_v16i8); + auto cmp_i8 = m_ir_builder->CreateSExt(cmp_i1, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + vc_gt15_v16i8 = m_ir_builder->CreateOr(cmp_i8, vc_gt15_v16i8); + auto res_vb_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_ssse3_pshuf_b_128), vb_v16i8, vc_gt15_v16i8); - auto res_v16i8 = m_ir_builder->CreateOr(res_vb_v16i8, res_va_v16i8); - SetVr(vd, res_v16i8); + auto res_v16i8 = m_ir_builder->CreateOr(res_vb_v16i8, res_va_v16i8); + SetVr(vd, res_v16i8); } void Compiler::VPKPX(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = 
GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto tmpa_v4i32 = m_ir_builder->CreateShl(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(7))); - tmpa_v4i32 = m_ir_builder->CreateAnd(tmpa_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFC000000))); - va_v4i32 = m_ir_builder->CreateShl(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10))); - va_v4i32 = m_ir_builder->CreateAnd(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFC000000))); - tmpa_v4i32 = m_ir_builder->CreateOr(tmpa_v4i32, va_v4i32); - tmpa_v4i32 = m_ir_builder->CreateAnd(tmpa_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFFE00000))); - va_v4i32 = m_ir_builder->CreateShl(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3))); - va_v4i32 = m_ir_builder->CreateAnd(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFFE00000))); - tmpa_v4i32 = m_ir_builder->CreateOr(tmpa_v4i32, va_v4i32); - auto tmpa_v8i16 = m_ir_builder->CreateBitCast(tmpa_v4i32, VectorType::get(m_ir_builder->getInt16Ty(), 8)); + auto tmpa_v4i32 = m_ir_builder->CreateShl(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(7))); + tmpa_v4i32 = m_ir_builder->CreateAnd(tmpa_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFC000000))); + va_v4i32 = m_ir_builder->CreateShl(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10))); + va_v4i32 = m_ir_builder->CreateAnd(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFC000000))); + tmpa_v4i32 = m_ir_builder->CreateOr(tmpa_v4i32, va_v4i32); + tmpa_v4i32 = m_ir_builder->CreateAnd(tmpa_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFFE00000))); + va_v4i32 = m_ir_builder->CreateShl(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3))); + va_v4i32 = 
m_ir_builder->CreateAnd(va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFFE00000))); + tmpa_v4i32 = m_ir_builder->CreateOr(tmpa_v4i32, va_v4i32); + auto tmpa_v8i16 = m_ir_builder->CreateBitCast(tmpa_v4i32, VectorType::get(m_ir_builder->getInt16Ty(), 8)); - auto tmpb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(7))); - tmpb_v4i32 = m_ir_builder->CreateAnd(tmpb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFC000000))); - vb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10))); - vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFC000000))); - tmpb_v4i32 = m_ir_builder->CreateOr(tmpb_v4i32, vb_v4i32); - tmpb_v4i32 = m_ir_builder->CreateAnd(tmpb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFFE00000))); - vb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3))); - vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFFE00000))); - tmpb_v4i32 = m_ir_builder->CreateOr(tmpb_v4i32, vb_v4i32); - auto tmpb_v8i16 = m_ir_builder->CreateBitCast(tmpb_v4i32, VectorType::get(m_ir_builder->getInt16Ty(), 8)); + auto tmpb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(7))); + tmpb_v4i32 = m_ir_builder->CreateAnd(tmpb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFC000000))); + vb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10))); + vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFC000000))); + tmpb_v4i32 = m_ir_builder->CreateOr(tmpb_v4i32, vb_v4i32); + tmpb_v4i32 = m_ir_builder->CreateAnd(tmpb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFFE00000))); 
+ vb_v4i32 = m_ir_builder->CreateShl(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3))); + vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(~0xFFE00000))); + tmpb_v4i32 = m_ir_builder->CreateOr(tmpb_v4i32, vb_v4i32); + auto tmpb_v8i16 = m_ir_builder->CreateBitCast(tmpb_v4i32, VectorType::get(m_ir_builder->getInt16Ty(), 8)); - u32 mask_v8i32[8] = { 1, 3, 5, 7, 9, 11, 13, 15 }; - auto res_v8i16 = m_ir_builder->CreateShuffleVector(tmpb_v8i16, tmpa_v8i16, ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); + u32 mask_v8i32[8] = { 1, 3, 5, 7, 9, 11, 13, 15 }; + auto res_v8i16 = m_ir_builder->CreateShuffleVector(tmpb_v8i16, tmpa_v8i16, ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); - SetVr(vd, res_v8i16); + SetVr(vd, res_v8i16); - // TODO: Implement with pext on CPUs with BMI + // TODO: Implement with pext on CPUs with BMI } void Compiler::VPKSHSS(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_packsswb_128), vb_v8i16, va_v8i16); - SetVr(vd, res_v16i8); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_packsswb_128), vb_v8i16, va_v8i16); + SetVr(vd, res_v16i8); - // TODO: VSCR.SAT + // TODO: VSCR.SAT } void Compiler::VPKSHUS(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_packuswb_128), vb_v8i16, va_v8i16); - SetVr(vd, res_v16i8); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto res_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, 
Intrinsic::x86_sse2_packuswb_128), vb_v8i16, va_v8i16); + SetVr(vd, res_v16i8); - // TODO: VSCR.SAT + // TODO: VSCR.SAT } void Compiler::VPKSWSS(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_packssdw_128), vb_v4i32, va_v4i32); - SetVr(vd, res_v8i16); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_packssdw_128), vb_v4i32, va_v4i32); + SetVr(vd, res_v8i16); - // TODO: VSCR.SAT + // TODO: VSCR.SAT } void Compiler::VPKSWUS(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_packusdw), vb_v4i32, va_v4i32); - SetVr(vd, res_v8i16); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto res_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_packusdw), vb_v4i32, va_v4i32); + SetVr(vd, res_v8i16); - // TODO: VSCR.SAT + // TODO: VSCR.SAT } void Compiler::VPKUHUM(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); - u32 mask_v16i32[16] = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }; - auto res_v16i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, va_v16i8, ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32)); - SetVr(vd, res_v16i8); + u32 mask_v16i32[16] = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }; + auto res_v16i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, va_v16i8, ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32)); + SetVr(vd, res_v16i8); } void 
Compiler::VPKUHUS(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - va_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminuw), va_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xFF))); - vb_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminuw), vb_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xFF))); - auto va_v16i8 = m_ir_builder->CreateBitCast(va_v8i16, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - auto vb_v16i8 = m_ir_builder->CreateBitCast(vb_v8i16, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + va_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminuw), va_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xFF))); + vb_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminuw), vb_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xFF))); + auto va_v16i8 = m_ir_builder->CreateBitCast(va_v8i16, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + auto vb_v16i8 = m_ir_builder->CreateBitCast(vb_v8i16, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - u32 mask_v16i32[16] = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }; - auto res_v16i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, va_v16i8, ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32)); - SetVr(vd, res_v16i8); + u32 mask_v16i32[16] = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }; + auto res_v16i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, va_v16i8, ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32)); + SetVr(vd, res_v16i8); - // TODO: Set VSCR.SAT + // TODO: Set VSCR.SAT } void Compiler::VPKUWUM(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - 
auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); - u32 mask_v8i32[8] = { 0, 2, 4, 6, 8, 10, 12, 14 }; - auto res_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, va_v8i16, ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); - SetVr(vd, res_v8i16); + u32 mask_v8i32[8] = { 0, 2, 4, 6, 8, 10, 12, 14 }; + auto res_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, va_v8i16, ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); + SetVr(vd, res_v8i16); } void Compiler::VPKUWUS(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - va_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminud), va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFFFF))); - vb_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminud), vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFFFF))); - auto va_v8i16 = m_ir_builder->CreateBitCast(va_v4i32, VectorType::get(m_ir_builder->getInt16Ty(), 8)); - auto vb_v8i16 = m_ir_builder->CreateBitCast(vb_v4i32, VectorType::get(m_ir_builder->getInt16Ty(), 8)); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + va_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminud), va_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFFFF))); + vb_v4i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pminud), vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFFFF))); + auto va_v8i16 = m_ir_builder->CreateBitCast(va_v4i32, VectorType::get(m_ir_builder->getInt16Ty(), 8)); + auto vb_v8i16 = m_ir_builder->CreateBitCast(vb_v4i32, VectorType::get(m_ir_builder->getInt16Ty(), 8)); - u32 mask_v8i32[8] = { 0, 2, 4, 6, 8, 10, 12, 14 }; - auto res_v8i16 = 
m_ir_builder->CreateShuffleVector(vb_v8i16, va_v8i16, ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); - SetVr(vd, res_v8i16); + u32 mask_v8i32[8] = { 0, 2, 4, 6, 8, 10, 12, 14 }; + auto res_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, va_v8i16, ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); + SetVr(vd, res_v8i16); - // TODO: Set VSCR.SAT + // TODO: Set VSCR.SAT } void Compiler::VREFP(u32 vd, u32 vb) { - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse_rcp_ps), vb_v4f32); - SetVr(vd, res_v4f32); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse_rcp_ps), vb_v4f32); + SetVr(vd, res_v4f32); } void Compiler::VRFIM(u32 vd, u32 vb) { - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::floor, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); - SetVr(vd, res_v4f32); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::floor, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); + SetVr(vd, res_v4f32); } void Compiler::VRFIN(u32 vd, u32 vb) { - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::nearbyint, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); - SetVr(vd, res_v4f32); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::nearbyint, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); + SetVr(vd, res_v4f32); } void Compiler::VRFIP(u32 vd, u32 vb) { - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::ceil, 
VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); - SetVr(vd, res_v4f32); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::ceil, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); + SetVr(vd, res_v4f32); } void Compiler::VRFIZ(u32 vd, u32 vb) { - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::trunc, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); - SetVr(vd, res_v4f32); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::trunc, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); + SetVr(vd, res_v4f32); } void Compiler::VRLB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - vb_v16i8 = m_ir_builder->CreateAnd(vb_v16i8, m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(7))); - auto tmp1_v16i8 = m_ir_builder->CreateShl(va_v16i8, vb_v16i8); - vb_v16i8 = m_ir_builder->CreateSub(m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(8)), vb_v16i8); - auto tmp2_v16i8 = m_ir_builder->CreateLShr(va_v16i8, vb_v16i8); - auto res_v16i8 = m_ir_builder->CreateOr(tmp1_v16i8, tmp2_v16i8); - SetVr(vd, res_v16i8); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + vb_v16i8 = m_ir_builder->CreateAnd(vb_v16i8, m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(7))); + auto tmp1_v16i8 = m_ir_builder->CreateShl(va_v16i8, vb_v16i8); + vb_v16i8 = m_ir_builder->CreateSub(m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(8)), vb_v16i8); + auto tmp2_v16i8 = m_ir_builder->CreateLShr(va_v16i8, vb_v16i8); + auto res_v16i8 = m_ir_builder->CreateOr(tmp1_v16i8, tmp2_v16i8); + SetVr(vd, res_v16i8); } void Compiler::VRLH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = 
GetVrAsIntVec(vb, 16); - vb_v8i16 = m_ir_builder->CreateAnd(vb_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xF))); - auto tmp1_v8i16 = m_ir_builder->CreateShl(va_v8i16, vb_v8i16); - vb_v8i16 = m_ir_builder->CreateSub(m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0x10)), vb_v8i16); - auto tmp2_v8i16 = m_ir_builder->CreateLShr(va_v8i16, vb_v8i16); - auto res_v8i16 = m_ir_builder->CreateOr(tmp1_v8i16, tmp2_v8i16); - SetVr(vd, res_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + vb_v8i16 = m_ir_builder->CreateAnd(vb_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xF))); + auto tmp1_v8i16 = m_ir_builder->CreateShl(va_v8i16, vb_v8i16); + vb_v8i16 = m_ir_builder->CreateSub(m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0x10)), vb_v8i16); + auto tmp2_v8i16 = m_ir_builder->CreateLShr(va_v8i16, vb_v8i16); + auto res_v8i16 = m_ir_builder->CreateOr(tmp1_v8i16, tmp2_v8i16); + SetVr(vd, res_v8i16); } void Compiler::VRLW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x1F))); - auto tmp1_v4i32 = m_ir_builder->CreateShl(va_v4i32, vb_v4i32); - vb_v4i32 = m_ir_builder->CreateSub(m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x20)), vb_v4i32); - auto tmp2_v4i32 = m_ir_builder->CreateLShr(va_v4i32, vb_v4i32); - auto res_v4i32 = m_ir_builder->CreateOr(tmp1_v4i32, tmp2_v4i32); - SetVr(vd, res_v4i32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x1F))); + auto tmp1_v4i32 = m_ir_builder->CreateShl(va_v4i32, vb_v4i32); + vb_v4i32 = m_ir_builder->CreateSub(m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x20)), vb_v4i32); + auto tmp2_v4i32 = 
m_ir_builder->CreateLShr(va_v4i32, vb_v4i32); + auto res_v4i32 = m_ir_builder->CreateOr(tmp1_v4i32, tmp2_v4i32); + SetVr(vd, res_v4i32); } void Compiler::VRSQRTEFP(u32 vd, u32 vb) { - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::sqrt, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); - res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse_rcp_ps), res_v4f32); - SetVr(vd, res_v4f32); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::sqrt, VectorType::get(m_ir_builder->getFloatTy(), 4)), vb_v4f32); + res_v4f32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse_rcp_ps), res_v4f32); + SetVr(vd, res_v4f32); } void Compiler::VSEL(u32 vd, u32 va, u32 vb, u32 vc) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto vc_v4i32 = GetVrAsIntVec(vc, 32); - vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, vc_v4i32); - vc_v4i32 = m_ir_builder->CreateNot(vc_v4i32); - va_v4i32 = m_ir_builder->CreateAnd(va_v4i32, vc_v4i32); - auto vd_v4i32 = m_ir_builder->CreateOr(va_v4i32, vb_v4i32); - SetVr(vd, vd_v4i32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto vc_v4i32 = GetVrAsIntVec(vc, 32); + vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, vc_v4i32); + vc_v4i32 = m_ir_builder->CreateNot(vc_v4i32); + va_v4i32 = m_ir_builder->CreateAnd(va_v4i32, vc_v4i32); + auto vd_v4i32 = m_ir_builder->CreateOr(va_v4i32, vb_v4i32); + SetVr(vd, vd_v4i32); } void Compiler::VSL(u32 vd, u32 va, u32 vb) { - auto va_i128 = GetVr(va); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto sh_i8 = m_ir_builder->CreateExtractElement(vb_v16i8, m_ir_builder->getInt8(0)); - sh_i8 = m_ir_builder->CreateAnd(sh_i8, 0x7); - auto sh_i128 = m_ir_builder->CreateZExt(sh_i8, m_ir_builder->getIntNTy(128)); - va_i128 = 
m_ir_builder->CreateShl(va_i128, sh_i128); - SetVr(vd, va_i128); + auto va_i128 = GetVr(va); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto sh_i8 = m_ir_builder->CreateExtractElement(vb_v16i8, m_ir_builder->getInt8(0)); + sh_i8 = m_ir_builder->CreateAnd(sh_i8, 0x7); + auto sh_i128 = m_ir_builder->CreateZExt(sh_i8, m_ir_builder->getIntNTy(128)); + va_i128 = m_ir_builder->CreateShl(va_i128, sh_i128); + SetVr(vd, va_i128); } void Compiler::VSLB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - vb_v16i8 = m_ir_builder->CreateAnd(vb_v16i8, m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(0x7))); - auto res_v16i8 = m_ir_builder->CreateShl(va_v16i8, vb_v16i8); - SetVr(vd, res_v16i8); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + vb_v16i8 = m_ir_builder->CreateAnd(vb_v16i8, m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(0x7))); + auto res_v16i8 = m_ir_builder->CreateShl(va_v16i8, vb_v16i8); + SetVr(vd, res_v16i8); } void Compiler::VSLDOI(u32 vd, u32 va, u32 vb, u32 sh) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - sh = 16 - sh; - u32 mask_v16i32[16] = { sh, sh + 1, sh + 2, sh + 3, sh + 4, sh + 5, sh + 6, sh + 7, sh + 8, sh + 9, sh + 10, sh + 11, sh + 12, sh + 13, sh + 14, sh + 15 }; - auto vd_v16i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, va_v16i8, ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32)); - SetVr(vd, vd_v16i8); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + sh = 16 - sh; + u32 mask_v16i32[16] = { sh, sh + 1, sh + 2, sh + 3, sh + 4, sh + 5, sh + 6, sh + 7, sh + 8, sh + 9, sh + 10, sh + 11, sh + 12, sh + 13, sh + 14, sh + 15 }; + auto vd_v16i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, va_v16i8, ConstantDataVector::get(m_ir_builder->getContext(), mask_v16i32)); + SetVr(vd, vd_v16i8); } void Compiler::VSLH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = 
GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - vb_v8i16 = m_ir_builder->CreateAnd(vb_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xF))); - auto res_v8i16 = m_ir_builder->CreateShl(va_v8i16, vb_v8i16); - SetVr(vd, res_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + vb_v8i16 = m_ir_builder->CreateAnd(vb_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xF))); + auto res_v8i16 = m_ir_builder->CreateShl(va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); } void Compiler::VSLO(u32 vd, u32 va, u32 vb) { - auto va_i128 = GetVr(va); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto sh_i8 = m_ir_builder->CreateExtractElement(vb_v16i8, m_ir_builder->getInt8(0)); - sh_i8 = m_ir_builder->CreateAnd(sh_i8, 0x78); - auto sh_i128 = m_ir_builder->CreateZExt(sh_i8, m_ir_builder->getIntNTy(128)); - va_i128 = m_ir_builder->CreateShl(va_i128, sh_i128); - SetVr(vd, va_i128); + auto va_i128 = GetVr(va); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto sh_i8 = m_ir_builder->CreateExtractElement(vb_v16i8, m_ir_builder->getInt8(0)); + sh_i8 = m_ir_builder->CreateAnd(sh_i8, 0x78); + auto sh_i128 = m_ir_builder->CreateZExt(sh_i8, m_ir_builder->getIntNTy(128)); + va_i128 = m_ir_builder->CreateShl(va_i128, sh_i128); + SetVr(vd, va_i128); } void Compiler::VSLW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x1F))); - auto res_v4i32 = m_ir_builder->CreateShl(va_v4i32, vb_v4i32); - SetVr(vd, res_v4i32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x1F))); + auto res_v4i32 = m_ir_builder->CreateShl(va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); } void Compiler::VSPLTB(u32 vd, u32 uimm5, u32 vb) { - auto 
vb_v16i8 = GetVrAsIntVec(vb, 8); - auto undef_v16i8 = UndefValue::get(VectorType::get(m_ir_builder->getInt8Ty(), 16)); - auto mask_v16i32 = m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt32(15 - uimm5)); - auto res_v16i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, undef_v16i8, mask_v16i32); - SetVr(vd, res_v16i8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto undef_v16i8 = UndefValue::get(VectorType::get(m_ir_builder->getInt8Ty(), 16)); + auto mask_v16i32 = m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt32(15 - uimm5)); + auto res_v16i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, undef_v16i8, mask_v16i32); + SetVr(vd, res_v16i8); } void Compiler::VSPLTH(u32 vd, u32 uimm5, u32 vb) { - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto undef_v8i16 = UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)); - auto mask_v8i32 = m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt32(7 - uimm5)); - auto res_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, undef_v8i16, mask_v8i32); - SetVr(vd, res_v8i16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto undef_v8i16 = UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)); + auto mask_v8i32 = m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt32(7 - uimm5)); + auto res_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, undef_v8i16, mask_v8i32); + SetVr(vd, res_v8i16); } void Compiler::VSPLTISB(u32 vd, s32 simm5) { - auto vd_v16i8 = m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8((s8)simm5)); - SetVr(vd, vd_v16i8); + auto vd_v16i8 = m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8((s8)simm5)); + SetVr(vd, vd_v16i8); } void Compiler::VSPLTISH(u32 vd, s32 simm5) { - auto vd_v8i16 = m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16((s16)simm5)); - SetVr(vd, vd_v8i16); + auto vd_v8i16 = m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16((s16)simm5)); + SetVr(vd, vd_v8i16); } void Compiler::VSPLTISW(u32 vd, s32 simm5) { - auto vd_v4i32 = 
m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32((s32)simm5)); - SetVr(vd, vd_v4i32); + auto vd_v4i32 = m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32((s32)simm5)); + SetVr(vd, vd_v4i32); } void Compiler::VSPLTW(u32 vd, u32 uimm5, u32 vb) { - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto undef_v4i32 = UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 4)); - auto mask_v4i32 = m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3 - uimm5)); - auto res_v4i32 = m_ir_builder->CreateShuffleVector(vb_v4i32, undef_v4i32, mask_v4i32); - SetVr(vd, res_v4i32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto undef_v4i32 = UndefValue::get(VectorType::get(m_ir_builder->getInt32Ty(), 4)); + auto mask_v4i32 = m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3 - uimm5)); + auto res_v4i32 = m_ir_builder->CreateShuffleVector(vb_v4i32, undef_v4i32, mask_v4i32); + SetVr(vd, res_v4i32); } void Compiler::VSR(u32 vd, u32 va, u32 vb) { - auto va_i128 = GetVr(va); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto sh_i8 = m_ir_builder->CreateExtractElement(vb_v16i8, m_ir_builder->getInt8(0)); - sh_i8 = m_ir_builder->CreateAnd(sh_i8, 0x7); - auto sh_i128 = m_ir_builder->CreateZExt(sh_i8, m_ir_builder->getIntNTy(128)); - va_i128 = m_ir_builder->CreateLShr(va_i128, sh_i128); - SetVr(vd, va_i128); + auto va_i128 = GetVr(va); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto sh_i8 = m_ir_builder->CreateExtractElement(vb_v16i8, m_ir_builder->getInt8(0)); + sh_i8 = m_ir_builder->CreateAnd(sh_i8, 0x7); + auto sh_i128 = m_ir_builder->CreateZExt(sh_i8, m_ir_builder->getIntNTy(128)); + va_i128 = m_ir_builder->CreateLShr(va_i128, sh_i128); + SetVr(vd, va_i128); } void Compiler::VSRAB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - vb_v16i8 = m_ir_builder->CreateAnd(vb_v16i8, m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(0x7))); - auto res_v16i8 = m_ir_builder->CreateAShr(va_v16i8, vb_v16i8); - 
SetVr(vd, res_v16i8); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + vb_v16i8 = m_ir_builder->CreateAnd(vb_v16i8, m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(0x7))); + auto res_v16i8 = m_ir_builder->CreateAShr(va_v16i8, vb_v16i8); + SetVr(vd, res_v16i8); } void Compiler::VSRAH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - vb_v8i16 = m_ir_builder->CreateAnd(vb_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xF))); - auto res_v8i16 = m_ir_builder->CreateAShr(va_v8i16, vb_v8i16); - SetVr(vd, res_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + vb_v8i16 = m_ir_builder->CreateAnd(vb_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xF))); + auto res_v8i16 = m_ir_builder->CreateAShr(va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); } void Compiler::VSRAW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x1F))); - auto res_v4i32 = m_ir_builder->CreateAShr(va_v4i32, vb_v4i32); - SetVr(vd, res_v4i32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x1F))); + auto res_v4i32 = m_ir_builder->CreateAShr(va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); } void Compiler::VSRB(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - vb_v16i8 = m_ir_builder->CreateAnd(vb_v16i8, m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(0x7))); - auto res_v16i8 = m_ir_builder->CreateLShr(va_v16i8, vb_v16i8); - SetVr(vd, res_v16i8); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + vb_v16i8 = m_ir_builder->CreateAnd(vb_v16i8, 
m_ir_builder->CreateVectorSplat(16, m_ir_builder->getInt8(0x7))); + auto res_v16i8 = m_ir_builder->CreateLShr(va_v16i8, vb_v16i8); + SetVr(vd, res_v16i8); } void Compiler::VSRH(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - vb_v8i16 = m_ir_builder->CreateAnd(vb_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xF))); - auto res_v8i16 = m_ir_builder->CreateLShr(va_v8i16, vb_v8i16); - SetVr(vd, res_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + vb_v8i16 = m_ir_builder->CreateAnd(vb_v8i16, m_ir_builder->CreateVectorSplat(8, m_ir_builder->getInt16(0xF))); + auto res_v8i16 = m_ir_builder->CreateLShr(va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); } void Compiler::VSRO(u32 vd, u32 va, u32 vb) { - auto va_i128 = GetVr(va); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto sh_i8 = m_ir_builder->CreateExtractElement(vb_v16i8, m_ir_builder->getInt8(0)); - sh_i8 = m_ir_builder->CreateAnd(sh_i8, 0x78); - auto sh_i128 = m_ir_builder->CreateZExt(sh_i8, m_ir_builder->getIntNTy(128)); - va_i128 = m_ir_builder->CreateLShr(va_i128, sh_i128); - SetVr(vd, va_i128); + auto va_i128 = GetVr(va); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto sh_i8 = m_ir_builder->CreateExtractElement(vb_v16i8, m_ir_builder->getInt8(0)); + sh_i8 = m_ir_builder->CreateAnd(sh_i8, 0x78); + auto sh_i128 = m_ir_builder->CreateZExt(sh_i8, m_ir_builder->getIntNTy(128)); + va_i128 = m_ir_builder->CreateLShr(va_i128, sh_i128); + SetVr(vd, va_i128); } void Compiler::VSRW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - vb_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x1F))); - auto res_v4i32 = m_ir_builder->CreateLShr(va_v4i32, vb_v4i32); - SetVr(vd, res_v4i32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + vb_v4i32 = 
m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x1F))); + auto res_v4i32 = m_ir_builder->CreateLShr(va_v4i32, vb_v4i32); + SetVr(vd, res_v4i32); } void Compiler::VSUBCUW(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto cmpv4i1 = m_ir_builder->CreateICmpUGE(va_v4i32, vb_v4i32); - auto cmpv4i32 = m_ir_builder->CreateZExt(cmpv4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, cmpv4i32); + auto cmpv4i1 = m_ir_builder->CreateICmpUGE(va_v4i32, vb_v4i32); + auto cmpv4i32 = m_ir_builder->CreateZExt(cmpv4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, cmpv4i32); } void Compiler::VSUBFP(u32 vd, u32 va, u32 vb) { - auto va_v4f32 = GetVrAsFloatVec(va); - auto vb_v4f32 = GetVrAsFloatVec(vb); - auto diff_v4f32 = m_ir_builder->CreateFSub(va_v4f32, vb_v4f32); - SetVr(vd, diff_v4f32); + auto va_v4f32 = GetVrAsFloatVec(va); + auto vb_v4f32 = GetVrAsFloatVec(vb); + auto diff_v4f32 = m_ir_builder->CreateFSub(va_v4f32, vb_v4f32); + SetVr(vd, diff_v4f32); } void Compiler::VSUBSBS(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto diff_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_psubs_b), va_v16i8, vb_v16i8); - SetVr(vd, diff_v16i8); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto diff_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_psubs_b), va_v16i8, vb_v16i8); + SetVr(vd, diff_v16i8); - // TODO: Set VSCR.SAT + // TODO: Set VSCR.SAT } void Compiler::VSUBSHS(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto diff_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_psubs_w), 
va_v8i16, vb_v8i16); - SetVr(vd, diff_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto diff_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_psubs_w), va_v8i16, vb_v8i16); + SetVr(vd, diff_v8i16); - // TODO: Set VSCR.SAT + // TODO: Set VSCR.SAT } void Compiler::VSUBSWS(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); - // See the comments for VADDSWS for a detailed description of how this works + // See the comments for VADDSWS for a detailed description of how this works - // Find the result in case of an overflow - auto tmp1_v4i32 = m_ir_builder->CreateLShr(va_v4i32, 31); - tmp1_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x7FFFFFFF))); - auto tmp1_v16i8 = m_ir_builder->CreateBitCast(tmp1_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + // Find the result in case of an overflow + auto tmp1_v4i32 = m_ir_builder->CreateLShr(va_v4i32, 31); + tmp1_v4i32 = m_ir_builder->CreateAdd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x7FFFFFFF))); + auto tmp1_v16i8 = m_ir_builder->CreateBitCast(tmp1_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - // Find the elements that can overflow (elements with opposite sign bits) - auto tmp2_v4i32 = m_ir_builder->CreateXor(va_v4i32, vb_v4i32); + // Find the elements that can overflow (elements with opposite sign bits) + auto tmp2_v4i32 = m_ir_builder->CreateXor(va_v4i32, vb_v4i32); - // Perform the sub - auto diff_v4i32 = m_ir_builder->CreateSub(va_v4i32, vb_v4i32); - auto diff_v16i8 = m_ir_builder->CreateBitCast(diff_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + // Perform the sub + auto diff_v4i32 = m_ir_builder->CreateSub(va_v4i32, vb_v4i32); + auto diff_v16i8 = 
m_ir_builder->CreateBitCast(diff_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - // Find the elements that overflowed - auto tmp3_v4i32 = m_ir_builder->CreateXor(va_v4i32, diff_v4i32); - tmp3_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, tmp3_v4i32); - tmp3_v4i32 = m_ir_builder->CreateAShr(tmp3_v4i32, 31); - auto tmp3_v16i8 = m_ir_builder->CreateBitCast(tmp3_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); + // Find the elements that overflowed + auto tmp3_v4i32 = m_ir_builder->CreateXor(va_v4i32, diff_v4i32); + tmp3_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, tmp3_v4i32); + tmp3_v4i32 = m_ir_builder->CreateAShr(tmp3_v4i32, 31); + auto tmp3_v16i8 = m_ir_builder->CreateBitCast(tmp3_v4i32, VectorType::get(m_ir_builder->getInt8Ty(), 16)); - // tmp4 is equal to 0xFFFFFFFF if an overflow occured and 0x00000000 otherwise. - auto res_v16i8 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pblendvb), diff_v16i8, tmp1_v16i8, tmp3_v16i8); - SetVr(vd, res_v16i8); + // tmp3 is equal to 0xFFFFFFFF if an overflow occurred and 0x00000000 otherwise. 
+ auto res_v16i8 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse41_pblendvb), diff_v16i8, tmp1_v16i8, tmp3_v16i8); + SetVr(vd, res_v16i8); - // TODO: Set SAT + // TODO: Set SAT } void Compiler::VSUBUBM(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto diff_v16i8 = m_ir_builder->CreateSub(va_v16i8, vb_v16i8); - SetVr(vd, diff_v16i8); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto diff_v16i8 = m_ir_builder->CreateSub(va_v16i8, vb_v16i8); + SetVr(vd, diff_v16i8); } void Compiler::VSUBUBS(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - auto diff_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_psubus_b), va_v16i8, vb_v16i8); - SetVr(vd, diff_v16i8); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + auto diff_v16i8 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_psubus_b), va_v16i8, vb_v16i8); + SetVr(vd, diff_v16i8); - // TODO: Set SAT + // TODO: Set SAT } void Compiler::VSUBUHM(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto diff_v8i16 = m_ir_builder->CreateSub(va_v8i16, vb_v8i16); - SetVr(vd, diff_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto diff_v8i16 = m_ir_builder->CreateSub(va_v8i16, vb_v8i16); + SetVr(vd, diff_v8i16); } void Compiler::VSUBUHS(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto diff_v8i16 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_psubus_w), va_v8i16, vb_v8i16); - SetVr(vd, diff_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto diff_v8i16 = 
m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_psubus_w), va_v8i16, vb_v8i16); + SetVr(vd, diff_v8i16); - // TODO: Set SAT + // TODO: Set SAT } void Compiler::VSUBUWM(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto diff_v4i32 = m_ir_builder->CreateSub(va_v4i32, vb_v4i32); - SetVr(vd, diff_v4i32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto diff_v4i32 = m_ir_builder->CreateSub(va_v4i32, vb_v4i32); + SetVr(vd, diff_v4i32); } void Compiler::VSUBUWS(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto diff_v4i32 = m_ir_builder->CreateSub(va_v4i32, vb_v4i32); - auto cmp_v4i1 = m_ir_builder->CreateICmpULE(diff_v4i32, va_v4i32); - auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - auto res_v4i32 = m_ir_builder->CreateAnd(diff_v4i32, cmp_v4i32); - SetVr(vd, res_v4i32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto diff_v4i32 = m_ir_builder->CreateSub(va_v4i32, vb_v4i32); + auto cmp_v4i1 = m_ir_builder->CreateICmpULE(diff_v4i32, va_v4i32); + auto cmp_v4i32 = m_ir_builder->CreateSExt(cmp_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + auto res_v4i32 = m_ir_builder->CreateAnd(diff_v4i32, cmp_v4i32); + SetVr(vd, res_v4i32); - // TODO: Set SAT + // TODO: Set SAT } void Compiler::VSUMSWS(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); - auto res_i32 = m_ir_builder->CreateExtractElement(vb_v4i32, m_ir_builder->getInt32(3)); - auto res_i64 = m_ir_builder->CreateSExt(res_i32, m_ir_builder->getInt64Ty()); - for (auto i = 0; i < 4; i++) { - auto va_i32 = m_ir_builder->CreateExtractElement(va_v4i32, m_ir_builder->getInt32(i)); - auto 
va_i64 = m_ir_builder->CreateSExt(va_i32, m_ir_builder->getInt64Ty()); - res_i64 = m_ir_builder->CreateAdd(res_i64, va_i64); - } + auto res_i32 = m_ir_builder->CreateExtractElement(vb_v4i32, m_ir_builder->getInt32(3)); + auto res_i64 = m_ir_builder->CreateSExt(res_i32, m_ir_builder->getInt64Ty()); + for (auto i = 0; i < 4; i++) { + auto va_i32 = m_ir_builder->CreateExtractElement(va_v4i32, m_ir_builder->getInt32(i)); + auto va_i64 = m_ir_builder->CreateSExt(va_i32, m_ir_builder->getInt64Ty()); + res_i64 = m_ir_builder->CreateAdd(res_i64, va_i64); + } - auto gt_i1 = m_ir_builder->CreateICmpSGT(res_i64, m_ir_builder->getInt64(0x7FFFFFFFull)); - auto lt_i1 = m_ir_builder->CreateICmpSLT(res_i64, m_ir_builder->getInt64(0xFFFFFFFF80000000ull)); - res_i64 = m_ir_builder->CreateSelect(gt_i1, m_ir_builder->getInt64(0x7FFFFFFFull), res_i64); - res_i64 = m_ir_builder->CreateSelect(lt_i1, m_ir_builder->getInt64(0xFFFFFFFF80000000ull), res_i64); - auto res_i128 = m_ir_builder->CreateZExt(res_i64, m_ir_builder->getIntNTy(128)); + auto gt_i1 = m_ir_builder->CreateICmpSGT(res_i64, m_ir_builder->getInt64(0x7FFFFFFFull)); + auto lt_i1 = m_ir_builder->CreateICmpSLT(res_i64, m_ir_builder->getInt64(0xFFFFFFFF80000000ull)); + res_i64 = m_ir_builder->CreateSelect(gt_i1, m_ir_builder->getInt64(0x7FFFFFFFull), res_i64); + res_i64 = m_ir_builder->CreateSelect(lt_i1, m_ir_builder->getInt64(0xFFFFFFFF80000000ull), res_i64); + auto res_i128 = m_ir_builder->CreateZExt(res_i64, m_ir_builder->getIntNTy(128)); - SetVr(vd, res_i128); + SetVr(vd, res_i128); - // TODO: Set VSCR.SAT + // TODO: Set VSCR.SAT } void Compiler::VSUM2SWS(u32 vd, u32 va, u32 vb) { - auto va_v4i32 = GetVrAsIntVec(va, 32); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto va_v4i32 = GetVrAsIntVec(va, 32); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); - u32 mask1_v2i32[2] = { 0, 2 }; - u32 mask2_v2i32[2] = { 1, 3 }; - auto va_v4i64 = m_ir_builder->CreateSExt(va_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); - auto va1_v2i64 = 
m_ir_builder->CreateShuffleVector(va_v4i64, UndefValue::get(va_v4i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v2i32)); - auto va2_v2i64 = m_ir_builder->CreateShuffleVector(va_v4i64, UndefValue::get(va_v4i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v2i32)); - auto vb_v4i64 = m_ir_builder->CreateSExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); - auto vb_v2i64 = m_ir_builder->CreateShuffleVector(vb_v4i64, UndefValue::get(vb_v4i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v2i32)); + u32 mask1_v2i32[2] = { 0, 2 }; + u32 mask2_v2i32[2] = { 1, 3 }; + auto va_v4i64 = m_ir_builder->CreateSExt(va_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); + auto va1_v2i64 = m_ir_builder->CreateShuffleVector(va_v4i64, UndefValue::get(va_v4i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v2i32)); + auto va2_v2i64 = m_ir_builder->CreateShuffleVector(va_v4i64, UndefValue::get(va_v4i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v2i32)); + auto vb_v4i64 = m_ir_builder->CreateSExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); + auto vb_v2i64 = m_ir_builder->CreateShuffleVector(vb_v4i64, UndefValue::get(vb_v4i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v2i32)); - auto res_v2i64 = m_ir_builder->CreateAdd(va1_v2i64, va2_v2i64); - res_v2i64 = m_ir_builder->CreateAdd(res_v2i64, vb_v2i64); - auto gt_v2i1 = m_ir_builder->CreateICmpSGT(res_v2i64, m_ir_builder->CreateVectorSplat(2, m_ir_builder->getInt64(0x7FFFFFFFull))); - auto lt_v2i1 = m_ir_builder->CreateICmpSLT(res_v2i64, m_ir_builder->CreateVectorSplat(2, m_ir_builder->getInt64(0xFFFFFFFF80000000ull))); - res_v2i64 = m_ir_builder->CreateSelect(gt_v2i1, m_ir_builder->CreateVectorSplat(2, m_ir_builder->getInt64(0x7FFFFFFFull)), res_v2i64); - res_v2i64 = m_ir_builder->CreateSelect(lt_v2i1, m_ir_builder->CreateVectorSplat(2, 
m_ir_builder->getInt64(0x80000000ull)), res_v2i64); - SetVr(vd, res_v2i64); + auto res_v2i64 = m_ir_builder->CreateAdd(va1_v2i64, va2_v2i64); + res_v2i64 = m_ir_builder->CreateAdd(res_v2i64, vb_v2i64); + auto gt_v2i1 = m_ir_builder->CreateICmpSGT(res_v2i64, m_ir_builder->CreateVectorSplat(2, m_ir_builder->getInt64(0x7FFFFFFFull))); + auto lt_v2i1 = m_ir_builder->CreateICmpSLT(res_v2i64, m_ir_builder->CreateVectorSplat(2, m_ir_builder->getInt64(0xFFFFFFFF80000000ull))); + res_v2i64 = m_ir_builder->CreateSelect(gt_v2i1, m_ir_builder->CreateVectorSplat(2, m_ir_builder->getInt64(0x7FFFFFFFull)), res_v2i64); + res_v2i64 = m_ir_builder->CreateSelect(lt_v2i1, m_ir_builder->CreateVectorSplat(2, m_ir_builder->getInt64(0x80000000ull)), res_v2i64); + SetVr(vd, res_v2i64); - // TODO: Set VSCR.SAT + // TODO: Set VSCR.SAT } void Compiler::VSUM4SBS(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); - u32 mask1_v4i32[4] = { 0, 4, 8, 12 }; - u32 mask2_v4i32[4] = { 1, 5, 9, 13 }; - u32 mask3_v4i32[4] = { 2, 6, 10, 14 }; - u32 mask4_v4i32[4] = { 3, 7, 11, 15 }; - auto va_v16i64 = m_ir_builder->CreateSExt(va_v16i8, VectorType::get(m_ir_builder->getInt64Ty(), 16)); - auto va1_v4i64 = m_ir_builder->CreateShuffleVector(va_v16i64, UndefValue::get(va_v16i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); - auto va2_v4i64 = m_ir_builder->CreateShuffleVector(va_v16i64, UndefValue::get(va_v16i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); - auto va3_v4i64 = m_ir_builder->CreateShuffleVector(va_v16i64, UndefValue::get(va_v16i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask3_v4i32)); - auto va4_v4i64 = m_ir_builder->CreateShuffleVector(va_v16i64, UndefValue::get(va_v16i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask4_v4i32)); - auto vb_v4i64 = 
m_ir_builder->CreateSExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); + u32 mask1_v4i32[4] = { 0, 4, 8, 12 }; + u32 mask2_v4i32[4] = { 1, 5, 9, 13 }; + u32 mask3_v4i32[4] = { 2, 6, 10, 14 }; + u32 mask4_v4i32[4] = { 3, 7, 11, 15 }; + auto va_v16i64 = m_ir_builder->CreateSExt(va_v16i8, VectorType::get(m_ir_builder->getInt64Ty(), 16)); + auto va1_v4i64 = m_ir_builder->CreateShuffleVector(va_v16i64, UndefValue::get(va_v16i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); + auto va2_v4i64 = m_ir_builder->CreateShuffleVector(va_v16i64, UndefValue::get(va_v16i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); + auto va3_v4i64 = m_ir_builder->CreateShuffleVector(va_v16i64, UndefValue::get(va_v16i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask3_v4i32)); + auto va4_v4i64 = m_ir_builder->CreateShuffleVector(va_v16i64, UndefValue::get(va_v16i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask4_v4i32)); + auto vb_v4i64 = m_ir_builder->CreateSExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); - auto res_v4i64 = m_ir_builder->CreateAdd(va1_v4i64, va2_v4i64); - res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, va3_v4i64); - res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, va4_v4i64); - res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, vb_v4i64); - auto gt_v4i1 = m_ir_builder->CreateICmpSGT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x7FFFFFFFull))); - auto lt_v4i1 = m_ir_builder->CreateICmpSLT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0xFFFFFFFF80000000ull))); - res_v4i64 = m_ir_builder->CreateSelect(gt_v4i1, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x7FFFFFFFull)), res_v4i64); - res_v4i64 = m_ir_builder->CreateSelect(lt_v4i1, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x80000000ull)), res_v4i64); - auto res_v4i32 = m_ir_builder->CreateTrunc(res_v4i64, 
VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, res_v4i32); + auto res_v4i64 = m_ir_builder->CreateAdd(va1_v4i64, va2_v4i64); + res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, va3_v4i64); + res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, va4_v4i64); + res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, vb_v4i64); + auto gt_v4i1 = m_ir_builder->CreateICmpSGT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x7FFFFFFFull))); + auto lt_v4i1 = m_ir_builder->CreateICmpSLT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0xFFFFFFFF80000000ull))); + res_v4i64 = m_ir_builder->CreateSelect(gt_v4i1, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x7FFFFFFFull)), res_v4i64); + res_v4i64 = m_ir_builder->CreateSelect(lt_v4i1, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x80000000ull)), res_v4i64); + auto res_v4i32 = m_ir_builder->CreateTrunc(res_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, res_v4i32); - // TODO: Set VSCR.SAT + // TODO: Set VSCR.SAT } void Compiler::VSUM4SHS(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); - u32 mask1_v4i32[4] = { 0, 2, 4, 6 }; - u32 mask2_v4i32[4] = { 1, 3, 5, 7 }; - auto va_v8i64 = m_ir_builder->CreateSExt(va_v8i16, VectorType::get(m_ir_builder->getInt64Ty(), 8)); - auto va1_v4i64 = m_ir_builder->CreateShuffleVector(va_v8i64, UndefValue::get(va_v8i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); - auto va2_v4i64 = m_ir_builder->CreateShuffleVector(va_v8i64, UndefValue::get(va_v8i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); - auto vb_v4i64 = m_ir_builder->CreateSExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); + u32 mask1_v4i32[4] = { 0, 2, 4, 6 }; + u32 mask2_v4i32[4] = { 1, 3, 5, 7 }; + auto va_v8i64 = 
m_ir_builder->CreateSExt(va_v8i16, VectorType::get(m_ir_builder->getInt64Ty(), 8)); + auto va1_v4i64 = m_ir_builder->CreateShuffleVector(va_v8i64, UndefValue::get(va_v8i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); + auto va2_v4i64 = m_ir_builder->CreateShuffleVector(va_v8i64, UndefValue::get(va_v8i64->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); + auto vb_v4i64 = m_ir_builder->CreateSExt(vb_v4i32, VectorType::get(m_ir_builder->getInt64Ty(), 4)); - auto res_v4i64 = m_ir_builder->CreateAdd(va1_v4i64, va2_v4i64); - res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, vb_v4i64); - auto gt_v4i1 = m_ir_builder->CreateICmpSGT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x7FFFFFFFull))); - auto lt_v4i1 = m_ir_builder->CreateICmpSLT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0xFFFFFFFF80000000ull))); - res_v4i64 = m_ir_builder->CreateSelect(gt_v4i1, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x7FFFFFFFull)), res_v4i64); - res_v4i64 = m_ir_builder->CreateSelect(lt_v4i1, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x80000000ull)), res_v4i64); - auto res_v4i32 = m_ir_builder->CreateTrunc(res_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, res_v4i32); + auto res_v4i64 = m_ir_builder->CreateAdd(va1_v4i64, va2_v4i64); + res_v4i64 = m_ir_builder->CreateAdd(res_v4i64, vb_v4i64); + auto gt_v4i1 = m_ir_builder->CreateICmpSGT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x7FFFFFFFull))); + auto lt_v4i1 = m_ir_builder->CreateICmpSLT(res_v4i64, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0xFFFFFFFF80000000ull))); + res_v4i64 = m_ir_builder->CreateSelect(gt_v4i1, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x7FFFFFFFull)), res_v4i64); + res_v4i64 = m_ir_builder->CreateSelect(lt_v4i1, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt64(0x80000000ull)), res_v4i64); + 
auto res_v4i32 = m_ir_builder->CreateTrunc(res_v4i64, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, res_v4i32); - // TODO: Set VSCR.SAT + // TODO: Set VSCR.SAT } void Compiler::VSUM4UBS(u32 vd, u32 va, u32 vb) { - auto va_v16i8 = GetVrAsIntVec(va, 8); - auto vb_v4i32 = GetVrAsIntVec(vb, 32); + auto va_v16i8 = GetVrAsIntVec(va, 8); + auto vb_v4i32 = GetVrAsIntVec(vb, 32); - u32 mask1_v4i32[4] = { 0, 4, 8, 12 }; - u32 mask2_v4i32[4] = { 1, 5, 9, 13 }; - u32 mask3_v4i32[4] = { 2, 6, 10, 14 }; - u32 mask4_v4i32[4] = { 3, 7, 11, 15 }; - auto va1_v4i8 = m_ir_builder->CreateShuffleVector(va_v16i8, UndefValue::get(va_v16i8->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); - auto va1_v4i32 = m_ir_builder->CreateZExt(va1_v4i8, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - auto va2_v4i8 = m_ir_builder->CreateShuffleVector(va_v16i8, UndefValue::get(va_v16i8->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); - auto va2_v4i32 = m_ir_builder->CreateZExt(va2_v4i8, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - auto va3_v4i8 = m_ir_builder->CreateShuffleVector(va_v16i8, UndefValue::get(va_v16i8->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask3_v4i32)); - auto va3_v4i32 = m_ir_builder->CreateZExt(va3_v4i8, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - auto va4_v4i8 = m_ir_builder->CreateShuffleVector(va_v16i8, UndefValue::get(va_v16i8->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask4_v4i32)); - auto va4_v4i32 = m_ir_builder->CreateZExt(va4_v4i8, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + u32 mask1_v4i32[4] = { 0, 4, 8, 12 }; + u32 mask2_v4i32[4] = { 1, 5, 9, 13 }; + u32 mask3_v4i32[4] = { 2, 6, 10, 14 }; + u32 mask4_v4i32[4] = { 3, 7, 11, 15 }; + auto va1_v4i8 = m_ir_builder->CreateShuffleVector(va_v16i8, UndefValue::get(va_v16i8->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask1_v4i32)); + auto va1_v4i32 = 
m_ir_builder->CreateZExt(va1_v4i8, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + auto va2_v4i8 = m_ir_builder->CreateShuffleVector(va_v16i8, UndefValue::get(va_v16i8->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask2_v4i32)); + auto va2_v4i32 = m_ir_builder->CreateZExt(va2_v4i8, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + auto va3_v4i8 = m_ir_builder->CreateShuffleVector(va_v16i8, UndefValue::get(va_v16i8->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask3_v4i32)); + auto va3_v4i32 = m_ir_builder->CreateZExt(va3_v4i8, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + auto va4_v4i8 = m_ir_builder->CreateShuffleVector(va_v16i8, UndefValue::get(va_v16i8->getType()), ConstantDataVector::get(m_ir_builder->getContext(), mask4_v4i32)); + auto va4_v4i32 = m_ir_builder->CreateZExt(va4_v4i8, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - auto res_v4i32 = m_ir_builder->CreateAdd(va1_v4i32, va2_v4i32); - res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, va3_v4i32); - res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, va4_v4i32); - res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vb_v4i32); - auto lt_v4i1 = m_ir_builder->CreateICmpULT(res_v4i32, vb_v4i32); - auto lt_v4i32 = m_ir_builder->CreateSExt(lt_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - res_v4i32 = m_ir_builder->CreateOr(lt_v4i32, res_v4i32); - SetVr(vd, res_v4i32); + auto res_v4i32 = m_ir_builder->CreateAdd(va1_v4i32, va2_v4i32); + res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, va3_v4i32); + res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, va4_v4i32); + res_v4i32 = m_ir_builder->CreateAdd(res_v4i32, vb_v4i32); + auto lt_v4i1 = m_ir_builder->CreateICmpULT(res_v4i32, vb_v4i32); + auto lt_v4i32 = m_ir_builder->CreateSExt(lt_v4i1, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + res_v4i32 = m_ir_builder->CreateOr(lt_v4i32, res_v4i32); + SetVr(vd, res_v4i32); - // TODO: Set VSCR.SAT + // TODO: Set VSCR.SAT } void Compiler::VUPKHPX(u32 vd, u32 vb) { - auto vb_v8i16 
= GetVrAsIntVec(vb, 16); - u32 mask_v8i32[8] = { 4, 4, 5, 5, 6, 6, 7, 7 }; - vb_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + u32 mask_v8i32[8] = { 4, 4, 5, 5, 6, 6, 7, 7 }; + vb_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); - auto vb_v4i32 = m_ir_builder->CreateBitCast(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - vb_v4i32 = m_ir_builder->CreateAShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10))); - auto tmp1_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3))); - tmp1_v4i32 = m_ir_builder->CreateAnd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x00001F00))); - auto tmp2_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(6))); - tmp2_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x0000001F))); - auto res_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFF1F0000))); - res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp1_v4i32); - res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp2_v4i32); - SetVr(vd, res_v4i32); + auto vb_v4i32 = m_ir_builder->CreateBitCast(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + vb_v4i32 = m_ir_builder->CreateAShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10))); + auto tmp1_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3))); + tmp1_v4i32 = m_ir_builder->CreateAnd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x00001F00))); + auto tmp2_v4i32 = 
m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(6))); + tmp2_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x0000001F))); + auto res_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFF1F0000))); + res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp1_v4i32); + res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp2_v4i32); + SetVr(vd, res_v4i32); } void Compiler::VUPKHSB(u32 vd, u32 vb) { - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - u32 mask_v8i32[8] = { 8, 9, 10, 11, 12, 13, 14, 15 }; - auto vb_v8i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, UndefValue::get(VectorType::get(m_ir_builder->getInt8Ty(), 16)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); - auto res_v8i16 = m_ir_builder->CreateSExt(vb_v8i8, VectorType::get(m_ir_builder->getInt16Ty(), 8)); - SetVr(vd, res_v8i16); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + u32 mask_v8i32[8] = { 8, 9, 10, 11, 12, 13, 14, 15 }; + auto vb_v8i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, UndefValue::get(VectorType::get(m_ir_builder->getInt8Ty(), 16)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); + auto res_v8i16 = m_ir_builder->CreateSExt(vb_v8i8, VectorType::get(m_ir_builder->getInt16Ty(), 8)); + SetVr(vd, res_v8i16); } void Compiler::VUPKHSH(u32 vd, u32 vb) { - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - u32 mask_v4i32[4] = { 4, 5, 6, 7 }; - auto vb_v4i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v4i32)); - auto res_v4i32 = m_ir_builder->CreateSExt(vb_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, res_v4i32); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + u32 mask_v4i32[4] = { 4, 5, 6, 7 }; + auto vb_v4i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, 
UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v4i32)); + auto res_v4i32 = m_ir_builder->CreateSExt(vb_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, res_v4i32); } void Compiler::VUPKLPX(u32 vd, u32 vb) { - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - u32 mask_v8i32[8] = { 0, 0, 1, 1, 2, 2, 3, 3 }; - vb_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + u32 mask_v8i32[8] = { 0, 0, 1, 1, 2, 2, 3, 3 }; + vb_v8i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); - auto vb_v4i32 = m_ir_builder->CreateBitCast(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - vb_v4i32 = m_ir_builder->CreateAShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10))); - auto tmp1_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3))); - tmp1_v4i32 = m_ir_builder->CreateAnd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x00001F00))); - auto tmp2_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(6))); - tmp2_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x0000001F))); - auto res_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFF1F0000))); - res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp1_v4i32); - res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp2_v4i32); - SetVr(vd, res_v4i32); + auto vb_v4i32 = m_ir_builder->CreateBitCast(vb_v8i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + vb_v4i32 = m_ir_builder->CreateAShr(vb_v4i32, 
m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(10))); + auto tmp1_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(3))); + tmp1_v4i32 = m_ir_builder->CreateAnd(tmp1_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x00001F00))); + auto tmp2_v4i32 = m_ir_builder->CreateLShr(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(6))); + tmp2_v4i32 = m_ir_builder->CreateAnd(tmp2_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0x0000001F))); + auto res_v4i32 = m_ir_builder->CreateAnd(vb_v4i32, m_ir_builder->CreateVectorSplat(4, m_ir_builder->getInt32(0xFF1F0000))); + res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp1_v4i32); + res_v4i32 = m_ir_builder->CreateOr(res_v4i32, tmp2_v4i32); + SetVr(vd, res_v4i32); } void Compiler::VUPKLSB(u32 vd, u32 vb) { - auto vb_v16i8 = GetVrAsIntVec(vb, 8); - u32 mask_v8i32[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; - auto vb_v8i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, UndefValue::get(VectorType::get(m_ir_builder->getInt8Ty(), 16)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); - auto res_v8i16 = m_ir_builder->CreateSExt(vb_v8i8, VectorType::get(m_ir_builder->getInt16Ty(), 8)); - SetVr(vd, res_v8i16); + auto vb_v16i8 = GetVrAsIntVec(vb, 8); + u32 mask_v8i32[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; + auto vb_v8i8 = m_ir_builder->CreateShuffleVector(vb_v16i8, UndefValue::get(VectorType::get(m_ir_builder->getInt8Ty(), 16)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v8i32)); + auto res_v8i16 = m_ir_builder->CreateSExt(vb_v8i8, VectorType::get(m_ir_builder->getInt16Ty(), 8)); + SetVr(vd, res_v8i16); } void Compiler::VUPKLSH(u32 vd, u32 vb) { - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - u32 mask_v4i32[4] = { 0, 1, 2, 3 }; - auto vb_v4i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v4i32)); - 
auto res_v4i32 = m_ir_builder->CreateSExt(vb_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); - SetVr(vd, res_v4i32); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + u32 mask_v4i32[4] = { 0, 1, 2, 3 }; + auto vb_v4i16 = m_ir_builder->CreateShuffleVector(vb_v8i16, UndefValue::get(VectorType::get(m_ir_builder->getInt16Ty(), 8)), ConstantDataVector::get(m_ir_builder->getContext(), mask_v4i32)); + auto res_v4i32 = m_ir_builder->CreateSExt(vb_v4i16, VectorType::get(m_ir_builder->getInt32Ty(), 4)); + SetVr(vd, res_v4i32); } void Compiler::VXOR(u32 vd, u32 va, u32 vb) { - auto va_v8i16 = GetVrAsIntVec(va, 16); - auto vb_v8i16 = GetVrAsIntVec(vb, 16); - auto res_v8i16 = m_ir_builder->CreateXor(va_v8i16, vb_v8i16); - SetVr(vd, res_v8i16); + auto va_v8i16 = GetVrAsIntVec(va, 16); + auto vb_v8i16 = GetVrAsIntVec(vb, 16); + auto res_v8i16 = m_ir_builder->CreateXor(va_v8i16, vb_v8i16); + SetVr(vd, res_v8i16); } void Compiler::MULLI(u32 rd, u32 ra, s32 simm16) { - auto ra_i64 = GetGpr(ra); - auto res_i64 = m_ir_builder->CreateMul(ra_i64, m_ir_builder->getInt64((s64)simm16)); - SetGpr(rd, res_i64); + auto ra_i64 = GetGpr(ra); + auto res_i64 = m_ir_builder->CreateMul(ra_i64, m_ir_builder->getInt64((s64)simm16)); + SetGpr(rd, res_i64); } void Compiler::SUBFIC(u32 rd, u32 ra, s32 simm16) { - auto ra_i64 = GetGpr(ra); - ra_i64 = m_ir_builder->CreateNeg(ra_i64); - auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, m_ir_builder->getInt64((s64)simm16)); - auto diff_i64 = m_ir_builder->CreateExtractValue(res_s, { 0 }); - auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); - SetGpr(rd, diff_i64); - SetXerCa(carry_i1); + auto ra_i64 = GetGpr(ra); + ra_i64 = m_ir_builder->CreateNeg(ra_i64); + auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, m_ir_builder->getInt64((s64)simm16)); + auto diff_i64 
= m_ir_builder->CreateExtractValue(res_s, { 0 }); + auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); + SetGpr(rd, diff_i64); + SetXerCa(carry_i1); } void Compiler::CMPLI(u32 crfd, u32 l, u32 ra, u32 uimm16) { - Value * ra_i64; - if (l == 0) { - ra_i64 = m_ir_builder->CreateZExt(GetGpr(ra, 32), m_ir_builder->getInt64Ty()); - } - else { - ra_i64 = GetGpr(ra); - } + Value * ra_i64; + if (l == 0) { + ra_i64 = m_ir_builder->CreateZExt(GetGpr(ra, 32), m_ir_builder->getInt64Ty()); + } + else { + ra_i64 = GetGpr(ra); + } - SetCrFieldUnsignedCmp(crfd, ra_i64, m_ir_builder->getInt64(uimm16)); + SetCrFieldUnsignedCmp(crfd, ra_i64, m_ir_builder->getInt64(uimm16)); } void Compiler::CMPI(u32 crfd, u32 l, u32 ra, s32 simm16) { - Value * ra_i64; - if (l == 0) { - ra_i64 = m_ir_builder->CreateSExt(GetGpr(ra, 32), m_ir_builder->getInt64Ty()); - } - else { - ra_i64 = GetGpr(ra); - } + Value * ra_i64; + if (l == 0) { + ra_i64 = m_ir_builder->CreateSExt(GetGpr(ra, 32), m_ir_builder->getInt64Ty()); + } + else { + ra_i64 = GetGpr(ra); + } - SetCrFieldSignedCmp(crfd, ra_i64, m_ir_builder->getInt64((s64)simm16)); + SetCrFieldSignedCmp(crfd, ra_i64, m_ir_builder->getInt64((s64)simm16)); } void Compiler::ADDIC(u32 rd, u32 ra, s32 simm16) { - auto ra_i64 = GetGpr(ra); - auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), m_ir_builder->getInt64((s64)simm16), ra_i64); - auto sum_i64 = m_ir_builder->CreateExtractValue(res_s, { 0 }); - auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); - SetGpr(rd, sum_i64); - SetXerCa(carry_i1); + auto ra_i64 = GetGpr(ra); + auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), m_ir_builder->getInt64((s64)simm16), ra_i64); + auto sum_i64 = m_ir_builder->CreateExtractValue(res_s, { 0 }); + auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); + SetGpr(rd, sum_i64); 
+ SetXerCa(carry_i1); } void Compiler::ADDIC_(u32 rd, u32 ra, s32 simm16) { - ADDIC(rd, ra, simm16); - SetCrFieldSignedCmp(0, GetGpr(rd), m_ir_builder->getInt64(0)); + ADDIC(rd, ra, simm16); + SetCrFieldSignedCmp(0, GetGpr(rd), m_ir_builder->getInt64(0)); } void Compiler::ADDI(u32 rd, u32 ra, s32 simm16) { - if (ra == 0) { - SetGpr(rd, m_ir_builder->getInt64((s64)simm16)); - } - else { - auto ra_i64 = GetGpr(ra); - auto sum_i64 = m_ir_builder->CreateAdd(ra_i64, m_ir_builder->getInt64((s64)simm16)); - SetGpr(rd, sum_i64); - } + if (ra == 0) { + SetGpr(rd, m_ir_builder->getInt64((s64)simm16)); + } + else { + auto ra_i64 = GetGpr(ra); + auto sum_i64 = m_ir_builder->CreateAdd(ra_i64, m_ir_builder->getInt64((s64)simm16)); + SetGpr(rd, sum_i64); + } } void Compiler::ADDIS(u32 rd, u32 ra, s32 simm16) { - if (ra == 0) { - SetGpr(rd, m_ir_builder->getInt64((s64)simm16 << 16)); - } - else { - auto ra_i64 = GetGpr(ra); - auto sum_i64 = m_ir_builder->CreateAdd(ra_i64, m_ir_builder->getInt64((s64)simm16 << 16)); - SetGpr(rd, sum_i64); - } + if (ra == 0) { + SetGpr(rd, m_ir_builder->getInt64((s64)simm16 << 16)); + } + else { + auto ra_i64 = GetGpr(ra); + auto sum_i64 = m_ir_builder->CreateAdd(ra_i64, m_ir_builder->getInt64((s64)simm16 << 16)); + SetGpr(rd, sum_i64); + } } void Compiler::BC(u32 bo, u32 bi, s32 bd, u32 aa, u32 lk) { - auto target_i64 = m_ir_builder->getInt64(branchTarget(aa ? 0 : m_state.current_instruction_address, bd)); - auto target_i32 = m_ir_builder->CreateTrunc(target_i64, m_ir_builder->getInt32Ty()); - CreateBranch(CheckBranchCondition(bo, bi), target_i32, lk ? true : false); + auto target_i64 = m_ir_builder->getInt64(branchTarget(aa ? 0 : m_state.current_instruction_address, bd)); + auto target_i32 = m_ir_builder->CreateTrunc(target_i64, m_ir_builder->getInt32Ty()); + CreateBranch(CheckBranchCondition(bo, bi), target_i32, lk ? 
true : false); } void Compiler::HACK(u32 index) { - Call("execute_ppu_func_by_index", &execute_ppu_func_by_index, m_state.args[CompileTaskState::Args::State], m_ir_builder->getInt32(index & EIF_USE_BRANCH ? index : index & ~EIF_PERFORM_BLR)); - if (index & EIF_PERFORM_BLR || index & EIF_USE_BRANCH) { - auto lr_i32 = index & EIF_USE_BRANCH ? GetPc() : m_ir_builder->CreateTrunc(m_ir_builder->CreateAnd(GetLr(), ~0x3ULL), m_ir_builder->getInt32Ty()); - CreateBranch(nullptr, lr_i32, false, (index & EIF_USE_BRANCH) == 0); - } - // copied from Compiler::SC() - //auto ret_i1 = Call("PollStatus", m_poll_status_function, m_state.args[CompileTaskState::Args::State]); - //auto cmp_i1 = m_ir_builder->CreateICmpEQ(ret_i1, m_ir_builder->getInt1(true)); - //auto then_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "then_true"); - //auto merge_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "merge_true"); - //m_ir_builder->CreateCondBr(cmp_i1, then_bb, merge_bb); - //m_ir_builder->SetInsertPoint(then_bb); - //m_ir_builder->CreateRet(m_ir_builder->getInt32(0xFFFFFFFF)); - //m_ir_builder->SetInsertPoint(merge_bb); + Call("execute_ppu_func_by_index", &execute_ppu_func_by_index, m_state.args[CompileTaskState::Args::State], m_ir_builder->getInt32(index & EIF_USE_BRANCH ? index : index & ~EIF_PERFORM_BLR)); + if (index & EIF_PERFORM_BLR || index & EIF_USE_BRANCH) { + auto lr_i32 = index & EIF_USE_BRANCH ? 
GetPc() : m_ir_builder->CreateTrunc(m_ir_builder->CreateAnd(GetLr(), ~0x3ULL), m_ir_builder->getInt32Ty()); + CreateBranch(nullptr, lr_i32, false, (index & EIF_USE_BRANCH) == 0); + } + // copied from Compiler::SC() + //auto ret_i1 = Call("PollStatus", m_poll_status_function, m_state.args[CompileTaskState::Args::State]); + //auto cmp_i1 = m_ir_builder->CreateICmpEQ(ret_i1, m_ir_builder->getInt1(true)); + //auto then_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "then_true"); + //auto merge_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "merge_true"); + //m_ir_builder->CreateCondBr(cmp_i1, then_bb, merge_bb); + //m_ir_builder->SetInsertPoint(then_bb); + //m_ir_builder->CreateRet(m_ir_builder->getInt32(0xFFFFFFFF)); + //m_ir_builder->SetInsertPoint(merge_bb); } void Compiler::SC(u32 lev) { - switch (lev) { - case 0: - Call("SysCalls.DoSyscall", SysCalls::DoSyscall, m_state.args[CompileTaskState::Args::State], GetGpr(11)); - break; - case 3: - Call("PPUThread.FastStop", &PPUThread::fast_stop, m_state.args[CompileTaskState::Args::State]); - break; - default: - CompilationError(fmt::Format("SC %u", lev)); - break; - } + switch (lev) { + case 0: + Call("SysCalls.DoSyscall", SysCalls::DoSyscall, m_state.args[CompileTaskState::Args::State], GetGpr(11)); + break; + case 3: + Call("PPUThread.FastStop", &PPUThread::fast_stop, m_state.args[CompileTaskState::Args::State]); + break; + default: + CompilationError(fmt::Format("SC %u", lev)); + break; + } - auto ret_i1 = Call("PollStatus", m_poll_status_function, m_state.args[CompileTaskState::Args::State]); - auto cmp_i1 = m_ir_builder->CreateICmpEQ(ret_i1, m_ir_builder->getInt1(true)); - auto then_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "then_true"); - auto merge_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "merge_true"); - m_ir_builder->CreateCondBr(cmp_i1, then_bb, merge_bb); - m_ir_builder->SetInsertPoint(then_bb); - 
m_ir_builder->CreateRet(m_ir_builder->getInt32(0xFFFFFFFF)); - m_ir_builder->SetInsertPoint(merge_bb); + auto ret_i1 = Call("PollStatus", m_poll_status_function, m_state.args[CompileTaskState::Args::State]); + auto cmp_i1 = m_ir_builder->CreateICmpEQ(ret_i1, m_ir_builder->getInt1(true)); + auto then_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "then_true"); + auto merge_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "merge_true"); + m_ir_builder->CreateCondBr(cmp_i1, then_bb, merge_bb); + m_ir_builder->SetInsertPoint(then_bb); + m_ir_builder->CreateRet(m_ir_builder->getInt32(0xFFFFFFFF)); + m_ir_builder->SetInsertPoint(merge_bb); } void Compiler::B(s32 ll, u32 aa, u32 lk) { - auto target_i64 = m_ir_builder->getInt64(branchTarget(aa ? 0 : m_state.current_instruction_address, ll)); - auto target_i32 = m_ir_builder->CreateTrunc(target_i64, m_ir_builder->getInt32Ty()); - CreateBranch(nullptr, target_i32, lk ? true : false); + auto target_i64 = m_ir_builder->getInt64(branchTarget(aa ? 0 : m_state.current_instruction_address, ll)); + auto target_i32 = m_ir_builder->CreateTrunc(target_i64, m_ir_builder->getInt32Ty()); + CreateBranch(nullptr, target_i32, lk ? true : false); } void Compiler::MCRF(u32 crfd, u32 crfs) { - if (crfd != crfs) { - auto cr_i32 = GetCr(); - auto crf_i32 = GetNibble(cr_i32, crfs); - cr_i32 = SetNibble(cr_i32, crfd, crf_i32); - SetCr(cr_i32); - } + if (crfd != crfs) { + auto cr_i32 = GetCr(); + auto crf_i32 = GetNibble(cr_i32, crfs); + cr_i32 = SetNibble(cr_i32, crfd, crf_i32); + SetCr(cr_i32); + } } void Compiler::BCLR(u32 bo, u32 bi, u32 bh, u32 lk) { - auto lr_i64 = GetLr(); - lr_i64 = m_ir_builder->CreateAnd(lr_i64, ~0x3ULL); - auto lr_i32 = m_ir_builder->CreateTrunc(lr_i64, m_ir_builder->getInt32Ty()); - CreateBranch(CheckBranchCondition(bo, bi), lr_i32, lk ? 
true : false, true); + auto lr_i64 = GetLr(); + lr_i64 = m_ir_builder->CreateAnd(lr_i64, ~0x3ULL); + auto lr_i32 = m_ir_builder->CreateTrunc(lr_i64, m_ir_builder->getInt32Ty()); + CreateBranch(CheckBranchCondition(bo, bi), lr_i32, lk ? true : false, true); } void Compiler::CRNOR(u32 crbd, u32 crba, u32 crbb) { - auto cr_i32 = GetCr(); - auto ba_i32 = GetBit(cr_i32, crba); - auto bb_i32 = GetBit(cr_i32, crbb); - auto res_i32 = m_ir_builder->CreateOr(ba_i32, bb_i32); - res_i32 = m_ir_builder->CreateXor(res_i32, 1); - cr_i32 = SetBit(cr_i32, crbd, res_i32); - SetCr(cr_i32); + auto cr_i32 = GetCr(); + auto ba_i32 = GetBit(cr_i32, crba); + auto bb_i32 = GetBit(cr_i32, crbb); + auto res_i32 = m_ir_builder->CreateOr(ba_i32, bb_i32); + res_i32 = m_ir_builder->CreateXor(res_i32, 1); + cr_i32 = SetBit(cr_i32, crbd, res_i32); + SetCr(cr_i32); } void Compiler::CRANDC(u32 crbd, u32 crba, u32 crbb) { - auto cr_i32 = GetCr(); - auto ba_i32 = GetBit(cr_i32, crba); - auto bb_i32 = GetBit(cr_i32, crbb); - auto res_i32 = m_ir_builder->CreateXor(bb_i32, 1); - res_i32 = m_ir_builder->CreateAnd(ba_i32, res_i32); - cr_i32 = SetBit(cr_i32, crbd, res_i32); - SetCr(cr_i32); + auto cr_i32 = GetCr(); + auto ba_i32 = GetBit(cr_i32, crba); + auto bb_i32 = GetBit(cr_i32, crbb); + auto res_i32 = m_ir_builder->CreateXor(bb_i32, 1); + res_i32 = m_ir_builder->CreateAnd(ba_i32, res_i32); + cr_i32 = SetBit(cr_i32, crbd, res_i32); + SetCr(cr_i32); } void Compiler::ISYNC() { - m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_mfence)); + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_mfence)); } void Compiler::CRXOR(u32 crbd, u32 crba, u32 crbb) { - auto cr_i32 = GetCr(); - auto ba_i32 = GetBit(cr_i32, crba); - auto bb_i32 = GetBit(cr_i32, crbb); - auto res_i32 = m_ir_builder->CreateXor(ba_i32, bb_i32); - cr_i32 = SetBit(cr_i32, crbd, res_i32); - SetCr(cr_i32); + auto cr_i32 = GetCr(); + auto ba_i32 = GetBit(cr_i32, crba); + auto bb_i32 = 
GetBit(cr_i32, crbb); + auto res_i32 = m_ir_builder->CreateXor(ba_i32, bb_i32); + cr_i32 = SetBit(cr_i32, crbd, res_i32); + SetCr(cr_i32); } void Compiler::DCBI(u32 ra, u32 rb) { - // TODO: See if this can be translated to cache flush - m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); + // TODO: See if this can be translated to cache flush + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); } void Compiler::CRNAND(u32 crbd, u32 crba, u32 crbb) { - auto cr_i32 = GetCr(); - auto ba_i32 = GetBit(cr_i32, crba); - auto bb_i32 = GetBit(cr_i32, crbb); - auto res_i32 = m_ir_builder->CreateAnd(ba_i32, bb_i32); - res_i32 = m_ir_builder->CreateXor(res_i32, 1); - cr_i32 = SetBit(cr_i32, crbd, res_i32); - SetCr(cr_i32); + auto cr_i32 = GetCr(); + auto ba_i32 = GetBit(cr_i32, crba); + auto bb_i32 = GetBit(cr_i32, crbb); + auto res_i32 = m_ir_builder->CreateAnd(ba_i32, bb_i32); + res_i32 = m_ir_builder->CreateXor(res_i32, 1); + cr_i32 = SetBit(cr_i32, crbd, res_i32); + SetCr(cr_i32); } void Compiler::CRAND(u32 crbd, u32 crba, u32 crbb) { - auto cr_i32 = GetCr(); - auto ba_i32 = GetBit(cr_i32, crba); - auto bb_i32 = GetBit(cr_i32, crbb); - auto res_i32 = m_ir_builder->CreateAnd(ba_i32, bb_i32); - cr_i32 = SetBit(cr_i32, crbd, res_i32); - SetCr(cr_i32); + auto cr_i32 = GetCr(); + auto ba_i32 = GetBit(cr_i32, crba); + auto bb_i32 = GetBit(cr_i32, crbb); + auto res_i32 = m_ir_builder->CreateAnd(ba_i32, bb_i32); + cr_i32 = SetBit(cr_i32, crbd, res_i32); + SetCr(cr_i32); } void Compiler::CREQV(u32 crbd, u32 crba, u32 crbb) { - auto cr_i32 = GetCr(); - auto ba_i32 = GetBit(cr_i32, crba); - auto bb_i32 = GetBit(cr_i32, crbb); - auto res_i32 = m_ir_builder->CreateXor(ba_i32, bb_i32); - res_i32 = m_ir_builder->CreateXor(res_i32, 1); - cr_i32 = SetBit(cr_i32, crbd, res_i32); - SetCr(cr_i32); + auto cr_i32 = GetCr(); + auto ba_i32 = GetBit(cr_i32, crba); + auto bb_i32 = GetBit(cr_i32, crbb); + auto res_i32 = 
m_ir_builder->CreateXor(ba_i32, bb_i32); + res_i32 = m_ir_builder->CreateXor(res_i32, 1); + cr_i32 = SetBit(cr_i32, crbd, res_i32); + SetCr(cr_i32); } void Compiler::CRORC(u32 crbd, u32 crba, u32 crbb) { - auto cr_i32 = GetCr(); - auto ba_i32 = GetBit(cr_i32, crba); - auto bb_i32 = GetBit(cr_i32, crbb); - auto res_i32 = m_ir_builder->CreateXor(bb_i32, 1); - res_i32 = m_ir_builder->CreateOr(ba_i32, res_i32); - cr_i32 = SetBit(cr_i32, crbd, res_i32); - SetCr(cr_i32); + auto cr_i32 = GetCr(); + auto ba_i32 = GetBit(cr_i32, crba); + auto bb_i32 = GetBit(cr_i32, crbb); + auto res_i32 = m_ir_builder->CreateXor(bb_i32, 1); + res_i32 = m_ir_builder->CreateOr(ba_i32, res_i32); + cr_i32 = SetBit(cr_i32, crbd, res_i32); + SetCr(cr_i32); } void Compiler::CROR(u32 crbd, u32 crba, u32 crbb) { - auto cr_i32 = GetCr(); - auto ba_i32 = GetBit(cr_i32, crba); - auto bb_i32 = GetBit(cr_i32, crbb); - auto res_i32 = m_ir_builder->CreateOr(ba_i32, bb_i32); - cr_i32 = SetBit(cr_i32, crbd, res_i32); - SetCr(cr_i32); + auto cr_i32 = GetCr(); + auto ba_i32 = GetBit(cr_i32, crba); + auto bb_i32 = GetBit(cr_i32, crbb); + auto res_i32 = m_ir_builder->CreateOr(ba_i32, bb_i32); + cr_i32 = SetBit(cr_i32, crbd, res_i32); + SetCr(cr_i32); } void Compiler::BCCTR(u32 bo, u32 bi, u32 bh, u32 lk) { - auto ctr_i64 = GetCtr(); - ctr_i64 = m_ir_builder->CreateAnd(ctr_i64, ~0x3ULL); - auto ctr_i32 = m_ir_builder->CreateTrunc(ctr_i64, m_ir_builder->getInt32Ty()); - CreateBranch(CheckBranchCondition(bo, bi), ctr_i32, lk ? true : false); + auto ctr_i64 = GetCtr(); + ctr_i64 = m_ir_builder->CreateAnd(ctr_i64, ~0x3ULL); + auto ctr_i32 = m_ir_builder->CreateTrunc(ctr_i64, m_ir_builder->getInt32Ty()); + CreateBranch(CheckBranchCondition(bo, bi), ctr_i32, lk ? 
true : false); } void Compiler::RLWIMI(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, u32 rc) { - auto rs_i32 = GetGpr(rs, 32); - auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); - auto rsh_i64 = m_ir_builder->CreateShl(rs_i64, 32); - rs_i64 = m_ir_builder->CreateOr(rs_i64, rsh_i64); - auto ra_i64 = GetGpr(ra); - auto res_i64 = rs_i64; - if (sh) { - auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh); - auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh); - res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); - } + auto rs_i32 = GetGpr(rs, 32); + auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); + auto rsh_i64 = m_ir_builder->CreateShl(rs_i64, 32); + rs_i64 = m_ir_builder->CreateOr(rs_i64, rsh_i64); + auto ra_i64 = GetGpr(ra); + auto res_i64 = rs_i64; + if (sh) { + auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh); + auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh); + res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); + } - u64 mask = s_rotate_mask[32 + mb][32 + me]; - res_i64 = m_ir_builder->CreateAnd(res_i64, mask); - ra_i64 = m_ir_builder->CreateAnd(ra_i64, ~mask); - res_i64 = m_ir_builder->CreateOr(res_i64, ra_i64); - SetGpr(ra, res_i64); + u64 mask = s_rotate_mask[32 + mb][32 + me]; + res_i64 = m_ir_builder->CreateAnd(res_i64, mask); + ra_i64 = m_ir_builder->CreateAnd(ra_i64, ~mask); + res_i64 = m_ir_builder->CreateOr(res_i64, ra_i64); + SetGpr(ra, res_i64); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } } void Compiler::RLWINM(u32 ra, u32 rs, u32 sh, u32 mb, u32 me, u32 rc) { - auto rs_i32 = GetGpr(rs, 32); - auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); - auto rsh_i64 = m_ir_builder->CreateShl(rs_i64, 32); - rs_i64 = m_ir_builder->CreateOr(rs_i64, rsh_i64); - auto res_i64 = rs_i64; - if (sh) { - auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh); - 
auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh); - res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); - } + auto rs_i32 = GetGpr(rs, 32); + auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); + auto rsh_i64 = m_ir_builder->CreateShl(rs_i64, 32); + rs_i64 = m_ir_builder->CreateOr(rs_i64, rsh_i64); + auto res_i64 = rs_i64; + if (sh) { + auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh); + auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh); + res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); + } - res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[32 + mb][32 + me]); - SetGpr(ra, res_i64); + res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[32 + mb][32 + me]); + SetGpr(ra, res_i64); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } } void Compiler::RLWNM(u32 ra, u32 rs, u32 rb, u32 mb, u32 me, u32 rc) { - auto rs_i32 = GetGpr(rs, 32); - auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); - auto rsh_i64 = m_ir_builder->CreateShl(rs_i64, 32); - rs_i64 = m_ir_builder->CreateOr(rs_i64, rsh_i64); - auto rb_i64 = GetGpr(rb); - auto shl_i64 = m_ir_builder->CreateAnd(rb_i64, 0x1F); - auto shr_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(32), shl_i64); - auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, shr_i64); - auto resh_i64 = m_ir_builder->CreateShl(rs_i64, shl_i64); - auto res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); - res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[32 + mb][32 + me]); - SetGpr(ra, res_i64); + auto rs_i32 = GetGpr(rs, 32); + auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); + auto rsh_i64 = m_ir_builder->CreateShl(rs_i64, 32); + rs_i64 = m_ir_builder->CreateOr(rs_i64, rsh_i64); + auto rb_i64 = GetGpr(rb); + auto shl_i64 = m_ir_builder->CreateAnd(rb_i64, 0x1F); + auto shr_i64 = 
m_ir_builder->CreateSub(m_ir_builder->getInt64(32), shl_i64); + auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, shr_i64); + auto resh_i64 = m_ir_builder->CreateShl(rs_i64, shl_i64); + auto res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); + res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[32 + mb][32 + me]); + SetGpr(ra, res_i64); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } } void Compiler::ORI(u32 ra, u32 rs, u32 uimm16) { - auto rs_i64 = GetGpr(rs); - auto res_i64 = m_ir_builder->CreateOr(rs_i64, uimm16); - SetGpr(ra, res_i64); + auto rs_i64 = GetGpr(rs); + auto res_i64 = m_ir_builder->CreateOr(rs_i64, uimm16); + SetGpr(ra, res_i64); } void Compiler::ORIS(u32 ra, u32 rs, u32 uimm16) { - auto rs_i64 = GetGpr(rs); - auto res_i64 = m_ir_builder->CreateOr(rs_i64, (u64)uimm16 << 16); - SetGpr(ra, res_i64); + auto rs_i64 = GetGpr(rs); + auto res_i64 = m_ir_builder->CreateOr(rs_i64, (u64)uimm16 << 16); + SetGpr(ra, res_i64); } void Compiler::XORI(u32 ra, u32 rs, u32 uimm16) { - auto rs_i64 = GetGpr(rs); - auto res_i64 = m_ir_builder->CreateXor(rs_i64, uimm16); - SetGpr(ra, res_i64); + auto rs_i64 = GetGpr(rs); + auto res_i64 = m_ir_builder->CreateXor(rs_i64, uimm16); + SetGpr(ra, res_i64); } void Compiler::XORIS(u32 ra, u32 rs, u32 uimm16) { - auto rs_i64 = GetGpr(rs); - auto res_i64 = m_ir_builder->CreateXor(rs_i64, (u64)uimm16 << 16); - SetGpr(ra, res_i64); + auto rs_i64 = GetGpr(rs); + auto res_i64 = m_ir_builder->CreateXor(rs_i64, (u64)uimm16 << 16); + SetGpr(ra, res_i64); } void Compiler::ANDI_(u32 ra, u32 rs, u32 uimm16) { - auto rs_i64 = GetGpr(rs); - auto res_i64 = m_ir_builder->CreateAnd(rs_i64, uimm16); - SetGpr(ra, res_i64); - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + auto rs_i64 = GetGpr(rs); + auto res_i64 = m_ir_builder->CreateAnd(rs_i64, uimm16); + SetGpr(ra, res_i64); + SetCrFieldSignedCmp(0, res_i64, 
m_ir_builder->getInt64(0)); } void Compiler::ANDIS_(u32 ra, u32 rs, u32 uimm16) { - auto rs_i64 = GetGpr(rs); - auto res_i64 = m_ir_builder->CreateAnd(rs_i64, (u64)uimm16 << 16); - SetGpr(ra, res_i64); - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + auto rs_i64 = GetGpr(rs); + auto res_i64 = m_ir_builder->CreateAnd(rs_i64, (u64)uimm16 << 16); + SetGpr(ra, res_i64); + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); } void Compiler::RLDICL(u32 ra, u32 rs, u32 sh, u32 mb, u32 rc) { - auto rs_i64 = GetGpr(rs); - auto res_i64 = rs_i64; - if (sh) { - auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh); - auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh); - res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); - } + auto rs_i64 = GetGpr(rs); + auto res_i64 = rs_i64; + if (sh) { + auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh); + auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh); + res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); + } - res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[mb][63]); - SetGpr(ra, res_i64); + res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[mb][63]); + SetGpr(ra, res_i64); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } } void Compiler::RLDICR(u32 ra, u32 rs, u32 sh, u32 me, u32 rc) { - auto rs_i64 = GetGpr(rs); - auto res_i64 = rs_i64; - if (sh) { - auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh); - auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh); - res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); - } + auto rs_i64 = GetGpr(rs); + auto res_i64 = rs_i64; + if (sh) { + auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh); + auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh); + res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); + } - res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[0][me]); - SetGpr(ra, res_i64); + res_i64 = 
m_ir_builder->CreateAnd(res_i64, s_rotate_mask[0][me]); + SetGpr(ra, res_i64); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } } void Compiler::RLDIC(u32 ra, u32 rs, u32 sh, u32 mb, u32 rc) { - auto rs_i64 = GetGpr(rs); - auto res_i64 = rs_i64; - if (sh) { - auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh); - auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh); - res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); - } + auto rs_i64 = GetGpr(rs); + auto res_i64 = rs_i64; + if (sh) { + auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh); + auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh); + res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); + } - res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[mb][63 - sh]); - SetGpr(ra, res_i64); + res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[mb][63 - sh]); + SetGpr(ra, res_i64); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } } void Compiler::RLDIMI(u32 ra, u32 rs, u32 sh, u32 mb, u32 rc) { - auto rs_i64 = GetGpr(rs); - auto ra_i64 = GetGpr(ra); - auto res_i64 = rs_i64; - if (sh) { - auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh); - auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh); - res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); - } + auto rs_i64 = GetGpr(rs); + auto ra_i64 = GetGpr(ra); + auto res_i64 = rs_i64; + if (sh) { + auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, 64 - sh); + auto resh_i64 = m_ir_builder->CreateShl(rs_i64, sh); + res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); + } - u64 mask = s_rotate_mask[mb][63 - sh]; - res_i64 = m_ir_builder->CreateAnd(res_i64, mask); - ra_i64 = m_ir_builder->CreateAnd(ra_i64, ~mask); - res_i64 = m_ir_builder->CreateOr(res_i64, ra_i64); - SetGpr(ra, res_i64); + u64 mask = s_rotate_mask[mb][63 - sh]; + 
res_i64 = m_ir_builder->CreateAnd(res_i64, mask); + ra_i64 = m_ir_builder->CreateAnd(ra_i64, ~mask); + res_i64 = m_ir_builder->CreateOr(res_i64, ra_i64); + SetGpr(ra, res_i64); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } } void Compiler::RLDC_LR(u32 ra, u32 rs, u32 rb, u32 m_eb, u32 is_r, u32 rc) { - auto rs_i64 = GetGpr(rs); - auto rb_i64 = GetGpr(rb); - auto shl_i64 = m_ir_builder->CreateAnd(rb_i64, 0x3F); - auto shr_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(64), shl_i64); - auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, shr_i64); - auto resh_i64 = m_ir_builder->CreateShl(rs_i64, shl_i64); - auto res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); + auto rs_i64 = GetGpr(rs); + auto rb_i64 = GetGpr(rb); + auto shl_i64 = m_ir_builder->CreateAnd(rb_i64, 0x3F); + auto shr_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(64), shl_i64); + auto resl_i64 = m_ir_builder->CreateLShr(rs_i64, shr_i64); + auto resh_i64 = m_ir_builder->CreateShl(rs_i64, shl_i64); + auto res_i64 = m_ir_builder->CreateOr(resh_i64, resl_i64); - if (is_r) { - res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[0][m_eb]); - } - else { - res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[m_eb][63]); - } + if (is_r) { + res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[0][m_eb]); + } + else { + res_i64 = m_ir_builder->CreateAnd(res_i64, s_rotate_mask[m_eb][63]); + } - SetGpr(ra, res_i64); + SetGpr(ra, res_i64); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } } void Compiler::CMP(u32 crfd, u32 l, u32 ra, u32 rb) { - Value * ra_i64; - Value * rb_i64; - if (l == 0) { - ra_i64 = m_ir_builder->CreateSExt(GetGpr(ra, 32), m_ir_builder->getInt64Ty()); - rb_i64 = m_ir_builder->CreateSExt(GetGpr(rb, 32), m_ir_builder->getInt64Ty()); - } - else { - ra_i64 = 
GetGpr(ra); - rb_i64 = GetGpr(rb); - } + Value * ra_i64; + Value * rb_i64; + if (l == 0) { + ra_i64 = m_ir_builder->CreateSExt(GetGpr(ra, 32), m_ir_builder->getInt64Ty()); + rb_i64 = m_ir_builder->CreateSExt(GetGpr(rb, 32), m_ir_builder->getInt64Ty()); + } + else { + ra_i64 = GetGpr(ra); + rb_i64 = GetGpr(rb); + } - SetCrFieldSignedCmp(crfd, ra_i64, rb_i64); + SetCrFieldSignedCmp(crfd, ra_i64, rb_i64); } void Compiler::TW(u32 to, u32 ra, u32 rb) { - CompilationError("TW"); + CompilationError("TW"); } void Compiler::LVSL(u32 vd, u32 ra, u32 rb) { - static const u128 s_lvsl_values[] = { - { 0x08090A0B0C0D0E0F, 0x0001020304050607 }, - { 0x090A0B0C0D0E0F10, 0x0102030405060708 }, - { 0x0A0B0C0D0E0F1011, 0x0203040506070809 }, - { 0x0B0C0D0E0F101112, 0x030405060708090A }, - { 0x0C0D0E0F10111213, 0x0405060708090A0B }, - { 0x0D0E0F1011121314, 0x05060708090A0B0C }, - { 0x0E0F101112131415, 0x060708090A0B0C0D }, - { 0x0F10111213141516, 0x0708090A0B0C0D0E }, - { 0x1011121314151617, 0x08090A0B0C0D0E0F }, - { 0x1112131415161718, 0x090A0B0C0D0E0F10 }, - { 0x1213141516171819, 0x0A0B0C0D0E0F1011 }, - { 0x131415161718191A, 0x0B0C0D0E0F101112 }, - { 0x1415161718191A1B, 0x0C0D0E0F10111213 }, - { 0x15161718191A1B1C, 0x0D0E0F1011121314 }, - { 0x161718191A1B1C1D, 0x0E0F101112131415 }, - { 0x1718191A1B1C1D1E, 0x0F10111213141516 }, - }; + static const u128 s_lvsl_values[] = { + { 0x08090A0B0C0D0E0F, 0x0001020304050607 }, + { 0x090A0B0C0D0E0F10, 0x0102030405060708 }, + { 0x0A0B0C0D0E0F1011, 0x0203040506070809 }, + { 0x0B0C0D0E0F101112, 0x030405060708090A }, + { 0x0C0D0E0F10111213, 0x0405060708090A0B }, + { 0x0D0E0F1011121314, 0x05060708090A0B0C }, + { 0x0E0F101112131415, 0x060708090A0B0C0D }, + { 0x0F10111213141516, 0x0708090A0B0C0D0E }, + { 0x1011121314151617, 0x08090A0B0C0D0E0F }, + { 0x1112131415161718, 0x090A0B0C0D0E0F10 }, + { 0x1213141516171819, 0x0A0B0C0D0E0F1011 }, + { 0x131415161718191A, 0x0B0C0D0E0F101112 }, + { 0x1415161718191A1B, 0x0C0D0E0F10111213 }, + { 0x15161718191A1B1C, 
0x0D0E0F1011121314 }, + { 0x161718191A1B1C1D, 0x0E0F101112131415 }, + { 0x1718191A1B1C1D1E, 0x0F10111213141516 }, + }; - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xF); - auto lvsl_values_v16i8_ptr = m_ir_builder->CreateIntToPtr(m_ir_builder->getInt64((u64)s_lvsl_values), VectorType::get(m_ir_builder->getInt8Ty(), 16)->getPointerTo()); - lvsl_values_v16i8_ptr = m_ir_builder->CreateGEP(lvsl_values_v16i8_ptr, index_i64); - auto val_v16i8 = m_ir_builder->CreateAlignedLoad(lvsl_values_v16i8_ptr, 16); - SetVr(vd, val_v16i8); + auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xF); + auto lvsl_values_v16i8_ptr = m_ir_builder->CreateIntToPtr(m_ir_builder->getInt64((u64)s_lvsl_values), VectorType::get(m_ir_builder->getInt8Ty(), 16)->getPointerTo()); + lvsl_values_v16i8_ptr = m_ir_builder->CreateGEP(lvsl_values_v16i8_ptr, index_i64); + auto val_v16i8 = m_ir_builder->CreateAlignedLoad(lvsl_values_v16i8_ptr, 16); + SetVr(vd, val_v16i8); } void Compiler::LVEBX(u32 vd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto val_i8 = ReadMemory(addr_i64, 8); - auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); - index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(15), index_i64); - auto vd_v16i8 = GetVrAsIntVec(vd, 8); - vd_v16i8 = m_ir_builder->CreateInsertElement(vd_v16i8, val_i8, index_i64); - SetVr(vd, vd_v16i8); + auto val_i8 = ReadMemory(addr_i64, 8); + auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); + index_i64 = 
m_ir_builder->CreateSub(m_ir_builder->getInt64(15), index_i64); + auto vd_v16i8 = GetVrAsIntVec(vd, 8); + vd_v16i8 = m_ir_builder->CreateInsertElement(vd_v16i8, val_i8, index_i64); + SetVr(vd, vd_v16i8); } void Compiler::SUBFC(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) { - auto ra_i64 = GetGpr(ra); - ra_i64 = m_ir_builder->CreateNeg(ra_i64); - auto rb_i64 = GetGpr(rb); - auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, rb_i64); - auto diff_i64 = m_ir_builder->CreateExtractValue(res_s, { 0 }); - auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); - SetGpr(rd, diff_i64); - SetXerCa(carry_i1); + auto ra_i64 = GetGpr(ra); + ra_i64 = m_ir_builder->CreateNeg(ra_i64); + auto rb_i64 = GetGpr(rb); + auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, rb_i64); + auto diff_i64 = m_ir_builder->CreateExtractValue(res_s, { 0 }); + auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); + SetGpr(rd, diff_i64); + SetXerCa(carry_i1); - if (rc) { - SetCrFieldSignedCmp(0, diff_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, diff_i64, m_ir_builder->getInt64(0)); + } - if (oe) { - // TODO: Implement this - CompilationError("SUBFCO"); - } + if (oe) { + // TODO: Implement this + CompilationError("SUBFCO"); + } } void Compiler::ADDC(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) { - auto ra_i64 = GetGpr(ra); - auto rb_i64 = GetGpr(rb); - auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, rb_i64); - auto sum_i64 = m_ir_builder->CreateExtractValue(res_s, { 0 }); - auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); - SetGpr(rd, sum_i64); - SetXerCa(carry_i1); + auto ra_i64 = GetGpr(ra); + auto rb_i64 = GetGpr(rb); + auto res_s = 
m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, rb_i64); + auto sum_i64 = m_ir_builder->CreateExtractValue(res_s, { 0 }); + auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); + SetGpr(rd, sum_i64); + SetXerCa(carry_i1); - if (rc) { - SetCrFieldSignedCmp(0, sum_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, sum_i64, m_ir_builder->getInt64(0)); + } - if (oe) { - // TODO: Implement this - } + if (oe) { + // TODO: Implement this + } } void Compiler::MULHDU(u32 rd, u32 ra, u32 rb, u32 rc) { - auto ra_i64 = GetGpr(ra); - auto rb_i64 = GetGpr(rb); - auto ra_i128 = m_ir_builder->CreateZExt(ra_i64, m_ir_builder->getIntNTy(128)); - auto rb_i128 = m_ir_builder->CreateZExt(rb_i64, m_ir_builder->getIntNTy(128)); - auto prod_i128 = m_ir_builder->CreateMul(ra_i128, rb_i128); - prod_i128 = m_ir_builder->CreateLShr(prod_i128, 64); - auto prod_i64 = m_ir_builder->CreateTrunc(prod_i128, m_ir_builder->getInt64Ty()); - SetGpr(rd, prod_i64); + auto ra_i64 = GetGpr(ra); + auto rb_i64 = GetGpr(rb); + auto ra_i128 = m_ir_builder->CreateZExt(ra_i64, m_ir_builder->getIntNTy(128)); + auto rb_i128 = m_ir_builder->CreateZExt(rb_i64, m_ir_builder->getIntNTy(128)); + auto prod_i128 = m_ir_builder->CreateMul(ra_i128, rb_i128); + prod_i128 = m_ir_builder->CreateLShr(prod_i128, 64); + auto prod_i64 = m_ir_builder->CreateTrunc(prod_i128, m_ir_builder->getInt64Ty()); + SetGpr(rd, prod_i64); - if (rc) { - SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); + } } void Compiler::MULHWU(u32 rd, u32 ra, u32 rb, u32 rc) { - auto ra_i32 = GetGpr(ra, 32); - auto rb_i32 = GetGpr(rb, 32); - auto ra_i64 = m_ir_builder->CreateZExt(ra_i32, m_ir_builder->getInt64Ty()); - auto rb_i64 = m_ir_builder->CreateZExt(rb_i32, m_ir_builder->getInt64Ty()); - auto prod_i64 = m_ir_builder->CreateMul(ra_i64, rb_i64); - 
prod_i64 = m_ir_builder->CreateLShr(prod_i64, 32); - SetGpr(rd, prod_i64); + auto ra_i32 = GetGpr(ra, 32); + auto rb_i32 = GetGpr(rb, 32); + auto ra_i64 = m_ir_builder->CreateZExt(ra_i32, m_ir_builder->getInt64Ty()); + auto rb_i64 = m_ir_builder->CreateZExt(rb_i32, m_ir_builder->getInt64Ty()); + auto prod_i64 = m_ir_builder->CreateMul(ra_i64, rb_i64); + prod_i64 = m_ir_builder->CreateLShr(prod_i64, 32); + SetGpr(rd, prod_i64); - if (rc) { - SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); + } } void Compiler::MFOCRF(u32 a, u32 rd, u32 crm) { - auto cr_i32 = GetCr(); - auto cr_i64 = m_ir_builder->CreateZExt(cr_i32, m_ir_builder->getInt64Ty()); - SetGpr(rd, cr_i64); + auto cr_i32 = GetCr(); + auto cr_i64 = m_ir_builder->CreateZExt(cr_i32, m_ir_builder->getInt64Ty()); + SetGpr(rd, cr_i64); } void Compiler::LWARX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto addr_i32 = m_ir_builder->CreateTrunc(addr_i64, m_ir_builder->getInt32Ty()); - auto val_i32_ptr = m_ir_builder->CreateAlloca(m_ir_builder->getInt32Ty()); - val_i32_ptr->setAlignment(4); - Call("vm.reservation_acquire", vm::reservation_acquire, m_ir_builder->CreateBitCast(val_i32_ptr, m_ir_builder->getInt8PtrTy()), addr_i32, m_ir_builder->getInt32(4)); - auto val_i32 = (Value *)m_ir_builder->CreateLoad(val_i32_ptr); - val_i32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, m_ir_builder->getInt32Ty()), val_i32); - auto val_i64 = m_ir_builder->CreateZExt(val_i32, m_ir_builder->getInt64Ty()); - SetGpr(rd, val_i64); + auto addr_i32 = m_ir_builder->CreateTrunc(addr_i64, m_ir_builder->getInt32Ty()); + auto val_i32_ptr = 
m_ir_builder->CreateAlloca(m_ir_builder->getInt32Ty()); + val_i32_ptr->setAlignment(4); + Call("vm.reservation_acquire", vm::reservation_acquire, m_ir_builder->CreateBitCast(val_i32_ptr, m_ir_builder->getInt8PtrTy()), addr_i32, m_ir_builder->getInt32(4)); + auto val_i32 = (Value *)m_ir_builder->CreateLoad(val_i32_ptr); + val_i32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, m_ir_builder->getInt32Ty()), val_i32); + auto val_i64 = m_ir_builder->CreateZExt(val_i32, m_ir_builder->getInt64Ty()); + SetGpr(rd, val_i64); } void Compiler::LDX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto mem_i64 = ReadMemory(addr_i64, 64); - SetGpr(rd, mem_i64); + auto mem_i64 = ReadMemory(addr_i64, 64); + SetGpr(rd, mem_i64); } void Compiler::LWZX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto mem_i32 = ReadMemory(addr_i64, 32); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); + auto mem_i32 = ReadMemory(addr_i64, 32); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); } void Compiler::SLW(u32 ra, u32 rs, u32 rb, u32 rc) { - auto rs_i32 = GetGpr(rs, 32); - auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); - auto rb_i8 = GetGpr(rb, 8); - rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x3F); - auto rb_i64 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getInt64Ty()); - auto res_i64 = m_ir_builder->CreateShl(rs_i64, rb_i64); - auto res_i32 = 
m_ir_builder->CreateTrunc(res_i64, m_ir_builder->getInt32Ty()); - res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); - SetGpr(ra, res_i64); + auto rs_i32 = GetGpr(rs, 32); + auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); + auto rb_i8 = GetGpr(rb, 8); + rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x3F); + auto rb_i64 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getInt64Ty()); + auto res_i64 = m_ir_builder->CreateShl(rs_i64, rb_i64); + auto res_i32 = m_ir_builder->CreateTrunc(res_i64, m_ir_builder->getInt32Ty()); + res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); + SetGpr(ra, res_i64); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } } void Compiler::CNTLZW(u32 ra, u32 rs, u32 rc) { - auto rs_i32 = GetGpr(rs, 32); - auto res_i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::ctlz, m_ir_builder->getInt32Ty()), rs_i32, m_ir_builder->getInt1(false)); - auto res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); - SetGpr(ra, res_i64); + auto rs_i32 = GetGpr(rs, 32); + auto res_i32 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::ctlz, m_ir_builder->getInt32Ty()), rs_i32, m_ir_builder->getInt1(false)); + auto res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); + SetGpr(ra, res_i64); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } } void Compiler::SLD(u32 ra, u32 rs, u32 rb, u32 rc) { - auto rs_i64 = GetGpr(rs); - auto rs_i128 = m_ir_builder->CreateZExt(rs_i64, m_ir_builder->getIntNTy(128)); - auto rb_i8 = GetGpr(rb, 8); - rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x7F); - auto rb_i128 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getIntNTy(128)); - auto res_i128 = m_ir_builder->CreateShl(rs_i128, 
rb_i128); - auto res_i64 = m_ir_builder->CreateTrunc(res_i128, m_ir_builder->getInt64Ty()); - SetGpr(ra, res_i64); + auto rs_i64 = GetGpr(rs); + auto rs_i128 = m_ir_builder->CreateZExt(rs_i64, m_ir_builder->getIntNTy(128)); + auto rb_i8 = GetGpr(rb, 8); + rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x7F); + auto rb_i128 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getIntNTy(128)); + auto res_i128 = m_ir_builder->CreateShl(rs_i128, rb_i128); + auto res_i64 = m_ir_builder->CreateTrunc(res_i128, m_ir_builder->getInt64Ty()); + SetGpr(ra, res_i64); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } } void Compiler::AND(u32 ra, u32 rs, u32 rb, u32 rc) { - auto rs_i64 = GetGpr(rs); - auto rb_i64 = GetGpr(rb); - auto res_i64 = m_ir_builder->CreateAnd(rs_i64, rb_i64); - SetGpr(ra, res_i64); + auto rs_i64 = GetGpr(rs); + auto rb_i64 = GetGpr(rb); + auto res_i64 = m_ir_builder->CreateAnd(rs_i64, rb_i64); + SetGpr(ra, res_i64); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } } void Compiler::CMPL(u32 crfd, u32 l, u32 ra, u32 rb) { - Value * ra_i64; - Value * rb_i64; - if (l == 0) { - ra_i64 = m_ir_builder->CreateZExt(GetGpr(ra, 32), m_ir_builder->getInt64Ty()); - rb_i64 = m_ir_builder->CreateZExt(GetGpr(rb, 32), m_ir_builder->getInt64Ty()); - } - else { - ra_i64 = GetGpr(ra); - rb_i64 = GetGpr(rb); - } + Value * ra_i64; + Value * rb_i64; + if (l == 0) { + ra_i64 = m_ir_builder->CreateZExt(GetGpr(ra, 32), m_ir_builder->getInt64Ty()); + rb_i64 = m_ir_builder->CreateZExt(GetGpr(rb, 32), m_ir_builder->getInt64Ty()); + } + else { + ra_i64 = GetGpr(ra); + rb_i64 = GetGpr(rb); + } - SetCrFieldUnsignedCmp(crfd, ra_i64, rb_i64); + SetCrFieldUnsignedCmp(crfd, ra_i64, rb_i64); } void Compiler::LVSR(u32 vd, u32 ra, u32 rb) { - static const u128 s_lvsr_values[] = { - { 
0x18191A1B1C1D1E1F, 0x1011121314151617 }, - { 0x1718191A1B1C1D1E, 0x0F10111213141516 }, - { 0x161718191A1B1C1D, 0x0E0F101112131415 }, - { 0x15161718191A1B1C, 0x0D0E0F1011121314 }, - { 0x1415161718191A1B, 0x0C0D0E0F10111213 }, - { 0x131415161718191A, 0x0B0C0D0E0F101112 }, - { 0x1213141516171819, 0x0A0B0C0D0E0F1011 }, - { 0x1112131415161718, 0x090A0B0C0D0E0F10 }, - { 0x1011121314151617, 0x08090A0B0C0D0E0F }, - { 0x0F10111213141516, 0x0708090A0B0C0D0E }, - { 0x0E0F101112131415, 0x060708090A0B0C0D }, - { 0x0D0E0F1011121314, 0x05060708090A0B0C }, - { 0x0C0D0E0F10111213, 0x0405060708090A0B }, - { 0x0B0C0D0E0F101112, 0x030405060708090A }, - { 0x0A0B0C0D0E0F1011, 0x0203040506070809 }, - { 0x090A0B0C0D0E0F10, 0x0102030405060708 }, - }; + static const u128 s_lvsr_values[] = { + { 0x18191A1B1C1D1E1F, 0x1011121314151617 }, + { 0x1718191A1B1C1D1E, 0x0F10111213141516 }, + { 0x161718191A1B1C1D, 0x0E0F101112131415 }, + { 0x15161718191A1B1C, 0x0D0E0F1011121314 }, + { 0x1415161718191A1B, 0x0C0D0E0F10111213 }, + { 0x131415161718191A, 0x0B0C0D0E0F101112 }, + { 0x1213141516171819, 0x0A0B0C0D0E0F1011 }, + { 0x1112131415161718, 0x090A0B0C0D0E0F10 }, + { 0x1011121314151617, 0x08090A0B0C0D0E0F }, + { 0x0F10111213141516, 0x0708090A0B0C0D0E }, + { 0x0E0F101112131415, 0x060708090A0B0C0D }, + { 0x0D0E0F1011121314, 0x05060708090A0B0C }, + { 0x0C0D0E0F10111213, 0x0405060708090A0B }, + { 0x0B0C0D0E0F101112, 0x030405060708090A }, + { 0x0A0B0C0D0E0F1011, 0x0203040506070809 }, + { 0x090A0B0C0D0E0F10, 0x0102030405060708 }, + }; - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xF); - auto lvsr_values_v16i8_ptr = m_ir_builder->CreateIntToPtr(m_ir_builder->getInt64((u64)s_lvsr_values), VectorType::get(m_ir_builder->getInt8Ty(), 
16)->getPointerTo()); - lvsr_values_v16i8_ptr = m_ir_builder->CreateGEP(lvsr_values_v16i8_ptr, index_i64); - auto val_v16i8 = m_ir_builder->CreateAlignedLoad(lvsr_values_v16i8_ptr, 16); - SetVr(vd, val_v16i8); + auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xF); + auto lvsr_values_v16i8_ptr = m_ir_builder->CreateIntToPtr(m_ir_builder->getInt64((u64)s_lvsr_values), VectorType::get(m_ir_builder->getInt8Ty(), 16)->getPointerTo()); + lvsr_values_v16i8_ptr = m_ir_builder->CreateGEP(lvsr_values_v16i8_ptr, index_i64); + auto val_v16i8 = m_ir_builder->CreateAlignedLoad(lvsr_values_v16i8_ptr, 16); + SetVr(vd, val_v16i8); } void Compiler::LVEHX(u32 vd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFFEULL); - auto val_i16 = ReadMemory(addr_i64, 16, 2); - auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); - index_i64 = m_ir_builder->CreateLShr(index_i64, 1); - index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(7), index_i64); - auto vd_v8i16 = GetVrAsIntVec(vd, 16); - vd_v8i16 = m_ir_builder->CreateInsertElement(vd_v8i16, val_i16, index_i64); - SetVr(vd, vd_v8i16); + addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFFEULL); + auto val_i16 = ReadMemory(addr_i64, 16, 2); + auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); + index_i64 = m_ir_builder->CreateLShr(index_i64, 1); + index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(7), index_i64); + auto vd_v8i16 = GetVrAsIntVec(vd, 16); + vd_v8i16 = m_ir_builder->CreateInsertElement(vd_v8i16, val_i16, index_i64); + SetVr(vd, vd_v8i16); } void Compiler::SUBF(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) { - auto ra_i64 = GetGpr(ra); - auto rb_i64 = GetGpr(rb); - auto diff_i64 = 
m_ir_builder->CreateSub(rb_i64, ra_i64); - SetGpr(rd, diff_i64); + auto ra_i64 = GetGpr(ra); + auto rb_i64 = GetGpr(rb); + auto diff_i64 = m_ir_builder->CreateSub(rb_i64, ra_i64); + SetGpr(rd, diff_i64); - if (rc) { - SetCrFieldSignedCmp(0, diff_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, diff_i64, m_ir_builder->getInt64(0)); + } - if (oe) { - // TODO: Implement this - CompilationError("SUBFO"); - } + if (oe) { + // TODO: Implement this + CompilationError("SUBFO"); + } } void Compiler::LDUX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - auto mem_i64 = ReadMemory(addr_i64, 64); - SetGpr(rd, mem_i64); - SetGpr(ra, addr_i64); + auto mem_i64 = ReadMemory(addr_i64, 64); + SetGpr(rd, mem_i64); + SetGpr(ra, addr_i64); } void Compiler::DCBST(u32 ra, u32 rb) { - // TODO: Implement this - m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); + // TODO: Implement this + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); } void Compiler::LWZUX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - auto mem_i32 = ReadMemory(addr_i64, 32); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); - SetGpr(ra, addr_i64); + auto mem_i32 = ReadMemory(addr_i64, 32); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); + SetGpr(ra, addr_i64); } void Compiler::CNTLZD(u32 ra, u32 rs, u32 rc) { - auto rs_i64 = GetGpr(rs); - auto res_i64 = 
m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::ctlz, m_ir_builder->getInt64Ty()), rs_i64, m_ir_builder->getInt1(false)); - SetGpr(ra, res_i64); + auto rs_i64 = GetGpr(rs); + auto res_i64 = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::ctlz, m_ir_builder->getInt64Ty()), rs_i64, m_ir_builder->getInt1(false)); + SetGpr(ra, res_i64); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } } void Compiler::ANDC(u32 ra, u32 rs, u32 rb, u32 rc) { - auto rs_i64 = GetGpr(rs); - auto rb_i64 = GetGpr(rb); - rb_i64 = m_ir_builder->CreateNot(rb_i64); - auto res_i64 = m_ir_builder->CreateAnd(rs_i64, rb_i64); - SetGpr(ra, res_i64); + auto rs_i64 = GetGpr(rs); + auto rb_i64 = GetGpr(rb); + rb_i64 = m_ir_builder->CreateNot(rb_i64); + auto res_i64 = m_ir_builder->CreateAnd(rs_i64, rb_i64); + SetGpr(ra, res_i64); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } } void Compiler::TD(u32 to, u32 ra, u32 rb) { - CompilationError("TD"); + CompilationError("TD"); } void Compiler::LVEWX(u32 vd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFFCULL); - auto val_i32 = ReadMemory(addr_i64, 32, 4); - auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); - index_i64 = m_ir_builder->CreateLShr(index_i64, 2); - index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(3), index_i64); - auto vd_v4i32 = GetVrAsIntVec(vd, 32); - vd_v4i32 = m_ir_builder->CreateInsertElement(vd_v4i32, val_i32, index_i64); - SetVr(vd, vd_v4i32); + addr_i64 = 
m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFFCULL); + auto val_i32 = ReadMemory(addr_i64, 32, 4); + auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); + index_i64 = m_ir_builder->CreateLShr(index_i64, 2); + index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(3), index_i64); + auto vd_v4i32 = GetVrAsIntVec(vd, 32); + vd_v4i32 = m_ir_builder->CreateInsertElement(vd_v4i32, val_i32, index_i64); + SetVr(vd, vd_v4i32); } void Compiler::MULHD(u32 rd, u32 ra, u32 rb, u32 rc) { - auto ra_i64 = GetGpr(ra); - auto rb_i64 = GetGpr(rb); - auto ra_i128 = m_ir_builder->CreateSExt(ra_i64, m_ir_builder->getIntNTy(128)); - auto rb_i128 = m_ir_builder->CreateSExt(rb_i64, m_ir_builder->getIntNTy(128)); - auto prod_i128 = m_ir_builder->CreateMul(ra_i128, rb_i128); - prod_i128 = m_ir_builder->CreateLShr(prod_i128, 64); - auto prod_i64 = m_ir_builder->CreateTrunc(prod_i128, m_ir_builder->getInt64Ty()); - SetGpr(rd, prod_i64); + auto ra_i64 = GetGpr(ra); + auto rb_i64 = GetGpr(rb); + auto ra_i128 = m_ir_builder->CreateSExt(ra_i64, m_ir_builder->getIntNTy(128)); + auto rb_i128 = m_ir_builder->CreateSExt(rb_i64, m_ir_builder->getIntNTy(128)); + auto prod_i128 = m_ir_builder->CreateMul(ra_i128, rb_i128); + prod_i128 = m_ir_builder->CreateLShr(prod_i128, 64); + auto prod_i64 = m_ir_builder->CreateTrunc(prod_i128, m_ir_builder->getInt64Ty()); + SetGpr(rd, prod_i64); - if (rc) { - SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); + } } void Compiler::MULHW(u32 rd, u32 ra, u32 rb, u32 rc) { - auto ra_i32 = GetGpr(ra, 32); - auto rb_i32 = GetGpr(rb, 32); - auto ra_i64 = m_ir_builder->CreateSExt(ra_i32, m_ir_builder->getInt64Ty()); - auto rb_i64 = m_ir_builder->CreateSExt(rb_i32, m_ir_builder->getInt64Ty()); - auto prod_i64 = m_ir_builder->CreateMul(ra_i64, rb_i64); - prod_i64 = m_ir_builder->CreateAShr(prod_i64, 32); - SetGpr(rd, prod_i64); + auto ra_i32 = GetGpr(ra, 32); + auto rb_i32 = 
GetGpr(rb, 32); + auto ra_i64 = m_ir_builder->CreateSExt(ra_i32, m_ir_builder->getInt64Ty()); + auto rb_i64 = m_ir_builder->CreateSExt(rb_i32, m_ir_builder->getInt64Ty()); + auto prod_i64 = m_ir_builder->CreateMul(ra_i64, rb_i64); + prod_i64 = m_ir_builder->CreateAShr(prod_i64, 32); + SetGpr(rd, prod_i64); - if (rc) { - SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); + } } void Compiler::LDARX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto addr_i32 = m_ir_builder->CreateTrunc(addr_i64, m_ir_builder->getInt32Ty()); - auto val_i64_ptr = m_ir_builder->CreateAlloca(m_ir_builder->getInt64Ty()); - val_i64_ptr->setAlignment(8); - Call("vm.reservation_acquire", vm::reservation_acquire, m_ir_builder->CreateBitCast(val_i64_ptr, m_ir_builder->getInt8PtrTy()), addr_i32, m_ir_builder->getInt32(8)); - auto val_i64 = (Value *)m_ir_builder->CreateLoad(val_i64_ptr); - val_i64 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, m_ir_builder->getInt64Ty()), val_i64); - SetGpr(rd, val_i64); + auto addr_i32 = m_ir_builder->CreateTrunc(addr_i64, m_ir_builder->getInt32Ty()); + auto val_i64_ptr = m_ir_builder->CreateAlloca(m_ir_builder->getInt64Ty()); + val_i64_ptr->setAlignment(8); + Call("vm.reservation_acquire", vm::reservation_acquire, m_ir_builder->CreateBitCast(val_i64_ptr, m_ir_builder->getInt8PtrTy()), addr_i32, m_ir_builder->getInt32(8)); + auto val_i64 = (Value *)m_ir_builder->CreateLoad(val_i64_ptr); + val_i64 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, m_ir_builder->getInt64Ty()), val_i64); + SetGpr(rd, val_i64); } void Compiler::DCBF(u32 ra, u32 rb) { - // TODO: Implement this - 
m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); + // TODO: Implement this + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); } void Compiler::LBZX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto mem_i8 = ReadMemory(addr_i64, 8); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i8, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); + auto mem_i8 = ReadMemory(addr_i64, 8); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i8, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); } void Compiler::LVX(u32 vd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFF0ULL); - auto mem_i128 = ReadMemory(addr_i64, 128, 16); - SetVr(vd, mem_i128); + addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFF0ULL); + auto mem_i128 = ReadMemory(addr_i64, 128, 16); + SetVr(vd, mem_i128); } void Compiler::NEG(u32 rd, u32 ra, u32 oe, u32 rc) { - auto ra_i64 = GetGpr(ra); - auto diff_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(0), ra_i64); - SetGpr(rd, diff_i64); + auto ra_i64 = GetGpr(ra); + auto diff_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(0), ra_i64); + SetGpr(rd, diff_i64); - if (rc) { - SetCrFieldSignedCmp(0, diff_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, diff_i64, m_ir_builder->getInt64(0)); + } - if (oe) { - // TODO: Implement this - CompilationError("NEGO"); - } + if (oe) { + // TODO: Implement this + CompilationError("NEGO"); + } } 
void Compiler::LBZUX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - auto mem_i8 = ReadMemory(addr_i64, 8); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i8, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); - SetGpr(ra, addr_i64); + auto mem_i8 = ReadMemory(addr_i64, 8); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i8, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); + SetGpr(ra, addr_i64); } void Compiler::NOR(u32 ra, u32 rs, u32 rb, u32 rc) { - auto rs_i64 = GetGpr(rs); - auto rb_i64 = GetGpr(rb); - auto res_i64 = m_ir_builder->CreateOr(rs_i64, rb_i64); - res_i64 = m_ir_builder->CreateXor(res_i64, (s64)-1); - SetGpr(ra, res_i64); + auto rs_i64 = GetGpr(rs); + auto rb_i64 = GetGpr(rb); + auto res_i64 = m_ir_builder->CreateOr(rs_i64, rb_i64); + res_i64 = m_ir_builder->CreateXor(res_i64, (s64)-1); + SetGpr(ra, res_i64); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } } void Compiler::STVEBX(u32 vs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); - index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(15), index_i64); - auto vs_v16i8 = GetVrAsIntVec(vs, 8); - auto val_i8 = m_ir_builder->CreateExtractElement(vs_v16i8, index_i64); - WriteMemory(addr_i64, val_i8); + auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); + index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(15), index_i64); + auto vs_v16i8 = GetVrAsIntVec(vs, 8); + auto val_i8 = 
m_ir_builder->CreateExtractElement(vs_v16i8, index_i64); + WriteMemory(addr_i64, val_i8); } void Compiler::SUBFE(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) { - auto ca_i64 = GetXerCa(); - auto ra_i64 = GetGpr(ra); - auto rb_i64 = GetGpr(rb); - ra_i64 = m_ir_builder->CreateNot(ra_i64); - auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); - auto res_i64 = m_ir_builder->CreateExtractValue(res_s, { 0 }); - auto carry1_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); - res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), res_i64, rb_i64); - res_i64 = m_ir_builder->CreateExtractValue(res_s, { 0 }); - auto carry2_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); - auto carry_i1 = m_ir_builder->CreateOr(carry1_i1, carry2_i1); - SetGpr(rd, res_i64); - SetXerCa(carry_i1); + auto ca_i64 = GetXerCa(); + auto ra_i64 = GetGpr(ra); + auto rb_i64 = GetGpr(rb); + ra_i64 = m_ir_builder->CreateNot(ra_i64); + auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); + auto res_i64 = m_ir_builder->CreateExtractValue(res_s, { 0 }); + auto carry1_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); + res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), res_i64, rb_i64); + res_i64 = m_ir_builder->CreateExtractValue(res_s, { 0 }); + auto carry2_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); + auto carry_i1 = m_ir_builder->CreateOr(carry1_i1, carry2_i1); + SetGpr(rd, res_i64); + SetXerCa(carry_i1); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } - if (oe) { - // TODO: Implement this - CompilationError("SUBFEO"); - } + if (oe) { + // 
TODO: Implement this + CompilationError("SUBFEO"); + } } void Compiler::ADDE(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) { - auto ca_i64 = GetXerCa(); - auto ra_i64 = GetGpr(ra); - auto rb_i64 = GetGpr(rb); - auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); - auto res_i64 = m_ir_builder->CreateExtractValue(res_s, { 0 }); - auto carry1_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); - res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), res_i64, rb_i64); - res_i64 = m_ir_builder->CreateExtractValue(res_s, { 0 }); - auto carry2_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); - auto carry_i1 = m_ir_builder->CreateOr(carry1_i1, carry2_i1); - SetGpr(rd, res_i64); - SetXerCa(carry_i1); + auto ca_i64 = GetXerCa(); + auto ra_i64 = GetGpr(ra); + auto rb_i64 = GetGpr(rb); + auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); + auto res_i64 = m_ir_builder->CreateExtractValue(res_s, { 0 }); + auto carry1_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); + res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), res_i64, rb_i64); + res_i64 = m_ir_builder->CreateExtractValue(res_s, { 0 }); + auto carry2_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); + auto carry_i1 = m_ir_builder->CreateOr(carry1_i1, carry2_i1); + SetGpr(rd, res_i64); + SetXerCa(carry_i1); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } - if (oe) { - // TODO: Implement this - CompilationError("ADDEO"); - } + if (oe) { + // TODO: Implement this + CompilationError("ADDEO"); + } } void Compiler::MTOCRF(u32 l, u32 crm, u32 rs) { - auto rs_i32 = 
GetGpr(rs, 32); - auto cr_i32 = GetCr(); - u32 mask = 0; + auto rs_i32 = GetGpr(rs, 32); + auto cr_i32 = GetCr(); + u32 mask = 0; - for (u32 i = 0; i < 8; i++) { - if (crm & (1 << i)) { - mask |= 0xF << ((7 - i) * 4); - if (l) { - break; - } - } - } + for (u32 i = 0; i < 8; i++) { + if (crm & (1 << i)) { + mask |= 0xF << ((7 - i) * 4); + if (l) { + break; + } + } + } - cr_i32 = m_ir_builder->CreateAnd(cr_i32, ~mask); - rs_i32 = m_ir_builder->CreateAnd(rs_i32, ~mask); - cr_i32 = m_ir_builder->CreateOr(cr_i32, rs_i32); - SetCr(cr_i32); + cr_i32 = m_ir_builder->CreateAnd(cr_i32, ~mask); + rs_i32 = m_ir_builder->CreateAnd(rs_i32, ~mask); + cr_i32 = m_ir_builder->CreateOr(cr_i32, rs_i32); + SetCr(cr_i32); } void Compiler::STDX(u32 rs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - WriteMemory(addr_i64, GetGpr(rs, 64)); + WriteMemory(addr_i64, GetGpr(rs, 64)); } void Compiler::STWCX_(u32 rs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto addr_i32 = m_ir_builder->CreateTrunc(addr_i64, m_ir_builder->getInt32Ty()); - auto rs_i32 = GetGpr(rs, 32); - rs_i32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, m_ir_builder->getInt32Ty()), rs_i32); - auto rs_i32_ptr = m_ir_builder->CreateAlloca(m_ir_builder->getInt32Ty()); - rs_i32_ptr->setAlignment(4); - m_ir_builder->CreateStore(rs_i32, rs_i32_ptr); - auto success_i1 = Call("vm.reservation_update", vm::reservation_update, addr_i32, m_ir_builder->CreateBitCast(rs_i32_ptr, m_ir_builder->getInt8PtrTy()), m_ir_builder->getInt32(4)); + auto 
addr_i32 = m_ir_builder->CreateTrunc(addr_i64, m_ir_builder->getInt32Ty()); + auto rs_i32 = GetGpr(rs, 32); + rs_i32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, m_ir_builder->getInt32Ty()), rs_i32); + auto rs_i32_ptr = m_ir_builder->CreateAlloca(m_ir_builder->getInt32Ty()); + rs_i32_ptr->setAlignment(4); + m_ir_builder->CreateStore(rs_i32, rs_i32_ptr); + auto success_i1 = Call("vm.reservation_update", vm::reservation_update, addr_i32, m_ir_builder->CreateBitCast(rs_i32_ptr, m_ir_builder->getInt8PtrTy()), m_ir_builder->getInt32(4)); - auto cr_i32 = GetCr(); - cr_i32 = SetBit(cr_i32, 2, success_i1); - SetCr(cr_i32); + auto cr_i32 = GetCr(); + cr_i32 = SetBit(cr_i32, 2, success_i1); + SetCr(cr_i32); } void Compiler::STWX(u32 rs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - WriteMemory(addr_i64, GetGpr(rs, 32)); + WriteMemory(addr_i64, GetGpr(rs, 32)); } void Compiler::STVEHX(u32 vs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFFEULL); - auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); - index_i64 = m_ir_builder->CreateLShr(index_i64, 1); - index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(7), index_i64); - auto vs_v8i16 = GetVrAsIntVec(vs, 16); - auto val_i16 = m_ir_builder->CreateExtractElement(vs_v8i16, index_i64); - WriteMemory(addr_i64, val_i16, 2); + addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFFEULL); + auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); + index_i64 = 
m_ir_builder->CreateLShr(index_i64, 1); + index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(7), index_i64); + auto vs_v8i16 = GetVrAsIntVec(vs, 16); + auto val_i16 = m_ir_builder->CreateExtractElement(vs_v8i16, index_i64); + WriteMemory(addr_i64, val_i16, 2); } void Compiler::STDUX(u32 rs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - WriteMemory(addr_i64, GetGpr(rs, 64)); - SetGpr(ra, addr_i64); + WriteMemory(addr_i64, GetGpr(rs, 64)); + SetGpr(ra, addr_i64); } void Compiler::STWUX(u32 rs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - WriteMemory(addr_i64, GetGpr(rs, 32)); - SetGpr(ra, addr_i64); + WriteMemory(addr_i64, GetGpr(rs, 32)); + SetGpr(ra, addr_i64); } void Compiler::STVEWX(u32 vs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFFCULL); - auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); - index_i64 = m_ir_builder->CreateLShr(index_i64, 2); - index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(3), index_i64); - auto vs_v4i32 = GetVrAsIntVec(vs, 32); - auto val_i32 = m_ir_builder->CreateExtractElement(vs_v4i32, index_i64); - WriteMemory(addr_i64, val_i32, 4); + addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFFCULL); + auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); + index_i64 = m_ir_builder->CreateLShr(index_i64, 2); + 
index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(3), index_i64); + auto vs_v4i32 = GetVrAsIntVec(vs, 32); + auto val_i32 = m_ir_builder->CreateExtractElement(vs_v4i32, index_i64); + WriteMemory(addr_i64, val_i32, 4); } void Compiler::ADDZE(u32 rd, u32 ra, u32 oe, u32 rc) { - auto ra_i64 = GetGpr(ra); - auto ca_i64 = GetXerCa(); - auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); - auto sum_i64 = m_ir_builder->CreateExtractValue(res_s, { 0 }); - auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); - SetGpr(rd, sum_i64); - SetXerCa(carry_i1); + auto ra_i64 = GetGpr(ra); + auto ca_i64 = GetXerCa(); + auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); + auto sum_i64 = m_ir_builder->CreateExtractValue(res_s, { 0 }); + auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); + SetGpr(rd, sum_i64); + SetXerCa(carry_i1); - if (rc) { - SetCrFieldSignedCmp(0, sum_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, sum_i64, m_ir_builder->getInt64(0)); + } - if (oe) { - // TODO: Implement this - CompilationError("ADDZEO"); - } + if (oe) { + // TODO: Implement this + CompilationError("ADDZEO"); + } } void Compiler::SUBFZE(u32 rd, u32 ra, u32 oe, u32 rc) { - auto ra_i64 = GetGpr(ra); - ra_i64 = m_ir_builder->CreateNot(ra_i64); - auto ca_i64 = GetXerCa(); - auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); - auto res_i64 = m_ir_builder->CreateExtractValue(res_s, { 0 }); - auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); - SetGpr(rd, res_i64); - SetXerCa(carry_i1); + auto ra_i64 = GetGpr(ra); + ra_i64 = m_ir_builder->CreateNot(ra_i64); + auto ca_i64 = GetXerCa(); + auto res_s = 
m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); + auto res_i64 = m_ir_builder->CreateExtractValue(res_s, { 0 }); + auto carry_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); + SetGpr(rd, res_i64); + SetXerCa(carry_i1); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } - if (oe) { - // TODO: Implement this - CompilationError("SUBFZEO"); - } + if (oe) { + // TODO: Implement this + CompilationError("SUBFZEO"); + } } void Compiler::STDCX_(u32 rs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto addr_i32 = m_ir_builder->CreateTrunc(addr_i64, m_ir_builder->getInt32Ty()); - auto rs_i64 = GetGpr(rs); - rs_i64 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, m_ir_builder->getInt64Ty()), rs_i64); - auto rs_i64_ptr = m_ir_builder->CreateAlloca(m_ir_builder->getInt64Ty()); - rs_i64_ptr->setAlignment(8); - m_ir_builder->CreateStore(rs_i64, rs_i64_ptr); - auto success_i1 = Call("vm.reservation_update", vm::reservation_update, addr_i32, m_ir_builder->CreateBitCast(rs_i64_ptr, m_ir_builder->getInt8PtrTy()), m_ir_builder->getInt32(8)); + auto addr_i32 = m_ir_builder->CreateTrunc(addr_i64, m_ir_builder->getInt32Ty()); + auto rs_i64 = GetGpr(rs); + rs_i64 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, m_ir_builder->getInt64Ty()), rs_i64); + auto rs_i64_ptr = m_ir_builder->CreateAlloca(m_ir_builder->getInt64Ty()); + rs_i64_ptr->setAlignment(8); + m_ir_builder->CreateStore(rs_i64, rs_i64_ptr); + auto success_i1 = Call("vm.reservation_update", vm::reservation_update, addr_i32, 
m_ir_builder->CreateBitCast(rs_i64_ptr, m_ir_builder->getInt8PtrTy()), m_ir_builder->getInt32(8)); - auto cr_i32 = GetCr(); - cr_i32 = SetBit(cr_i32, 2, success_i1); - SetCr(cr_i32); + auto cr_i32 = GetCr(); + cr_i32 = SetBit(cr_i32, 2, success_i1); + SetCr(cr_i32); } void Compiler::STBX(u32 rs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - WriteMemory(addr_i64, GetGpr(rs, 8)); + WriteMemory(addr_i64, GetGpr(rs, 8)); } void Compiler::STVX(u32 vs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFF0ULL); - WriteMemory(addr_i64, GetVr(vs), 16); + addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFF0ULL); + WriteMemory(addr_i64, GetVr(vs), 16); } void Compiler::SUBFME(u32 rd, u32 ra, u32 oe, u32 rc) { - auto ca_i64 = GetXerCa(); - auto ra_i64 = GetGpr(ra); - ra_i64 = m_ir_builder->CreateNot(ra_i64); - auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); - auto res_i64 = m_ir_builder->CreateExtractValue(res_s, { 0 }); - auto carry1_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); - res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), res_i64, m_ir_builder->getInt64((s64)-1)); - res_i64 = m_ir_builder->CreateExtractValue(res_s, { 0 }); - auto carry2_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); - auto carry_i1 = m_ir_builder->CreateOr(carry1_i1, carry2_i1); - 
SetGpr(rd, res_i64); - SetXerCa(carry_i1); + auto ca_i64 = GetXerCa(); + auto ra_i64 = GetGpr(ra); + ra_i64 = m_ir_builder->CreateNot(ra_i64); + auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); + auto res_i64 = m_ir_builder->CreateExtractValue(res_s, { 0 }); + auto carry1_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); + res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), res_i64, m_ir_builder->getInt64((s64)-1)); + res_i64 = m_ir_builder->CreateExtractValue(res_s, { 0 }); + auto carry2_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); + auto carry_i1 = m_ir_builder->CreateOr(carry1_i1, carry2_i1); + SetGpr(rd, res_i64); + SetXerCa(carry_i1); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } - if (oe) { - // TODO: Implement this - CompilationError("SUBFMEO"); - } + if (oe) { + // TODO: Implement this + CompilationError("SUBFMEO"); + } } void Compiler::MULLD(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) { - auto ra_i64 = GetGpr(ra); - auto rb_i64 = GetGpr(rb); - auto prod_i64 = m_ir_builder->CreateMul(ra_i64, rb_i64); - SetGpr(rd, prod_i64); + auto ra_i64 = GetGpr(ra); + auto rb_i64 = GetGpr(rb); + auto prod_i64 = m_ir_builder->CreateMul(ra_i64, rb_i64); + SetGpr(rd, prod_i64); - if (rc) { - SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); + } - if (oe) { - // TODO implement oe - CompilationError("MULLDO"); - } + if (oe) { + // TODO implement oe + CompilationError("MULLDO"); + } } void Compiler::ADDME(u32 rd, u32 ra, u32 oe, u32 rc) { - auto ca_i64 = GetXerCa(); - auto ra_i64 = GetGpr(ra); - auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, 
Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); - auto res_i64 = m_ir_builder->CreateExtractValue(res_s, { 0 }); - auto carry1_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); - res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), res_i64, m_ir_builder->getInt64((s64)-1)); - res_i64 = m_ir_builder->CreateExtractValue(res_s, { 0 }); - auto carry2_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); - auto carry_i1 = m_ir_builder->CreateOr(carry1_i1, carry2_i1); - SetGpr(rd, res_i64); - SetXerCa(carry_i1); + auto ca_i64 = GetXerCa(); + auto ra_i64 = GetGpr(ra); + auto res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), ra_i64, ca_i64); + auto res_i64 = m_ir_builder->CreateExtractValue(res_s, { 0 }); + auto carry1_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); + res_s = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::uadd_with_overflow, m_ir_builder->getInt64Ty()), res_i64, m_ir_builder->getInt64((s64)-1)); + res_i64 = m_ir_builder->CreateExtractValue(res_s, { 0 }); + auto carry2_i1 = m_ir_builder->CreateExtractValue(res_s, { 1 }); + auto carry_i1 = m_ir_builder->CreateOr(carry1_i1, carry2_i1); + SetGpr(rd, res_i64); + SetXerCa(carry_i1); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } - if (oe) { - // TODO: Implement this - CompilationError("ADDMEO"); - } + if (oe) { + // TODO: Implement this + CompilationError("ADDMEO"); + } } void Compiler::MULLW(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) { - auto ra_i32 = GetGpr(ra, 32); - auto rb_i32 = GetGpr(rb, 32); - auto ra_i64 = m_ir_builder->CreateSExt(ra_i32, m_ir_builder->getInt64Ty()); - auto rb_i64 = m_ir_builder->CreateSExt(rb_i32, m_ir_builder->getInt64Ty()); - auto prod_i64 = 
m_ir_builder->CreateMul(ra_i64, rb_i64); - SetGpr(rd, prod_i64); + auto ra_i32 = GetGpr(ra, 32); + auto rb_i32 = GetGpr(rb, 32); + auto ra_i64 = m_ir_builder->CreateSExt(ra_i32, m_ir_builder->getInt64Ty()); + auto rb_i64 = m_ir_builder->CreateSExt(rb_i32, m_ir_builder->getInt64Ty()); + auto prod_i64 = m_ir_builder->CreateMul(ra_i64, rb_i64); + SetGpr(rd, prod_i64); - if (rc) { - SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, prod_i64, m_ir_builder->getInt64(0)); + } - if (oe) { - // TODO implement oe - CompilationError("MULLWO"); - } + if (oe) { + // TODO implement oe + CompilationError("MULLWO"); + } } void Compiler::DCBTST(u32 ra, u32 rb, u32 th) { - // TODO: Implement this - m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); + // TODO: Implement this + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); } void Compiler::STBUX(u32 rs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - WriteMemory(addr_i64, GetGpr(rs, 8)); - SetGpr(ra, addr_i64); + WriteMemory(addr_i64, GetGpr(rs, 8)); + SetGpr(ra, addr_i64); } void Compiler::ADD(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) { - auto ra_i64 = GetGpr(ra); - auto rb_i64 = GetGpr(rb); - auto sum_i64 = m_ir_builder->CreateAdd(ra_i64, rb_i64); - SetGpr(rd, sum_i64); + auto ra_i64 = GetGpr(ra); + auto rb_i64 = GetGpr(rb); + auto sum_i64 = m_ir_builder->CreateAdd(ra_i64, rb_i64); + SetGpr(rd, sum_i64); - if (rc) { - SetCrFieldSignedCmp(0, sum_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, sum_i64, m_ir_builder->getInt64(0)); + } - if (oe) { - // TODO: Implement this - CompilationError("ADDO"); - } + if (oe) { + // TODO: Implement this + CompilationError("ADDO"); + } } void 
Compiler::DCBT(u32 ra, u32 rb, u32 th) { - // TODO: Implement this using prefetch - m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); + // TODO: Implement this using prefetch + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); } void Compiler::LHZX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto mem_i16 = ReadMemory(addr_i64, 16); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); + auto mem_i16 = ReadMemory(addr_i64, 16); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); } void Compiler::EQV(u32 ra, u32 rs, u32 rb, u32 rc) { - auto rs_i64 = GetGpr(rs); - auto rb_i64 = GetGpr(rb); - auto res_i64 = m_ir_builder->CreateXor(rs_i64, rb_i64); - res_i64 = m_ir_builder->CreateNot(res_i64); - SetGpr(ra, res_i64); + auto rs_i64 = GetGpr(rs); + auto rb_i64 = GetGpr(rb); + auto res_i64 = m_ir_builder->CreateXor(rs_i64, rb_i64); + res_i64 = m_ir_builder->CreateNot(res_i64); + SetGpr(ra, res_i64); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } } void Compiler::ECIWX(u32 rd, u32 ra, u32 rb) { - CompilationError("ECIWX"); - //auto addr_i64 = GetGpr(rb); - //if (ra) { - // auto ra_i64 = GetGpr(ra); - // addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - //} + CompilationError("ECIWX"); + //auto addr_i64 = GetGpr(rb); + //if (ra) { + // auto ra_i64 = GetGpr(ra); + // addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + //} - //auto mem_i32 = ReadMemory(addr_i64, 32); - //auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); - 
//SetGpr(rd, mem_i64); + //auto mem_i32 = ReadMemory(addr_i64, 32); + //auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); + //SetGpr(rd, mem_i64); } void Compiler::LHZUX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - auto mem_i16 = ReadMemory(addr_i64, 16); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); - SetGpr(ra, addr_i64); + auto mem_i16 = ReadMemory(addr_i64, 16); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); + SetGpr(ra, addr_i64); } void Compiler::XOR(u32 ra, u32 rs, u32 rb, u32 rc) { - auto rs_i64 = GetGpr(rs); - auto rb_i64 = GetGpr(rb); - auto res_i64 = m_ir_builder->CreateXor(rs_i64, rb_i64); - SetGpr(ra, res_i64); + auto rs_i64 = GetGpr(rs); + auto rb_i64 = GetGpr(rb); + auto res_i64 = m_ir_builder->CreateXor(rs_i64, rb_i64); + SetGpr(ra, res_i64); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } } void Compiler::MFSPR(u32 rd, u32 spr) { - Value * rd_i64; - auto n = (spr >> 5) | ((spr & 0x1f) << 5); + Value * rd_i64; + auto n = (spr >> 5) | ((spr & 0x1f) << 5); - switch (n) { - case 0x001: - rd_i64 = GetXer(); - break; - case 0x008: - rd_i64 = GetLr(); - break; - case 0x009: - rd_i64 = GetCtr(); - break; - case 0x100: - rd_i64 = GetVrsave(); - break; - case 0x10C: - rd_i64 = Call("get_timebased_time", get_timebased_time); - break; - case 0x10D: - rd_i64 = Call("get_timebased_time", get_timebased_time); - rd_i64 = m_ir_builder->CreateLShr(rd_i64, 32); - break; - default: - assert(0); - break; - } + switch (n) { + case 0x001: + rd_i64 = GetXer(); + break; + case 0x008: + rd_i64 = GetLr(); + break; + case 
0x009: + rd_i64 = GetCtr(); + break; + case 0x100: + rd_i64 = GetVrsave(); + break; + case 0x10C: + rd_i64 = Call("get_timebased_time", get_timebased_time); + break; + case 0x10D: + rd_i64 = Call("get_timebased_time", get_timebased_time); + rd_i64 = m_ir_builder->CreateLShr(rd_i64, 32); + break; + default: + assert(0); + break; + } - SetGpr(rd, rd_i64); + SetGpr(rd, rd_i64); } void Compiler::LWAX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto mem_i32 = ReadMemory(addr_i64, 32); - auto mem_i64 = m_ir_builder->CreateSExt(mem_i32, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); + auto mem_i32 = ReadMemory(addr_i64, 32); + auto mem_i64 = m_ir_builder->CreateSExt(mem_i32, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); } void Compiler::DST(u32 ra, u32 rb, u32 strm, u32 t) { - // TODO: Revisit - m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); + // TODO: Revisit + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); } void Compiler::LHAX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto mem_i16 = ReadMemory(addr_i64, 16); - auto mem_i64 = m_ir_builder->CreateSExt(mem_i16, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); + auto mem_i16 = ReadMemory(addr_i64, 16); + auto mem_i64 = m_ir_builder->CreateSExt(mem_i16, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); } void Compiler::LVXL(u32 vd, u32 ra, u32 rb) { - LVX(vd, ra, rb); + LVX(vd, ra, rb); } void Compiler::MFTB(u32 rd, u32 spr) { - auto tb_i64 = 
Call("get_timebased_time", get_timebased_time); + auto tb_i64 = Call("get_timebased_time", get_timebased_time); - u32 n = (spr >> 5) | ((spr & 0x1f) << 5); - if (n == 0x10D) { - tb_i64 = m_ir_builder->CreateLShr(tb_i64, 32); - } + u32 n = (spr >> 5) | ((spr & 0x1f) << 5); + if (n == 0x10D) { + tb_i64 = m_ir_builder->CreateLShr(tb_i64, 32); + } - SetGpr(rd, tb_i64); + SetGpr(rd, tb_i64); } void Compiler::LWAUX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - auto mem_i32 = ReadMemory(addr_i64, 32); - auto mem_i64 = m_ir_builder->CreateSExt(mem_i32, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); - SetGpr(ra, addr_i64); + auto mem_i32 = ReadMemory(addr_i64, 32); + auto mem_i64 = m_ir_builder->CreateSExt(mem_i32, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); + SetGpr(ra, addr_i64); } void Compiler::DSTST(u32 ra, u32 rb, u32 strm, u32 t) { - // TODO: Revisit - m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); + // TODO: Revisit + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); } void Compiler::LHAUX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - auto mem_i16 = ReadMemory(addr_i64, 16); - auto mem_i64 = m_ir_builder->CreateSExt(mem_i16, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); - SetGpr(ra, addr_i64); + auto mem_i16 = ReadMemory(addr_i64, 16); + auto mem_i64 = m_ir_builder->CreateSExt(mem_i16, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); + SetGpr(ra, addr_i64); } void Compiler::STHX(u32 rs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = 
GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - WriteMemory(addr_i64, GetGpr(rs, 16)); + WriteMemory(addr_i64, GetGpr(rs, 16)); } void Compiler::ORC(u32 ra, u32 rs, u32 rb, u32 rc) { - auto rs_i64 = GetGpr(rs); - auto rb_i64 = GetGpr(rb); - rb_i64 = m_ir_builder->CreateNot(rb_i64); - auto res_i64 = m_ir_builder->CreateOr(rs_i64, rb_i64); - SetGpr(ra, res_i64); + auto rs_i64 = GetGpr(rs); + auto rb_i64 = GetGpr(rb); + rb_i64 = m_ir_builder->CreateNot(rb_i64); + auto res_i64 = m_ir_builder->CreateOr(rs_i64, rb_i64); + SetGpr(ra, res_i64); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } } void Compiler::ECOWX(u32 rs, u32 ra, u32 rb) { - CompilationError("ECOWX"); - //auto addr_i64 = GetGpr(rb); - //if (ra) { - // auto ra_i64 = GetGpr(ra); - // addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - //} + CompilationError("ECOWX"); + //auto addr_i64 = GetGpr(rb); + //if (ra) { + // auto ra_i64 = GetGpr(ra); + // addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + //} - //WriteMemory(addr_i64, GetGpr(rs, 32)); + //WriteMemory(addr_i64, GetGpr(rs, 32)); } void Compiler::STHUX(u32 rs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - WriteMemory(addr_i64, GetGpr(rs, 16)); - SetGpr(ra, addr_i64); + WriteMemory(addr_i64, GetGpr(rs, 16)); + SetGpr(ra, addr_i64); } void Compiler::OR(u32 ra, u32 rs, u32 rb, u32 rc) { - auto rs_i64 = GetGpr(rs); - auto rb_i64 = GetGpr(rb); - auto res_i64 = m_ir_builder->CreateOr(rs_i64, rb_i64); - SetGpr(ra, res_i64); + auto rs_i64 = GetGpr(rs); + auto rb_i64 = GetGpr(rb); + auto res_i64 
= m_ir_builder->CreateOr(rs_i64, rb_i64); + SetGpr(ra, res_i64); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } } void Compiler::DIVDU(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) { - auto ra_i64 = GetGpr(ra); - auto rb_i64 = GetGpr(rb); - auto res_i64 = m_ir_builder->CreateUDiv(ra_i64, rb_i64); - SetGpr(rd, res_i64); + auto ra_i64 = GetGpr(ra); + auto rb_i64 = GetGpr(rb); + auto res_i64 = m_ir_builder->CreateUDiv(ra_i64, rb_i64); + SetGpr(rd, res_i64); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } - if (oe) { - // TODO implement oe - CompilationError("DIVDUO"); - } + if (oe) { + // TODO implement oe + CompilationError("DIVDUO"); + } - // TODO make sure an exception does not occur on divide by 0 and overflow + // TODO make sure an exception does not occur on divide by 0 and overflow } void Compiler::DIVWU(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) { - auto ra_i32 = GetGpr(ra, 32); - auto rb_i32 = GetGpr(rb, 32); - auto res_i32 = m_ir_builder->CreateUDiv(ra_i32, rb_i32); - auto res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); - SetGpr(rd, res_i64); + auto ra_i32 = GetGpr(ra, 32); + auto rb_i32 = GetGpr(rb, 32); + auto res_i32 = m_ir_builder->CreateUDiv(ra_i32, rb_i32); + auto res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); + SetGpr(rd, res_i64); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } - if (oe) { - // TODO implement oe - CompilationError("DIVWUO"); - } + if (oe) { + // TODO implement oe + CompilationError("DIVWUO"); + } - // TODO make sure an exception does not occur on divide by 0 and overflow + // TODO make sure an exception does not occur on divide by 0 and overflow } void Compiler::MTSPR(u32 
spr, u32 rs) { - auto rs_i64 = GetGpr(rs); - auto n = (spr >> 5) | ((spr & 0x1f) << 5); - - switch (n) { - case 0x001: - SetXer(rs_i64); - break; - case 0x008: - SetLr(rs_i64); - break; - case 0x009: - SetCtr(rs_i64); - break; - case 0x100: - SetVrsave(rs_i64); - break; - default: - assert(0); - break; - } + auto rs_i64 = GetGpr(rs); + auto n = (spr >> 5) | ((spr & 0x1f) << 5); + switch (n) { + case 0x001: + SetXer(rs_i64); + break; + case 0x008: + SetLr(rs_i64); + break; + case 0x009: + SetCtr(rs_i64); + break; + case 0x100: + SetVrsave(rs_i64); + break; + default: + assert(0); + break; + } } void Compiler::NAND(u32 ra, u32 rs, u32 rb, u32 rc) { - auto rs_i64 = GetGpr(rs); - auto rb_i64 = GetGpr(rb); - auto res_i64 = m_ir_builder->CreateAnd(rs_i64, rb_i64); - res_i64 = m_ir_builder->CreateNot(res_i64); - SetGpr(ra, res_i64); + auto rs_i64 = GetGpr(rs); + auto rb_i64 = GetGpr(rb); + auto res_i64 = m_ir_builder->CreateAnd(rs_i64, rb_i64); + res_i64 = m_ir_builder->CreateNot(res_i64); + SetGpr(ra, res_i64); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } } void Compiler::STVXL(u32 vs, u32 ra, u32 rb) { - STVX(vs, ra, rb); + STVX(vs, ra, rb); } void Compiler::DIVD(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) { - auto ra_i64 = GetGpr(ra); - auto rb_i64 = GetGpr(rb); - auto res_i64 = m_ir_builder->CreateSDiv(ra_i64, rb_i64); - SetGpr(rd, res_i64); + auto ra_i64 = GetGpr(ra); + auto rb_i64 = GetGpr(rb); + auto res_i64 = m_ir_builder->CreateSDiv(ra_i64, rb_i64); + SetGpr(rd, res_i64); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } - if (oe) { - // TODO implement oe - CompilationError("DIVDO"); - } + if (oe) { + // TODO implement oe + CompilationError("DIVDO"); + } - // TODO make sure an exception does not occur on divide by 0 and overflow + // TODO make sure an 
exception does not occur on divide by 0 and overflow } void Compiler::DIVW(u32 rd, u32 ra, u32 rb, u32 oe, u32 rc) { - auto ra_i32 = GetGpr(ra, 32); - auto rb_i32 = GetGpr(rb, 32); - auto res_i32 = m_ir_builder->CreateSDiv(ra_i32, rb_i32); - auto res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); - SetGpr(rd, res_i64); + auto ra_i32 = GetGpr(ra, 32); + auto rb_i32 = GetGpr(rb, 32); + auto res_i32 = m_ir_builder->CreateSDiv(ra_i32, rb_i32); + auto res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); + SetGpr(rd, res_i64); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } - if (oe) { - // TODO implement oe - CompilationError("DIVWO"); - } + if (oe) { + // TODO implement oe + CompilationError("DIVWO"); + } - // TODO make sure an exception does not occur on divide by 0 and overflow + // TODO make sure an exception does not occur on divide by 0 and overflow } void Compiler::LVLX(u32 vd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto eb_i64 = m_ir_builder->CreateAnd(addr_i64, 0xF); - eb_i64 = m_ir_builder->CreateShl(eb_i64, 3); - auto eb_i128 = m_ir_builder->CreateZExt(eb_i64, m_ir_builder->getIntNTy(128)); - addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFF0ULL); - auto mem_i128 = ReadMemory(addr_i64, 128, 16); - mem_i128 = m_ir_builder->CreateShl(mem_i128, eb_i128); - SetVr(vd, mem_i128); + auto eb_i64 = m_ir_builder->CreateAnd(addr_i64, 0xF); + eb_i64 = m_ir_builder->CreateShl(eb_i64, 3); + auto eb_i128 = m_ir_builder->CreateZExt(eb_i64, m_ir_builder->getIntNTy(128)); + addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFF0ULL); + auto mem_i128 = ReadMemory(addr_i64, 
128, 16); + mem_i128 = m_ir_builder->CreateShl(mem_i128, eb_i128); + SetVr(vd, mem_i128); } void Compiler::LDBRX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto mem_i64 = ReadMemory(addr_i64, 64, 0, false); - SetGpr(rd, mem_i64); + auto mem_i64 = ReadMemory(addr_i64, 64, 0, false); + SetGpr(rd, mem_i64); } void Compiler::LSWX(u32 rd, u32 ra, u32 rb) { - CompilationError("LSWX"); + CompilationError("LSWX"); } void Compiler::LWBRX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto mem_i32 = ReadMemory(addr_i64, 32, 0, false); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); + auto mem_i32 = ReadMemory(addr_i64, 32, 0, false); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); } void Compiler::LFSX(u32 frd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto mem_i32 = ReadMemory(addr_i64, 32); - SetFpr(frd, mem_i32); + auto mem_i32 = ReadMemory(addr_i64, 32); + SetFpr(frd, mem_i32); } void Compiler::SRW(u32 ra, u32 rs, u32 rb, u32 rc) { - auto rs_i32 = GetGpr(rs, 32); - auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); - auto rb_i8 = GetGpr(rb, 8); - rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x3F); - auto rb_i64 = 
m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getInt64Ty()); - auto res_i64 = m_ir_builder->CreateLShr(rs_i64, rb_i64); - SetGpr(ra, res_i64); + auto rs_i32 = GetGpr(rs, 32); + auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); + auto rb_i8 = GetGpr(rb, 8); + rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x3F); + auto rb_i64 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getInt64Ty()); + auto res_i64 = m_ir_builder->CreateLShr(rs_i64, rb_i64); + SetGpr(ra, res_i64); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } } void Compiler::SRD(u32 ra, u32 rs, u32 rb, u32 rc) { - auto rs_i64 = GetGpr(rs); - auto rs_i128 = m_ir_builder->CreateZExt(rs_i64, m_ir_builder->getIntNTy(128)); - auto rb_i8 = GetGpr(rb, 8); - rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x7F); - auto rb_i128 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getIntNTy(128)); - auto res_i128 = m_ir_builder->CreateLShr(rs_i128, rb_i128); - auto res_i64 = m_ir_builder->CreateTrunc(res_i128, m_ir_builder->getInt64Ty()); - SetGpr(ra, res_i64); + auto rs_i64 = GetGpr(rs); + auto rs_i128 = m_ir_builder->CreateZExt(rs_i64, m_ir_builder->getIntNTy(128)); + auto rb_i8 = GetGpr(rb, 8); + rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x7F); + auto rb_i128 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getIntNTy(128)); + auto res_i128 = m_ir_builder->CreateLShr(rs_i128, rb_i128); + auto res_i64 = m_ir_builder->CreateTrunc(res_i128, m_ir_builder->getInt64Ty()); + SetGpr(ra, res_i64); - if (rc) { - SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, res_i64, m_ir_builder->getInt64(0)); + } } void Compiler::LVRX(u32 vd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = 
m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto eb_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(16), addr_i64); - eb_i64 = m_ir_builder->CreateAnd(eb_i64, 0xF); - eb_i64 = m_ir_builder->CreateShl(eb_i64, 3); - auto eb_i128 = m_ir_builder->CreateZExt(eb_i64, m_ir_builder->getIntNTy(128)); - addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFF0ULL); - auto mem_i128 = ReadMemory(addr_i64, 128, 16); - mem_i128 = m_ir_builder->CreateLShr(mem_i128, eb_i128); - auto cmp_i1 = m_ir_builder->CreateICmpNE(eb_i64, m_ir_builder->getInt64(0)); - auto cmp_i128 = m_ir_builder->CreateSExt(cmp_i1, m_ir_builder->getIntNTy(128)); - mem_i128 = m_ir_builder->CreateAnd(mem_i128, cmp_i128); - SetVr(vd, mem_i128); + auto eb_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(16), addr_i64); + eb_i64 = m_ir_builder->CreateAnd(eb_i64, 0xF); + eb_i64 = m_ir_builder->CreateShl(eb_i64, 3); + auto eb_i128 = m_ir_builder->CreateZExt(eb_i64, m_ir_builder->getIntNTy(128)); + addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFFFFFFFFF0ULL); + auto mem_i128 = ReadMemory(addr_i64, 128, 16); + mem_i128 = m_ir_builder->CreateLShr(mem_i128, eb_i128); + auto cmp_i1 = m_ir_builder->CreateICmpNE(eb_i64, m_ir_builder->getInt64(0)); + auto cmp_i128 = m_ir_builder->CreateSExt(cmp_i1, m_ir_builder->getIntNTy(128)); + mem_i128 = m_ir_builder->CreateAnd(mem_i128, cmp_i128); + SetVr(vd, mem_i128); } void Compiler::LSWI(u32 rd, u32 ra, u32 nb) { - auto addr_i64 = ra ? GetGpr(ra) : m_ir_builder->getInt64(0); + auto addr_i64 = ra ? GetGpr(ra) : m_ir_builder->getInt64(0); - nb = nb ? nb : 32; - for (u32 i = 0; i < nb; i += 4) { - auto val_i32 = ReadMemory(addr_i64, 32, 0, true, false); + nb = nb ? 
nb : 32; + for (u32 i = 0; i < nb; i += 4) { + auto val_i32 = ReadMemory(addr_i64, 32, 0, true, false); - if (i + 4 <= nb) { - addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64(4)); - } - else { - u32 mask = 0xFFFFFFFF << ((4 - (nb - i)) * 8); - val_i32 = m_ir_builder->CreateAnd(val_i32, mask); - } + if (i + 4 <= nb) { + addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64(4)); + } + else { + u32 mask = 0xFFFFFFFF << ((4 - (nb - i)) * 8); + val_i32 = m_ir_builder->CreateAnd(val_i32, mask); + } - auto val_i64 = m_ir_builder->CreateZExt(val_i32, m_ir_builder->getInt64Ty()); - SetGpr(rd, val_i64); - rd = (rd + 1) % 32; - } + auto val_i64 = m_ir_builder->CreateZExt(val_i32, m_ir_builder->getInt64Ty()); + SetGpr(rd, val_i64); + rd = (rd + 1) % 32; + } } void Compiler::LFSUX(u32 frd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - auto mem_i32 = ReadMemory(addr_i64, 32); - SetFpr(frd, mem_i32); - SetGpr(ra, addr_i64); + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto mem_i32 = ReadMemory(addr_i64, 32); + SetFpr(frd, mem_i32); + SetGpr(ra, addr_i64); } void Compiler::SYNC(u32 l) { - m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_mfence)); + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_mfence)); } void Compiler::LFDX(u32 frd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto mem_i64 = ReadMemory(addr_i64, 64); - SetFpr(frd, mem_i64); + auto mem_i64 = ReadMemory(addr_i64, 64); + SetFpr(frd, mem_i64); } void Compiler::LFDUX(u32 frd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto 
ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - auto mem_i64 = ReadMemory(addr_i64, 64); - SetFpr(frd, mem_i64); - SetGpr(ra, addr_i64); + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto mem_i64 = ReadMemory(addr_i64, 64); + SetFpr(frd, mem_i64); + SetGpr(ra, addr_i64); } void Compiler::STVLX(u32 vs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); - auto size_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(16), index_i64); - addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFF); - addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0))); - auto addr_i8_ptr = m_ir_builder->CreateIntToPtr(addr_i64, m_ir_builder->getInt8PtrTy()); + auto index_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); + auto size_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(16), index_i64); + addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFF); + addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0))); + auto addr_i8_ptr = m_ir_builder->CreateIntToPtr(addr_i64, m_ir_builder->getInt8PtrTy()); - auto vs_i128 = GetVr(vs); - vs_i128 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, vs_i128->getType()), vs_i128); - auto vs_i128_ptr = m_ir_builder->CreateAlloca(vs_i128->getType()); - vs_i128_ptr->setAlignment(16); - m_ir_builder->CreateAlignedStore(vs_i128, vs_i128_ptr, 16); - auto vs_i8_ptr = m_ir_builder->CreateBitCast(vs_i128_ptr, m_ir_builder->getInt8PtrTy()); + auto vs_i128 = GetVr(vs); + vs_i128 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, vs_i128->getType()), 
vs_i128); + auto vs_i128_ptr = m_ir_builder->CreateAlloca(vs_i128->getType()); + vs_i128_ptr->setAlignment(16); + m_ir_builder->CreateAlignedStore(vs_i128, vs_i128_ptr, 16); + auto vs_i8_ptr = m_ir_builder->CreateBitCast(vs_i128_ptr, m_ir_builder->getInt8PtrTy()); - Type * types[3] = { m_ir_builder->getInt8PtrTy(), m_ir_builder->getInt8PtrTy(), m_ir_builder->getInt64Ty() }; - m_ir_builder->CreateCall5(Intrinsic::getDeclaration(m_module, Intrinsic::memcpy, types), - addr_i8_ptr, vs_i8_ptr, size_i64, m_ir_builder->getInt32(1), m_ir_builder->getInt1(false)); + Type * types[3] = { m_ir_builder->getInt8PtrTy(), m_ir_builder->getInt8PtrTy(), m_ir_builder->getInt64Ty() }; + m_ir_builder->CreateCall5(Intrinsic::getDeclaration(m_module, Intrinsic::memcpy, types), + addr_i8_ptr, vs_i8_ptr, size_i64, m_ir_builder->getInt32(1), m_ir_builder->getInt1(false)); } void Compiler::STDBRX(u32 rs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - WriteMemory(addr_i64, GetGpr(rs), 0, false); + WriteMemory(addr_i64, GetGpr(rs), 0, false); } void Compiler::STSWX(u32 rs, u32 ra, u32 rb) { - CompilationError("STSWX"); + CompilationError("STSWX"); } void Compiler::STWBRX(u32 rs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - WriteMemory(addr_i64, GetGpr(rs, 32), 0, false); + WriteMemory(addr_i64, GetGpr(rs, 32), 0, false); } void Compiler::STFSX(u32 frs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if 
(ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto frs_i32 = m_ir_builder->CreateBitCast(GetFpr(frs, 32), m_ir_builder->getInt32Ty()); - WriteMemory(addr_i64, frs_i32); + auto frs_i32 = m_ir_builder->CreateBitCast(GetFpr(frs, 32), m_ir_builder->getInt32Ty()); + WriteMemory(addr_i64, frs_i32); } void Compiler::STVRX(u32 vs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto size_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); - auto index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(16), size_i64); - addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFF0); - addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0))); - auto addr_i8_ptr = m_ir_builder->CreateIntToPtr(addr_i64, m_ir_builder->getInt8PtrTy()); + auto size_i64 = m_ir_builder->CreateAnd(addr_i64, 0xf); + auto index_i64 = m_ir_builder->CreateSub(m_ir_builder->getInt64(16), size_i64); + addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFF0); + addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0))); + auto addr_i8_ptr = m_ir_builder->CreateIntToPtr(addr_i64, m_ir_builder->getInt8PtrTy()); - auto vs_i128 = GetVr(vs); - vs_i128 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, vs_i128->getType()), vs_i128); - auto vs_i128_ptr = m_ir_builder->CreateAlloca(vs_i128->getType()); - vs_i128_ptr->setAlignment(16); - m_ir_builder->CreateAlignedStore(vs_i128, vs_i128_ptr, 16); - auto vs_i8_ptr = m_ir_builder->CreateBitCast(vs_i128_ptr, m_ir_builder->getInt8PtrTy()); - vs_i8_ptr = m_ir_builder->CreateGEP(vs_i8_ptr, index_i64); + auto vs_i128 = GetVr(vs); + vs_i128 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, 
Intrinsic::bswap, vs_i128->getType()), vs_i128); + auto vs_i128_ptr = m_ir_builder->CreateAlloca(vs_i128->getType()); + vs_i128_ptr->setAlignment(16); + m_ir_builder->CreateAlignedStore(vs_i128, vs_i128_ptr, 16); + auto vs_i8_ptr = m_ir_builder->CreateBitCast(vs_i128_ptr, m_ir_builder->getInt8PtrTy()); + vs_i8_ptr = m_ir_builder->CreateGEP(vs_i8_ptr, index_i64); - Type * types[3] = { m_ir_builder->getInt8PtrTy(), m_ir_builder->getInt8PtrTy(), m_ir_builder->getInt64Ty() }; - m_ir_builder->CreateCall5(Intrinsic::getDeclaration(m_module, Intrinsic::memcpy, types), - addr_i8_ptr, vs_i8_ptr, size_i64, m_ir_builder->getInt32(1), m_ir_builder->getInt1(false)); + Type * types[3] = { m_ir_builder->getInt8PtrTy(), m_ir_builder->getInt8PtrTy(), m_ir_builder->getInt64Ty() }; + m_ir_builder->CreateCall5(Intrinsic::getDeclaration(m_module, Intrinsic::memcpy, types), + addr_i8_ptr, vs_i8_ptr, size_i64, m_ir_builder->getInt32(1), m_ir_builder->getInt1(false)); } void Compiler::STFSUX(u32 frs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - auto frs_i32 = m_ir_builder->CreateBitCast(GetFpr(frs, 32), m_ir_builder->getInt32Ty()); - WriteMemory(addr_i64, frs_i32); - SetGpr(ra, addr_i64); + auto frs_i32 = m_ir_builder->CreateBitCast(GetFpr(frs, 32), m_ir_builder->getInt32Ty()); + WriteMemory(addr_i64, frs_i32); + SetGpr(ra, addr_i64); } void Compiler::STSWI(u32 rd, u32 ra, u32 nb) { - auto addr_i64 = ra ? GetGpr(ra) : m_ir_builder->getInt64(0); + auto addr_i64 = ra ? GetGpr(ra) : m_ir_builder->getInt64(0); - nb = nb ? nb : 32; - for (u32 i = 0; i < nb; i += 4) { - auto val_i32 = GetGpr(rd, 32); + nb = nb ? 
nb : 32; + for (u32 i = 0; i < nb; i += 4) { + auto val_i32 = GetGpr(rd, 32); - if (i + 4 <= nb) { - WriteMemory(addr_i64, val_i32, 0, true, false); - addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64(4)); - rd = (rd + 1) % 32; - } - else { - u32 n = nb - i; - if (n >= 2) { - auto val_i16 = m_ir_builder->CreateLShr(val_i32, 16); - val_i16 = m_ir_builder->CreateTrunc(val_i16, m_ir_builder->getInt16Ty()); - WriteMemory(addr_i64, val_i16); + if (i + 4 <= nb) { + WriteMemory(addr_i64, val_i32, 0, true, false); + addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64(4)); + rd = (rd + 1) % 32; + } + else { + u32 n = nb - i; + if (n >= 2) { + auto val_i16 = m_ir_builder->CreateLShr(val_i32, 16); + val_i16 = m_ir_builder->CreateTrunc(val_i16, m_ir_builder->getInt16Ty()); + WriteMemory(addr_i64, val_i16); - if (n == 3) { - auto val_i8 = m_ir_builder->CreateLShr(val_i32, 8); - val_i8 = m_ir_builder->CreateTrunc(val_i8, m_ir_builder->getInt8Ty()); - addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64(2)); - WriteMemory(addr_i64, val_i8); - } - } - else { - auto val_i8 = m_ir_builder->CreateLShr(val_i32, 24); - val_i8 = m_ir_builder->CreateTrunc(val_i8, m_ir_builder->getInt8Ty()); - WriteMemory(addr_i64, val_i8); - } - } - } + if (n == 3) { + auto val_i8 = m_ir_builder->CreateLShr(val_i32, 8); + val_i8 = m_ir_builder->CreateTrunc(val_i8, m_ir_builder->getInt8Ty()); + addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64(2)); + WriteMemory(addr_i64, val_i8); + } + } + else { + auto val_i8 = m_ir_builder->CreateLShr(val_i32, 24); + val_i8 = m_ir_builder->CreateTrunc(val_i8, m_ir_builder->getInt8Ty()); + WriteMemory(addr_i64, val_i8); + } + } + } } void Compiler::STFDX(u32 frs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = 
m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty()); - WriteMemory(addr_i64, frs_i64); + auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty()); + WriteMemory(addr_i64, frs_i64); } void Compiler::STFDUX(u32 frs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto addr_i64 = GetGpr(rb); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty()); - WriteMemory(addr_i64, frs_i64); - SetGpr(ra, addr_i64); + auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty()); + WriteMemory(addr_i64, frs_i64); + SetGpr(ra, addr_i64); } void Compiler::LVLXL(u32 vd, u32 ra, u32 rb) { - LVLX(vd, ra, rb); + LVLX(vd, ra, rb); } void Compiler::LHBRX(u32 rd, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto mem_i16 = ReadMemory(addr_i64, 16, 0, false); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); + auto mem_i16 = ReadMemory(addr_i64, 16, 0, false); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); } void Compiler::SRAW(u32 ra, u32 rs, u32 rb, u32 rc) { - auto rs_i32 = GetGpr(rs, 32); - auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); - rs_i64 = m_ir_builder->CreateShl(rs_i64, 32); - auto rb_i8 = GetGpr(rb, 8); - rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x3F); - auto rb_i64 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getInt64Ty()); - auto res_i64 = m_ir_builder->CreateAShr(rs_i64, rb_i64); - 
auto ra_i64 = m_ir_builder->CreateAShr(res_i64, 32); - SetGpr(ra, ra_i64); + auto rs_i32 = GetGpr(rs, 32); + auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); + rs_i64 = m_ir_builder->CreateShl(rs_i64, 32); + auto rb_i8 = GetGpr(rb, 8); + rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x3F); + auto rb_i64 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getInt64Ty()); + auto res_i64 = m_ir_builder->CreateAShr(rs_i64, rb_i64); + auto ra_i64 = m_ir_builder->CreateAShr(res_i64, 32); + SetGpr(ra, ra_i64); - auto res_i32 = m_ir_builder->CreateTrunc(res_i64, m_ir_builder->getInt32Ty()); - auto ca1_i1 = m_ir_builder->CreateICmpSLT(ra_i64, m_ir_builder->getInt64(0)); - auto ca2_i1 = m_ir_builder->CreateICmpNE(res_i32, m_ir_builder->getInt32(0)); - auto ca_i1 = m_ir_builder->CreateAnd(ca1_i1, ca2_i1); - SetXerCa(ca_i1); + auto res_i32 = m_ir_builder->CreateTrunc(res_i64, m_ir_builder->getInt32Ty()); + auto ca1_i1 = m_ir_builder->CreateICmpSLT(ra_i64, m_ir_builder->getInt64(0)); + auto ca2_i1 = m_ir_builder->CreateICmpNE(res_i32, m_ir_builder->getInt32(0)); + auto ca_i1 = m_ir_builder->CreateAnd(ca1_i1, ca2_i1); + SetXerCa(ca_i1); - if (rc) { - SetCrFieldSignedCmp(0, ra_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, ra_i64, m_ir_builder->getInt64(0)); + } } void Compiler::SRAD(u32 ra, u32 rs, u32 rb, u32 rc) { - auto rs_i64 = GetGpr(rs); - auto rs_i128 = m_ir_builder->CreateZExt(rs_i64, m_ir_builder->getIntNTy(128)); - rs_i128 = m_ir_builder->CreateShl(rs_i128, 64); - auto rb_i8 = GetGpr(rb, 8); - rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x7F); - auto rb_i128 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getIntNTy(128)); - auto res_i128 = m_ir_builder->CreateAShr(rs_i128, rb_i128); - auto ra_i128 = m_ir_builder->CreateAShr(res_i128, 64); - auto ra_i64 = m_ir_builder->CreateTrunc(ra_i128, m_ir_builder->getInt64Ty()); - SetGpr(ra, ra_i64); + auto rs_i64 = GetGpr(rs); + auto rs_i128 = m_ir_builder->CreateZExt(rs_i64, 
m_ir_builder->getIntNTy(128)); + rs_i128 = m_ir_builder->CreateShl(rs_i128, 64); + auto rb_i8 = GetGpr(rb, 8); + rb_i8 = m_ir_builder->CreateAnd(rb_i8, 0x7F); + auto rb_i128 = m_ir_builder->CreateZExt(rb_i8, m_ir_builder->getIntNTy(128)); + auto res_i128 = m_ir_builder->CreateAShr(rs_i128, rb_i128); + auto ra_i128 = m_ir_builder->CreateAShr(res_i128, 64); + auto ra_i64 = m_ir_builder->CreateTrunc(ra_i128, m_ir_builder->getInt64Ty()); + SetGpr(ra, ra_i64); - auto res_i64 = m_ir_builder->CreateTrunc(res_i128, m_ir_builder->getInt64Ty()); - auto ca1_i1 = m_ir_builder->CreateICmpSLT(ra_i64, m_ir_builder->getInt64(0)); - auto ca2_i1 = m_ir_builder->CreateICmpNE(res_i64, m_ir_builder->getInt64(0)); - auto ca_i1 = m_ir_builder->CreateAnd(ca1_i1, ca2_i1); - SetXerCa(ca_i1); + auto res_i64 = m_ir_builder->CreateTrunc(res_i128, m_ir_builder->getInt64Ty()); + auto ca1_i1 = m_ir_builder->CreateICmpSLT(ra_i64, m_ir_builder->getInt64(0)); + auto ca2_i1 = m_ir_builder->CreateICmpNE(res_i64, m_ir_builder->getInt64(0)); + auto ca_i1 = m_ir_builder->CreateAnd(ca1_i1, ca2_i1); + SetXerCa(ca_i1); - if (rc) { - SetCrFieldSignedCmp(0, ra_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, ra_i64, m_ir_builder->getInt64(0)); + } } void Compiler::LVRXL(u32 vd, u32 ra, u32 rb) { - LVRX(vd, ra, rb); + LVRX(vd, ra, rb); } void Compiler::DSS(u32 strm, u32 a) { - // TODO: Revisit - m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); + // TODO: Revisit + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); } void Compiler::SRAWI(u32 ra, u32 rs, u32 sh, u32 rc) { - auto rs_i32 = GetGpr(rs, 32); - auto rs_i64 = m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); - rs_i64 = m_ir_builder->CreateShl(rs_i64, 32); - auto res_i64 = m_ir_builder->CreateAShr(rs_i64, sh); - auto ra_i64 = m_ir_builder->CreateAShr(res_i64, 32); - SetGpr(ra, ra_i64); + auto rs_i32 = GetGpr(rs, 32); + auto rs_i64 = 
m_ir_builder->CreateZExt(rs_i32, m_ir_builder->getInt64Ty()); + rs_i64 = m_ir_builder->CreateShl(rs_i64, 32); + auto res_i64 = m_ir_builder->CreateAShr(rs_i64, sh); + auto ra_i64 = m_ir_builder->CreateAShr(res_i64, 32); + SetGpr(ra, ra_i64); - auto res_i32 = m_ir_builder->CreateTrunc(res_i64, m_ir_builder->getInt32Ty()); - auto ca1_i1 = m_ir_builder->CreateICmpSLT(ra_i64, m_ir_builder->getInt64(0)); - auto ca2_i1 = m_ir_builder->CreateICmpNE(res_i32, m_ir_builder->getInt32(0)); - auto ca_i1 = m_ir_builder->CreateAnd(ca1_i1, ca2_i1); - SetXerCa(ca_i1); + auto res_i32 = m_ir_builder->CreateTrunc(res_i64, m_ir_builder->getInt32Ty()); + auto ca1_i1 = m_ir_builder->CreateICmpSLT(ra_i64, m_ir_builder->getInt64(0)); + auto ca2_i1 = m_ir_builder->CreateICmpNE(res_i32, m_ir_builder->getInt32(0)); + auto ca_i1 = m_ir_builder->CreateAnd(ca1_i1, ca2_i1); + SetXerCa(ca_i1); - if (rc) { - SetCrFieldSignedCmp(0, ra_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, ra_i64, m_ir_builder->getInt64(0)); + } } void Compiler::SRADI1(u32 ra, u32 rs, u32 sh, u32 rc) { - auto rs_i64 = GetGpr(rs); - auto rs_i128 = m_ir_builder->CreateZExt(rs_i64, m_ir_builder->getIntNTy(128)); - rs_i128 = m_ir_builder->CreateShl(rs_i128, 64); - auto res_i128 = m_ir_builder->CreateAShr(rs_i128, sh); - auto ra_i128 = m_ir_builder->CreateAShr(res_i128, 64); - auto ra_i64 = m_ir_builder->CreateTrunc(ra_i128, m_ir_builder->getInt64Ty()); - SetGpr(ra, ra_i64); + auto rs_i64 = GetGpr(rs); + auto rs_i128 = m_ir_builder->CreateZExt(rs_i64, m_ir_builder->getIntNTy(128)); + rs_i128 = m_ir_builder->CreateShl(rs_i128, 64); + auto res_i128 = m_ir_builder->CreateAShr(rs_i128, sh); + auto ra_i128 = m_ir_builder->CreateAShr(res_i128, 64); + auto ra_i64 = m_ir_builder->CreateTrunc(ra_i128, m_ir_builder->getInt64Ty()); + SetGpr(ra, ra_i64); - auto res_i64 = m_ir_builder->CreateTrunc(res_i128, m_ir_builder->getInt64Ty()); - auto ca1_i1 = m_ir_builder->CreateICmpSLT(ra_i64, m_ir_builder->getInt64(0)); - 
auto ca2_i1 = m_ir_builder->CreateICmpNE(res_i64, m_ir_builder->getInt64(0)); - auto ca_i1 = m_ir_builder->CreateAnd(ca1_i1, ca2_i1); - SetXerCa(ca_i1); + auto res_i64 = m_ir_builder->CreateTrunc(res_i128, m_ir_builder->getInt64Ty()); + auto ca1_i1 = m_ir_builder->CreateICmpSLT(ra_i64, m_ir_builder->getInt64(0)); + auto ca2_i1 = m_ir_builder->CreateICmpNE(res_i64, m_ir_builder->getInt64(0)); + auto ca_i1 = m_ir_builder->CreateAnd(ca1_i1, ca2_i1); + SetXerCa(ca_i1); - if (rc) { - SetCrFieldSignedCmp(0, ra_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, ra_i64, m_ir_builder->getInt64(0)); + } } void Compiler::SRADI2(u32 ra, u32 rs, u32 sh, u32 rc) { - SRADI1(ra, rs, sh, rc); + SRADI1(ra, rs, sh, rc); } void Compiler::EIEIO() { - m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_mfence)); + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_mfence)); } void Compiler::STVLXL(u32 vs, u32 ra, u32 rb) { - STVLX(vs, ra, rb); + STVLX(vs, ra, rb); } void Compiler::STHBRX(u32 rs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - WriteMemory(addr_i64, GetGpr(rs, 16), 0, false); + WriteMemory(addr_i64, GetGpr(rs, 16), 0, false); } void Compiler::EXTSH(u32 ra, u32 rs, u32 rc) { - auto rs_i16 = GetGpr(rs, 16); - auto rs_i64 = m_ir_builder->CreateSExt(rs_i16, m_ir_builder->getInt64Ty()); - SetGpr(ra, rs_i64); + auto rs_i16 = GetGpr(rs, 16); + auto rs_i64 = m_ir_builder->CreateSExt(rs_i16, m_ir_builder->getInt64Ty()); + SetGpr(ra, rs_i64); - if (rc) { - SetCrFieldSignedCmp(0, rs_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, rs_i64, m_ir_builder->getInt64(0)); + } } void Compiler::STVRXL(u32 vs, u32 ra, u32 rb) { - STVRX(vs, ra, rb); + 
STVRX(vs, ra, rb); } void Compiler::EXTSB(u32 ra, u32 rs, u32 rc) { - auto rs_i8 = GetGpr(rs, 8); - auto rs_i64 = m_ir_builder->CreateSExt(rs_i8, m_ir_builder->getInt64Ty()); - SetGpr(ra, rs_i64); + auto rs_i8 = GetGpr(rs, 8); + auto rs_i64 = m_ir_builder->CreateSExt(rs_i8, m_ir_builder->getInt64Ty()); + SetGpr(ra, rs_i64); - if (rc) { - SetCrFieldSignedCmp(0, rs_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, rs_i64, m_ir_builder->getInt64(0)); + } } void Compiler::STFIWX(u32 frs, u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty()); - auto frs_i32 = m_ir_builder->CreateTrunc(frs_i64, m_ir_builder->getInt32Ty()); - WriteMemory(addr_i64, frs_i32); + auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty()); + auto frs_i32 = m_ir_builder->CreateTrunc(frs_i64, m_ir_builder->getInt32Ty()); + WriteMemory(addr_i64, frs_i32); } void Compiler::EXTSW(u32 ra, u32 rs, u32 rc) { - auto rs_i32 = GetGpr(rs, 32); - auto rs_i64 = m_ir_builder->CreateSExt(rs_i32, m_ir_builder->getInt64Ty()); - SetGpr(ra, rs_i64); + auto rs_i32 = GetGpr(rs, 32); + auto rs_i64 = m_ir_builder->CreateSExt(rs_i32, m_ir_builder->getInt64Ty()); + SetGpr(ra, rs_i64); - if (rc) { - SetCrFieldSignedCmp(0, rs_i64, m_ir_builder->getInt64(0)); - } + if (rc) { + SetCrFieldSignedCmp(0, rs_i64, m_ir_builder->getInt64(0)); + } } void Compiler::ICBI(u32 ra, u32 rs) { - // TODO: Revisit - m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); + // TODO: Revisit + m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::donothing)); } void Compiler::DCBZ(u32 ra, u32 rb) { - auto addr_i64 = GetGpr(rb); - if (ra) { - 
auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = GetGpr(rb); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - addr_i64 = m_ir_builder->CreateAnd(addr_i64, ~(127ULL)); - addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0))); - auto addr_i8_ptr = m_ir_builder->CreateIntToPtr(addr_i64, m_ir_builder->getInt8PtrTy()); + addr_i64 = m_ir_builder->CreateAnd(addr_i64, ~(127ULL)); + addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0))); + auto addr_i8_ptr = m_ir_builder->CreateIntToPtr(addr_i64, m_ir_builder->getInt8PtrTy()); - std::vector types = { (Type *)m_ir_builder->getInt8PtrTy(), (Type *)m_ir_builder->getInt32Ty() }; - m_ir_builder->CreateCall5(Intrinsic::getDeclaration(m_module, Intrinsic::memset, types), - addr_i8_ptr, m_ir_builder->getInt8(0), m_ir_builder->getInt32(128), m_ir_builder->getInt32(128), m_ir_builder->getInt1(true)); + std::vector types = { (Type *)m_ir_builder->getInt8PtrTy(), (Type *)m_ir_builder->getInt32Ty() }; + m_ir_builder->CreateCall5(Intrinsic::getDeclaration(m_module, Intrinsic::memset, types), + addr_i8_ptr, m_ir_builder->getInt8(0), m_ir_builder->getInt32(128), m_ir_builder->getInt32(128), m_ir_builder->getInt1(true)); } void Compiler::LWZ(u32 rd, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto mem_i32 = ReadMemory(addr_i64, 32); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); + auto mem_i32 = ReadMemory(addr_i64, 32); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); } 
void Compiler::LWZU(u32 rd, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - auto mem_i32 = ReadMemory(addr_i64, 32); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); - SetGpr(ra, addr_i64); + auto mem_i32 = ReadMemory(addr_i64, 32); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i32, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); + SetGpr(ra, addr_i64); } void Compiler::LBZ(u32 rd, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto mem_i8 = ReadMemory(addr_i64, 8); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i8, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); + auto mem_i8 = ReadMemory(addr_i64, 8); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i8, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); } void Compiler::LBZU(u32 rd, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - auto mem_i8 = ReadMemory(addr_i64, 8); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i8, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); - SetGpr(ra, addr_i64); + auto mem_i8 = ReadMemory(addr_i64, 8); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i8, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); + SetGpr(ra, addr_i64); } void 
Compiler::STW(u32 rs, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - WriteMemory(addr_i64, GetGpr(rs, 32)); + WriteMemory(addr_i64, GetGpr(rs, 32)); } void Compiler::STWU(u32 rs, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - WriteMemory(addr_i64, GetGpr(rs, 32)); - SetGpr(ra, addr_i64); + WriteMemory(addr_i64, GetGpr(rs, 32)); + SetGpr(ra, addr_i64); } void Compiler::STB(u32 rs, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - WriteMemory(addr_i64, GetGpr(rs, 8)); + WriteMemory(addr_i64, GetGpr(rs, 8)); } void Compiler::STBU(u32 rs, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - WriteMemory(addr_i64, GetGpr(rs, 8)); - SetGpr(ra, addr_i64); + WriteMemory(addr_i64, GetGpr(rs, 8)); + SetGpr(ra, addr_i64); } void Compiler::LHZ(u32 rd, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, 
addr_i64); - } + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto mem_i16 = ReadMemory(addr_i64, 16); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); + auto mem_i16 = ReadMemory(addr_i64, 16); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); } void Compiler::LHZU(u32 rd, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - auto mem_i16 = ReadMemory(addr_i64, 16); - auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); - SetGpr(ra, addr_i64); + auto mem_i16 = ReadMemory(addr_i64, 16); + auto mem_i64 = m_ir_builder->CreateZExt(mem_i16, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); + SetGpr(ra, addr_i64); } void Compiler::LHA(u32 rd, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto mem_i16 = ReadMemory(addr_i64, 16); - auto mem_i64 = m_ir_builder->CreateSExt(mem_i16, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); + auto mem_i16 = ReadMemory(addr_i64, 16); + auto mem_i64 = m_ir_builder->CreateSExt(mem_i16, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); } void Compiler::LHAU(u32 rd, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto 
addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - auto mem_i16 = ReadMemory(addr_i64, 16); - auto mem_i64 = m_ir_builder->CreateSExt(mem_i16, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); - SetGpr(ra, addr_i64); + auto mem_i16 = ReadMemory(addr_i64, 16); + auto mem_i64 = m_ir_builder->CreateSExt(mem_i16, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); + SetGpr(ra, addr_i64); } void Compiler::STH(u32 rs, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - WriteMemory(addr_i64, GetGpr(rs, 16)); + WriteMemory(addr_i64, GetGpr(rs, 16)); } void Compiler::STHU(u32 rs, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - WriteMemory(addr_i64, GetGpr(rs, 16)); - SetGpr(ra, addr_i64); + WriteMemory(addr_i64, GetGpr(rs, 16)); + SetGpr(ra, addr_i64); } void Compiler::LMW(u32 rd, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - if (ra) { - addr_i64 = m_ir_builder->CreateAdd(addr_i64, GetGpr(ra)); - } + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + addr_i64 = m_ir_builder->CreateAdd(addr_i64, GetGpr(ra)); + } - for (u32 i = rd; i < 32; i++) { - auto val_i32 = ReadMemory(addr_i64, 32); - auto val_i64 = m_ir_builder->CreateZExt(val_i32, m_ir_builder->getInt64Ty()); - SetGpr(i, val_i64); - addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64(4)); - } + for (u32 i = rd; i < 32; i++) { + auto 
val_i32 = ReadMemory(addr_i64, 32); + auto val_i64 = m_ir_builder->CreateZExt(val_i32, m_ir_builder->getInt64Ty()); + SetGpr(i, val_i64); + addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64(4)); + } } void Compiler::STMW(u32 rs, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - if (ra) { - addr_i64 = m_ir_builder->CreateAdd(addr_i64, GetGpr(ra)); - } + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + addr_i64 = m_ir_builder->CreateAdd(addr_i64, GetGpr(ra)); + } - for (u32 i = rs; i < 32; i++) { - auto val_i32 = GetGpr(i, 32); - WriteMemory(addr_i64, val_i32); - addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64(4)); - } + for (u32 i = rs; i < 32; i++) { + auto val_i32 = GetGpr(i, 32); + WriteMemory(addr_i64, val_i32); + addr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64(4)); + } } void Compiler::LFS(u32 frd, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto mem_i32 = ReadMemory(addr_i64, 32); - SetFpr(frd, mem_i32); + auto mem_i32 = ReadMemory(addr_i64, 32); + SetFpr(frd, mem_i32); } void Compiler::LFSU(u32 frd, u32 ra, s32 ds) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - auto mem_i32 = ReadMemory(addr_i64, 32); - SetFpr(frd, mem_i32); - SetGpr(ra, addr_i64); + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto mem_i32 = ReadMemory(addr_i64, 32); + SetFpr(frd, mem_i32); + SetGpr(ra, addr_i64); } void Compiler::LFD(u32 frd, u32 ra, s32 d) { - auto addr_i64 = (Value 
*)m_ir_builder->getInt64((s64)d); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto mem_i64 = ReadMemory(addr_i64, 64); - SetFpr(frd, mem_i64); + auto mem_i64 = ReadMemory(addr_i64, 64); + SetFpr(frd, mem_i64); } void Compiler::LFDU(u32 frd, u32 ra, s32 ds) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - auto mem_i64 = ReadMemory(addr_i64, 64); - SetFpr(frd, mem_i64); - SetGpr(ra, addr_i64); + auto mem_i64 = ReadMemory(addr_i64, 64); + SetFpr(frd, mem_i64); + SetGpr(ra, addr_i64); } void Compiler::STFS(u32 frs, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto frs_i32 = m_ir_builder->CreateBitCast(GetFpr(frs, 32), m_ir_builder->getInt32Ty()); - WriteMemory(addr_i64, frs_i32); + auto frs_i32 = m_ir_builder->CreateBitCast(GetFpr(frs, 32), m_ir_builder->getInt32Ty()); + WriteMemory(addr_i64, frs_i32); } void Compiler::STFSU(u32 frs, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - auto frs_i32 = m_ir_builder->CreateBitCast(GetFpr(frs, 32), m_ir_builder->getInt32Ty()); - 
WriteMemory(addr_i64, frs_i32); - SetGpr(ra, addr_i64); + auto frs_i32 = m_ir_builder->CreateBitCast(GetFpr(frs, 32), m_ir_builder->getInt32Ty()); + WriteMemory(addr_i64, frs_i32); + SetGpr(ra, addr_i64); } void Compiler::STFD(u32 frs, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty()); - WriteMemory(addr_i64, frs_i64); + auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty()); + WriteMemory(addr_i64, frs_i64); } void Compiler::STFDU(u32 frs, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty()); - WriteMemory(addr_i64, frs_i64); - SetGpr(ra, addr_i64); + auto frs_i64 = m_ir_builder->CreateBitCast(GetFpr(frs), m_ir_builder->getInt64Ty()); + WriteMemory(addr_i64, frs_i64); + SetGpr(ra, addr_i64); } void Compiler::LD(u32 rd, u32 ra, s32 ds) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto mem_i64 = ReadMemory(addr_i64, 64); - SetGpr(rd, mem_i64); + auto mem_i64 = ReadMemory(addr_i64, 64); + SetGpr(rd, mem_i64); } void Compiler::LDU(u32 rd, u32 ra, s32 ds) { - auto addr_i64 
= (Value *)m_ir_builder->getInt64((s64)ds); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - auto mem_i64 = ReadMemory(addr_i64, 64); - SetGpr(rd, mem_i64); - SetGpr(ra, addr_i64); + auto mem_i64 = ReadMemory(addr_i64, 64); + SetGpr(rd, mem_i64); + SetGpr(ra, addr_i64); } void Compiler::LWA(u32 rd, u32 ra, s32 ds) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - auto mem_i32 = ReadMemory(addr_i64, 32); - auto mem_i64 = m_ir_builder->CreateSExt(mem_i32, m_ir_builder->getInt64Ty()); - SetGpr(rd, mem_i64); + auto mem_i32 = ReadMemory(addr_i64, 32); + auto mem_i64 = m_ir_builder->CreateSExt(mem_i32, m_ir_builder->getInt64Ty()); + SetGpr(rd, mem_i64); } void Compiler::FDIVS(u32 frd, u32 fra, u32 frb, u32 rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto res_f64 = m_ir_builder->CreateFDiv(ra_f64, rb_f64); - auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - SetFpr(frd, res_f32); + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto res_f64 = m_ir_builder->CreateFDiv(ra_f64, rb_f64); + auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); + SetFpr(frd, res_f32); - if (rc) { - // TODO: Implement this - CompilationError("FDIVS."); - } + if (rc) { + // TODO: Implement this + CompilationError("FDIVS."); + } - // TODO: Set flags + // TODO: Set flags } void Compiler::FSUBS(u32 frd, u32 fra, u32 frb, u32 rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto res_f64 = m_ir_builder->CreateFSub(ra_f64, rb_f64); - auto res_f32 = 
m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - SetFpr(frd, res_f32); + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto res_f64 = m_ir_builder->CreateFSub(ra_f64, rb_f64); + auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); + SetFpr(frd, res_f32); - if (rc) { - // TODO: Implement this - CompilationError("FSUBS."); - } + if (rc) { + // TODO: Implement this + CompilationError("FSUBS."); + } - // TODO: Set flags + // TODO: Set flags } void Compiler::FADDS(u32 frd, u32 fra, u32 frb, u32 rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto res_f64 = m_ir_builder->CreateFAdd(ra_f64, rb_f64); - auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - SetFpr(frd, res_f32); + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto res_f64 = m_ir_builder->CreateFAdd(ra_f64, rb_f64); + auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); + SetFpr(frd, res_f32); - if (rc) { - // TODO: Implement this - CompilationError("FADDS."); - } + if (rc) { + // TODO: Implement this + CompilationError("FADDS."); + } - // TODO: Set flags + // TODO: Set flags } void Compiler::FSQRTS(u32 frd, u32 frb, u32 rc) { - auto rb_f64 = GetFpr(frb); - auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::sqrt, m_ir_builder->getDoubleTy()), rb_f64); - auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - SetFpr(frd, res_f32); + auto rb_f64 = GetFpr(frb); + auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::sqrt, m_ir_builder->getDoubleTy()), rb_f64); + auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); + SetFpr(frd, res_f32); - if (rc) { - // TODO: Implement this - CompilationError("FSQRTS."); - } + if (rc) { + // TODO: Implement this + CompilationError("FSQRTS."); + } - // TODO: Set flags + // TODO: Set flags } void 
Compiler::FRES(u32 frd, u32 frb, u32 rc) { - auto rb_f64 = GetFpr(frb); - auto res_f64 = m_ir_builder->CreateFDiv(ConstantFP::get(m_ir_builder->getDoubleTy(), 1.0), rb_f64); - auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - SetFpr(frd, res_f32); + auto rb_f64 = GetFpr(frb); + auto res_f64 = m_ir_builder->CreateFDiv(ConstantFP::get(m_ir_builder->getDoubleTy(), 1.0), rb_f64); + auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); + SetFpr(frd, res_f32); - if (rc) { - // TODO: Implement this - CompilationError("FRES."); - } + if (rc) { + // TODO: Implement this + CompilationError("FRES."); + } - // TODO: Set flags + // TODO: Set flags } void Compiler::FMULS(u32 frd, u32 fra, u32 frc, u32 rc) { - auto ra_f64 = GetFpr(fra); - auto rc_f64 = GetFpr(frc); - auto res_f64 = m_ir_builder->CreateFMul(ra_f64, rc_f64); - auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - SetFpr(frd, res_f32); + auto ra_f64 = GetFpr(fra); + auto rc_f64 = GetFpr(frc); + auto res_f64 = m_ir_builder->CreateFMul(ra_f64, rc_f64); + auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); + SetFpr(frd, res_f32); - if (rc) { - // TODO: Implement this - CompilationError("FMULS."); - } + if (rc) { + // TODO: Implement this + CompilationError("FMULS."); + } - // TODO: Set flags + // TODO: Set flags } void Compiler::FMADDS(u32 frd, u32 fra, u32 frc, u32 frb, u32 rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto rc_f64 = GetFpr(frc); - auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); - auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - SetFpr(frd, res_f32); + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto rc_f64 = GetFpr(frc); + auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, 
Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); + auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); + SetFpr(frd, res_f32); - if (rc) { - // TODO: Implement this - CompilationError("FMADDS."); - } + if (rc) { + // TODO: Implement this + CompilationError("FMADDS."); + } - // TODO: Set flags + // TODO: Set flags } void Compiler::FMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, u32 rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto rc_f64 = GetFpr(frc); - rb_f64 = m_ir_builder->CreateFNeg(rb_f64); - auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); - auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - SetFpr(frd, res_f32); + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto rc_f64 = GetFpr(frc); + rb_f64 = m_ir_builder->CreateFNeg(rb_f64); + auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); + auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); + SetFpr(frd, res_f32); - if (rc) { - // TODO: Implement this - CompilationError("FMSUBS."); - } + if (rc) { + // TODO: Implement this + CompilationError("FMSUBS."); + } - // TODO: Set flags + // TODO: Set flags } void Compiler::FNMSUBS(u32 frd, u32 fra, u32 frc, u32 frb, u32 rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto rc_f64 = GetFpr(frc); - rb_f64 = m_ir_builder->CreateFNeg(rb_f64); - auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); - res_f64 = m_ir_builder->CreateFNeg(res_f64); - auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - SetFpr(frd, res_f32); + auto ra_f64 = GetFpr(fra); + auto rb_f64 = 
GetFpr(frb); + auto rc_f64 = GetFpr(frc); + rb_f64 = m_ir_builder->CreateFNeg(rb_f64); + auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); + res_f64 = m_ir_builder->CreateFNeg(res_f64); + auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); + SetFpr(frd, res_f32); - if (rc) { - // TODO: Implement this - CompilationError("FNMSUBS."); - } + if (rc) { + // TODO: Implement this + CompilationError("FNMSUBS."); + } - // TODO: Set flags + // TODO: Set flags } void Compiler::FNMADDS(u32 frd, u32 fra, u32 frc, u32 frb, u32 rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto rc_f64 = GetFpr(frc); - auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); - res_f64 = m_ir_builder->CreateFNeg(res_f64); - auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); - SetFpr(frd, res_f32); + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto rc_f64 = GetFpr(frc); + auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); + res_f64 = m_ir_builder->CreateFNeg(res_f64); + auto res_f32 = m_ir_builder->CreateFPTrunc(res_f64, m_ir_builder->getFloatTy()); + SetFpr(frd, res_f32); - if (rc) { - // TODO: Implement this - CompilationError("FNMADDS."); - } + if (rc) { + // TODO: Implement this + CompilationError("FNMADDS."); + } - // TODO: Set flags + // TODO: Set flags } void Compiler::STD(u32 rs, u32 ra, s32 d) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); - if (ra) { - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - } + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)d); + if (ra) { + auto ra_i64 = GetGpr(ra); + addr_i64 = 
m_ir_builder->CreateAdd(ra_i64, addr_i64); + } - WriteMemory(addr_i64, GetGpr(rs, 64)); + WriteMemory(addr_i64, GetGpr(rs, 64)); } void Compiler::STDU(u32 rs, u32 ra, s32 ds) { - auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); - auto ra_i64 = GetGpr(ra); - addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); + auto addr_i64 = (Value *)m_ir_builder->getInt64((s64)ds); + auto ra_i64 = GetGpr(ra); + addr_i64 = m_ir_builder->CreateAdd(ra_i64, addr_i64); - WriteMemory(addr_i64, GetGpr(rs, 64)); - SetGpr(ra, addr_i64); + WriteMemory(addr_i64, GetGpr(rs, 64)); + SetGpr(ra, addr_i64); } void Compiler::MTFSB1(u32 crbd, u32 rc) { - auto fpscr_i32 = GetFpscr(); - fpscr_i32 = SetBit(fpscr_i32, crbd, m_ir_builder->getInt32(1), false); - SetFpscr(fpscr_i32); + auto fpscr_i32 = GetFpscr(); + fpscr_i32 = SetBit(fpscr_i32, crbd, m_ir_builder->getInt32(1), false); + SetFpscr(fpscr_i32); - if (rc) { - // TODO: Implement this - CompilationError("MTFSB1."); - } + if (rc) { + // TODO: Implement this + CompilationError("MTFSB1."); + } } void Compiler::MCRFS(u32 crbd, u32 crbs) { - auto fpscr_i32 = GetFpscr(); - auto val_i32 = GetNibble(fpscr_i32, crbs); - SetCrField(crbd, val_i32); + auto fpscr_i32 = GetFpscr(); + auto val_i32 = GetNibble(fpscr_i32, crbs); + SetCrField(crbd, val_i32); - switch (crbs) { - case 0: - fpscr_i32 = ClrBit(fpscr_i32, 0); - fpscr_i32 = ClrBit(fpscr_i32, 3); - break; - case 1: - fpscr_i32 = ClrNibble(fpscr_i32, 1); - break; - case 2: - fpscr_i32 = ClrNibble(fpscr_i32, 2); - break; - case 3: - fpscr_i32 = ClrBit(fpscr_i32, 12); - break; - case 5: - fpscr_i32 = ClrBit(fpscr_i32, 21); - fpscr_i32 = ClrBit(fpscr_i32, 22); - fpscr_i32 = ClrBit(fpscr_i32, 23); - break; - default: - break; - } + switch (crbs) { + case 0: + fpscr_i32 = ClrBit(fpscr_i32, 0); + fpscr_i32 = ClrBit(fpscr_i32, 3); + break; + case 1: + fpscr_i32 = ClrNibble(fpscr_i32, 1); + break; + case 2: + fpscr_i32 = ClrNibble(fpscr_i32, 2); + break; + case 3: + fpscr_i32 = ClrBit(fpscr_i32, 
12); + break; + case 5: + fpscr_i32 = ClrBit(fpscr_i32, 21); + fpscr_i32 = ClrBit(fpscr_i32, 22); + fpscr_i32 = ClrBit(fpscr_i32, 23); + break; + default: + break; + } - SetFpscr(fpscr_i32); + SetFpscr(fpscr_i32); } void Compiler::MTFSB0(u32 crbd, u32 rc) { - auto fpscr_i32 = GetFpscr(); - fpscr_i32 = ClrBit(fpscr_i32, crbd); - SetFpscr(fpscr_i32); + auto fpscr_i32 = GetFpscr(); + fpscr_i32 = ClrBit(fpscr_i32, crbd); + SetFpscr(fpscr_i32); - if (rc) { - // TODO: Implement this - CompilationError("MTFSB0."); - } + if (rc) { + // TODO: Implement this + CompilationError("MTFSB0."); + } } void Compiler::MTFSFI(u32 crfd, u32 i, u32 rc) { - auto fpscr_i32 = GetFpscr(); - fpscr_i32 = SetNibble(fpscr_i32, crfd, m_ir_builder->getInt32(i & 0xF)); - SetFpscr(fpscr_i32); + auto fpscr_i32 = GetFpscr(); + fpscr_i32 = SetNibble(fpscr_i32, crfd, m_ir_builder->getInt32(i & 0xF)); + SetFpscr(fpscr_i32); - if (rc) { - // TODO: Implement this - CompilationError("MTFSFI."); - } + if (rc) { + // TODO: Implement this + CompilationError("MTFSFI."); + } } void Compiler::MFFS(u32 frd, u32 rc) { - auto fpscr_i32 = GetFpscr(); - auto fpscr_i64 = m_ir_builder->CreateZExt(fpscr_i32, m_ir_builder->getInt64Ty()); - SetFpr(frd, fpscr_i64); + auto fpscr_i32 = GetFpscr(); + auto fpscr_i64 = m_ir_builder->CreateZExt(fpscr_i32, m_ir_builder->getInt64Ty()); + SetFpr(frd, fpscr_i64); - if (rc) { - // TODO: Implement this - CompilationError("MFFS."); - } + if (rc) { + // TODO: Implement this + CompilationError("MFFS."); + } } void Compiler::MTFSF(u32 flm, u32 frb, u32 rc) { - u32 mask = 0; - for (u32 i = 0; i < 8; i++) { - if (flm & (1 << i)) { - mask |= 0xF << (i * 4); - } - } + u32 mask = 0; + for (u32 i = 0; i < 8; i++) { + if (flm & (1 << i)) { + mask |= 0xF << (i * 4); + } + } - auto rb_i32 = GetFpr(frb, 32, true); - auto fpscr_i32 = GetFpscr(); - fpscr_i32 = m_ir_builder->CreateAnd(fpscr_i32, ~mask); - rb_i32 = m_ir_builder->CreateAnd(rb_i32, mask); - fpscr_i32 = m_ir_builder->CreateOr(fpscr_i32, 
rb_i32); - SetFpscr(fpscr_i32); + auto rb_i32 = GetFpr(frb, 32, true); + auto fpscr_i32 = GetFpscr(); + fpscr_i32 = m_ir_builder->CreateAnd(fpscr_i32, ~mask); + rb_i32 = m_ir_builder->CreateAnd(rb_i32, mask); + fpscr_i32 = m_ir_builder->CreateOr(fpscr_i32, rb_i32); + SetFpscr(fpscr_i32); - if (rc) { - // TODO: Implement this - CompilationError("MTFSF."); - } + if (rc) { + // TODO: Implement this + CompilationError("MTFSF."); + } } void Compiler::FCMPU(u32 crfd, u32 fra, u32 frb) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto lt_i1 = m_ir_builder->CreateFCmpOLT(ra_f64, rb_f64); - auto gt_i1 = m_ir_builder->CreateFCmpOGT(ra_f64, rb_f64); - auto eq_i1 = m_ir_builder->CreateFCmpOEQ(ra_f64, rb_f64); - auto cr_i32 = GetCr(); - cr_i32 = SetNibble(cr_i32, crfd, lt_i1, gt_i1, eq_i1, m_ir_builder->getInt1(false)); - SetCr(cr_i32); + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto lt_i1 = m_ir_builder->CreateFCmpOLT(ra_f64, rb_f64); + auto gt_i1 = m_ir_builder->CreateFCmpOGT(ra_f64, rb_f64); + auto eq_i1 = m_ir_builder->CreateFCmpOEQ(ra_f64, rb_f64); + auto cr_i32 = GetCr(); + cr_i32 = SetNibble(cr_i32, crfd, lt_i1, gt_i1, eq_i1, m_ir_builder->getInt1(false)); + SetCr(cr_i32); - // TODO: Set flags / Handle NaN + // TODO: Set flags / Handle NaN } void Compiler::FRSP(u32 frd, u32 frb, u32 rc) { - auto rb_f64 = GetFpr(frb); - auto res_f32 = m_ir_builder->CreateFPTrunc(rb_f64, m_ir_builder->getFloatTy()); - auto res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy()); - SetFpr(frd, res_f64); + auto rb_f64 = GetFpr(frb); + auto res_f32 = m_ir_builder->CreateFPTrunc(rb_f64, m_ir_builder->getFloatTy()); + auto res_f64 = m_ir_builder->CreateFPExt(res_f32, m_ir_builder->getDoubleTy()); + SetFpr(frd, res_f64); - if (rc) { - // TODO: Implement this - CompilationError("FRSP."); - } + if (rc) { + // TODO: Implement this + CompilationError("FRSP."); + } - // TODO: Revisit this - // TODO: Set flags + // TODO: Revisit this + // TODO: Set 
flags } void Compiler::FCTIW(u32 frd, u32 frb, u32 rc) { - auto rb_f64 = GetFpr(frb); - auto max_i1 = m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 2147483647.0)); - auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -2147483648.0)); - auto res_i32 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt32Ty()); - auto res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); - res_i64 = m_ir_builder->CreateSelect(max_i1, m_ir_builder->getInt64(0x7FFFFFFF), res_i64); - res_i64 = m_ir_builder->CreateSelect(min_i1, m_ir_builder->getInt64(0x80000000), res_i64); - SetFpr(frd, res_i64); + auto rb_f64 = GetFpr(frb); + auto max_i1 = m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 2147483647.0)); + auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -2147483648.0)); + auto res_i32 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt32Ty()); + auto res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); + res_i64 = m_ir_builder->CreateSelect(max_i1, m_ir_builder->getInt64(0x7FFFFFFF), res_i64); + res_i64 = m_ir_builder->CreateSelect(min_i1, m_ir_builder->getInt64(0x80000000), res_i64); + SetFpr(frd, res_i64); - if (rc) { - // TODO: Implement this - CompilationError("FCTIW."); - } + if (rc) { + // TODO: Implement this + CompilationError("FCTIW."); + } - // TODO: Set flags / Implement rounding modes + // TODO: Set flags / Implement rounding modes } void Compiler::FCTIWZ(u32 frd, u32 frb, u32 rc) { - auto rb_f64 = GetFpr(frb); - auto max_i1 = m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 2147483647.0)); - auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -2147483648.0)); - auto res_i32 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt32Ty()); - auto res_i64 = m_ir_builder->CreateZExt(res_i32, 
m_ir_builder->getInt64Ty()); - res_i64 = m_ir_builder->CreateSelect(max_i1, m_ir_builder->getInt64(0x7FFFFFFF), res_i64); - res_i64 = m_ir_builder->CreateSelect(min_i1, m_ir_builder->getInt64(0x80000000), res_i64); - SetFpr(frd, res_i64); + auto rb_f64 = GetFpr(frb); + auto max_i1 = m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 2147483647.0)); + auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -2147483648.0)); + auto res_i32 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt32Ty()); + auto res_i64 = m_ir_builder->CreateZExt(res_i32, m_ir_builder->getInt64Ty()); + res_i64 = m_ir_builder->CreateSelect(max_i1, m_ir_builder->getInt64(0x7FFFFFFF), res_i64); + res_i64 = m_ir_builder->CreateSelect(min_i1, m_ir_builder->getInt64(0x80000000), res_i64); + SetFpr(frd, res_i64); - if (rc) { - // TODO: Implement this - CompilationError("FCTIWZ."); - } + if (rc) { + // TODO: Implement this + CompilationError("FCTIWZ."); + } - // TODO: Set flags + // TODO: Set flags } void Compiler::FDIV(u32 frd, u32 fra, u32 frb, u32 rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto res_f64 = m_ir_builder->CreateFDiv(ra_f64, rb_f64); - SetFpr(frd, res_f64); + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto res_f64 = m_ir_builder->CreateFDiv(ra_f64, rb_f64); + SetFpr(frd, res_f64); - if (rc) { - // TODO: Implement this - CompilationError("FDIV."); - } + if (rc) { + // TODO: Implement this + CompilationError("FDIV."); + } - // TODO: Set flags + // TODO: Set flags } void Compiler::FSUB(u32 frd, u32 fra, u32 frb, u32 rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto res_f64 = m_ir_builder->CreateFSub(ra_f64, rb_f64); - SetFpr(frd, res_f64); + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto res_f64 = m_ir_builder->CreateFSub(ra_f64, rb_f64); + SetFpr(frd, res_f64); - if (rc) { - // TODO: Implement this - CompilationError("FSUB."); - } + if 
(rc) { + // TODO: Implement this + CompilationError("FSUB."); + } - // TODO: Set flags + // TODO: Set flags } void Compiler::FADD(u32 frd, u32 fra, u32 frb, u32 rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto res_f64 = m_ir_builder->CreateFAdd(ra_f64, rb_f64); - SetFpr(frd, res_f64); + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto res_f64 = m_ir_builder->CreateFAdd(ra_f64, rb_f64); + SetFpr(frd, res_f64); - if (rc) { - // TODO: Implement this - CompilationError("FADD."); - } + if (rc) { + // TODO: Implement this + CompilationError("FADD."); + } - // TODO: Set flags + // TODO: Set flags } void Compiler::FSQRT(u32 frd, u32 frb, u32 rc) { - auto rb_f64 = GetFpr(frb); - auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::sqrt, m_ir_builder->getDoubleTy()), rb_f64); - SetFpr(frd, res_f64); + auto rb_f64 = GetFpr(frb); + auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::sqrt, m_ir_builder->getDoubleTy()), rb_f64); + SetFpr(frd, res_f64); - if (rc) { - // TODO: Implement this - CompilationError("FSQRT."); - } + if (rc) { + // TODO: Implement this + CompilationError("FSQRT."); + } - // TODO: Set flags + // TODO: Set flags } void Compiler::FSEL(u32 frd, u32 fra, u32 frc, u32 frb, u32 rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto rc_f64 = GetFpr(frc); - auto cmp_i1 = m_ir_builder->CreateFCmpOGE(ra_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 0.0)); - auto res_f64 = m_ir_builder->CreateSelect(cmp_i1, rc_f64, rb_f64); - SetFpr(frd, res_f64); + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto rc_f64 = GetFpr(frc); + auto cmp_i1 = m_ir_builder->CreateFCmpOGE(ra_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 0.0)); + auto res_f64 = m_ir_builder->CreateSelect(cmp_i1, rc_f64, rb_f64); + SetFpr(frd, res_f64); - if (rc) { - // TODO: Implement this - CompilationError("FSEL."); - } + if (rc) { + // TODO: 
Implement this + CompilationError("FSEL."); + } - // TODO: Set flags + // TODO: Set flags } void Compiler::FMUL(u32 frd, u32 fra, u32 frc, u32 rc) { - auto ra_f64 = GetFpr(fra); - auto rc_f64 = GetFpr(frc); - auto res_f64 = m_ir_builder->CreateFMul(ra_f64, rc_f64); - SetFpr(frd, res_f64); + auto ra_f64 = GetFpr(fra); + auto rc_f64 = GetFpr(frc); + auto res_f64 = m_ir_builder->CreateFMul(ra_f64, rc_f64); + SetFpr(frd, res_f64); - if (rc) { - // TODO: Implement this - CompilationError("FMUL."); - } + if (rc) { + // TODO: Implement this + CompilationError("FMUL."); + } - // TODO: Set flags + // TODO: Set flags } void Compiler::FRSQRTE(u32 frd, u32 frb, u32 rc) { - auto rb_f64 = GetFpr(frb); - auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::sqrt, m_ir_builder->getDoubleTy()), rb_f64); - res_f64 = m_ir_builder->CreateFDiv(ConstantFP::get(m_ir_builder->getDoubleTy(), 1.0), res_f64); - SetFpr(frd, res_f64); + auto rb_f64 = GetFpr(frb); + auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::sqrt, m_ir_builder->getDoubleTy()), rb_f64); + res_f64 = m_ir_builder->CreateFDiv(ConstantFP::get(m_ir_builder->getDoubleTy(), 1.0), res_f64); + SetFpr(frd, res_f64); - if (rc) { - // TODO: Implement this - CompilationError("FRSQRTE."); - } + if (rc) { + // TODO: Implement this + CompilationError("FRSQRTE."); + } } void Compiler::FMSUB(u32 frd, u32 fra, u32 frc, u32 frb, u32 rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto rc_f64 = GetFpr(frc); - rb_f64 = m_ir_builder->CreateFNeg(rb_f64); - auto res_f64 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); - SetFpr(frd, res_f64); + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto rc_f64 = GetFpr(frc); + rb_f64 = m_ir_builder->CreateFNeg(rb_f64); + auto res_f64 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, 
Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); + SetFpr(frd, res_f64); - if (rc) { - // TODO: Implement this - CompilationError("FMSUB."); - } + if (rc) { + // TODO: Implement this + CompilationError("FMSUB."); + } - // TODO: Set flags + // TODO: Set flags } void Compiler::FMADD(u32 frd, u32 fra, u32 frc, u32 frb, u32 rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto rc_f64 = GetFpr(frc); - auto res_f64 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); - SetFpr(frd, res_f64); + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto rc_f64 = GetFpr(frc); + auto res_f64 = m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); + SetFpr(frd, res_f64); - if (rc) { - // TODO: Implement this - CompilationError("FMADD."); - } + if (rc) { + // TODO: Implement this + CompilationError("FMADD."); + } - // TODO: Set flags + // TODO: Set flags } void Compiler::FNMSUB(u32 frd, u32 fra, u32 frc, u32 frb, u32 rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto rc_f64 = GetFpr(frc); - rc_f64 = m_ir_builder->CreateFNeg(rc_f64); - auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); - SetFpr(frd, res_f64); + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto rc_f64 = GetFpr(frc); + rc_f64 = m_ir_builder->CreateFNeg(rc_f64); + auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); + SetFpr(frd, res_f64); - if (rc) { - // TODO: Implement this - CompilationError("FNMSUB."); - } + if (rc) { + // TODO: Implement this + CompilationError("FNMSUB."); + } - // TODO: Set flags + // TODO: Set flags } void Compiler::FNMADD(u32 frd, 
u32 fra, u32 frc, u32 frb, u32 rc) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto rc_f64 = GetFpr(frc); - rb_f64 = m_ir_builder->CreateFNeg(rb_f64); - rc_f64 = m_ir_builder->CreateFNeg(rc_f64); - auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); - SetFpr(frd, res_f64); + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto rc_f64 = GetFpr(frc); + rb_f64 = m_ir_builder->CreateFNeg(rb_f64); + rc_f64 = m_ir_builder->CreateFNeg(rc_f64); + auto res_f64 = (Value *)m_ir_builder->CreateCall3(Intrinsic::getDeclaration(m_module, Intrinsic::fmuladd, m_ir_builder->getDoubleTy()), ra_f64, rc_f64, rb_f64); + SetFpr(frd, res_f64); - if (rc) { - // TODO: Implement this - CompilationError("FNMADD."); - } + if (rc) { + // TODO: Implement this + CompilationError("FNMADD."); + } - // TODO: Set flags + // TODO: Set flags } void Compiler::FCMPO(u32 crfd, u32 fra, u32 frb) { - auto ra_f64 = GetFpr(fra); - auto rb_f64 = GetFpr(frb); - auto lt_i1 = m_ir_builder->CreateFCmpOLT(ra_f64, rb_f64); - auto gt_i1 = m_ir_builder->CreateFCmpOGT(ra_f64, rb_f64); - auto eq_i1 = m_ir_builder->CreateFCmpOEQ(ra_f64, rb_f64); - auto cr_i32 = GetCr(); - cr_i32 = SetNibble(cr_i32, crfd, lt_i1, gt_i1, eq_i1, m_ir_builder->getInt1(false)); - SetCr(cr_i32); + auto ra_f64 = GetFpr(fra); + auto rb_f64 = GetFpr(frb); + auto lt_i1 = m_ir_builder->CreateFCmpOLT(ra_f64, rb_f64); + auto gt_i1 = m_ir_builder->CreateFCmpOGT(ra_f64, rb_f64); + auto eq_i1 = m_ir_builder->CreateFCmpOEQ(ra_f64, rb_f64); + auto cr_i32 = GetCr(); + cr_i32 = SetNibble(cr_i32, crfd, lt_i1, gt_i1, eq_i1, m_ir_builder->getInt1(false)); + SetCr(cr_i32); - // TODO: Set flags / Handle NaN + // TODO: Set flags / Handle NaN } void Compiler::FNEG(u32 frd, u32 frb, u32 rc) { - auto rb_f64 = GetFpr(frb); - rb_f64 = m_ir_builder->CreateFNeg(rb_f64); - SetFpr(frd, rb_f64); + auto rb_f64 = GetFpr(frb); + rb_f64 = 
m_ir_builder->CreateFNeg(rb_f64); + SetFpr(frd, rb_f64); - if (rc) { - // TODO: Implement this - CompilationError("FNEG."); - } + if (rc) { + // TODO: Implement this + CompilationError("FNEG."); + } - // TODO: Set flags + // TODO: Set flags } void Compiler::FMR(u32 frd, u32 frb, u32 rc) { - SetFpr(frd, GetFpr(frb)); + SetFpr(frd, GetFpr(frb)); - if (rc) { - // TODO: Implement this - CompilationError("FMR."); - } + if (rc) { + // TODO: Implement this + CompilationError("FMR."); + } - // TODO: Set flags + // TODO: Set flags } void Compiler::FNABS(u32 frd, u32 frb, u32 rc) { - auto rb_f64 = GetFpr(frb); - auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::fabs, m_ir_builder->getDoubleTy()), rb_f64); - res_f64 = m_ir_builder->CreateFNeg(res_f64); - SetFpr(frd, res_f64); + auto rb_f64 = GetFpr(frb); + auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::fabs, m_ir_builder->getDoubleTy()), rb_f64); + res_f64 = m_ir_builder->CreateFNeg(res_f64); + SetFpr(frd, res_f64); - if (rc) { - // TODO: Implement this - CompilationError("FNABS."); - } + if (rc) { + // TODO: Implement this + CompilationError("FNABS."); + } - // TODO: Set flags + // TODO: Set flags } void Compiler::FABS(u32 frd, u32 frb, u32 rc) { - auto rb_f64 = GetFpr(frb); - auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::fabs, m_ir_builder->getDoubleTy()), rb_f64); - SetFpr(frd, res_f64); + auto rb_f64 = GetFpr(frb); + auto res_f64 = (Value *)m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::fabs, m_ir_builder->getDoubleTy()), rb_f64); + SetFpr(frd, res_f64); - if (rc) { - // TODO: Implement this - CompilationError("FABS."); - } + if (rc) { + // TODO: Implement this + CompilationError("FABS."); + } - // TODO: Set flags + // TODO: Set flags } void Compiler::FCTID(u32 frd, u32 frb, u32 rc) { - auto rb_f64 = GetFpr(frb); - auto max_i1 = 
m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 9223372036854775807.0)); - auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -9223372036854775808.0)); - auto res_i64 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt64Ty()); - res_i64 = m_ir_builder->CreateSelect(max_i1, m_ir_builder->getInt64(0x7FFFFFFFFFFFFFFF), res_i64); - res_i64 = m_ir_builder->CreateSelect(min_i1, m_ir_builder->getInt64(0x8000000000000000), res_i64); - SetFpr(frd, res_i64); + auto rb_f64 = GetFpr(frb); + auto max_i1 = m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 9223372036854775807.0)); + auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -9223372036854775808.0)); + auto res_i64 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt64Ty()); + res_i64 = m_ir_builder->CreateSelect(max_i1, m_ir_builder->getInt64(0x7FFFFFFFFFFFFFFF), res_i64); + res_i64 = m_ir_builder->CreateSelect(min_i1, m_ir_builder->getInt64(0x8000000000000000), res_i64); + SetFpr(frd, res_i64); - if (rc) { - // TODO: Implement this - CompilationError("FCTID."); - } + if (rc) { + // TODO: Implement this + CompilationError("FCTID."); + } - // TODO: Set flags / Implement rounding modes + // TODO: Set flags / Implement rounding modes } void Compiler::FCTIDZ(u32 frd, u32 frb, u32 rc) { - auto rb_f64 = GetFpr(frb); - auto max_i1 = m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 9223372036854775807.0)); - auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -9223372036854775808.0)); - auto res_i64 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt64Ty()); - res_i64 = m_ir_builder->CreateSelect(max_i1, m_ir_builder->getInt64(0x7FFFFFFFFFFFFFFF), res_i64); - res_i64 = m_ir_builder->CreateSelect(min_i1, m_ir_builder->getInt64(0x8000000000000000), res_i64); - SetFpr(frd, res_i64); + auto 
rb_f64 = GetFpr(frb); + auto max_i1 = m_ir_builder->CreateFCmpOGT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), 9223372036854775807.0)); + auto min_i1 = m_ir_builder->CreateFCmpULT(rb_f64, ConstantFP::get(m_ir_builder->getDoubleTy(), -9223372036854775808.0)); + auto res_i64 = m_ir_builder->CreateFPToSI(rb_f64, m_ir_builder->getInt64Ty()); + res_i64 = m_ir_builder->CreateSelect(max_i1, m_ir_builder->getInt64(0x7FFFFFFFFFFFFFFF), res_i64); + res_i64 = m_ir_builder->CreateSelect(min_i1, m_ir_builder->getInt64(0x8000000000000000), res_i64); + SetFpr(frd, res_i64); - if (rc) { - // TODO: Implement this - CompilationError("FCTIDZ."); - } + if (rc) { + // TODO: Implement this + CompilationError("FCTIDZ."); + } - // TODO: Set flags + // TODO: Set flags } void Compiler::FCFID(u32 frd, u32 frb, u32 rc) { - auto rb_i64 = GetFpr(frb, 64, true); - auto res_f64 = m_ir_builder->CreateSIToFP(rb_i64, m_ir_builder->getDoubleTy()); - SetFpr(frd, res_f64); + auto rb_i64 = GetFpr(frb, 64, true); + auto res_f64 = m_ir_builder->CreateSIToFP(rb_i64, m_ir_builder->getDoubleTy()); + SetFpr(frd, res_f64); - if (rc) { - // TODO: Implement this - CompilationError("FCFID."); - } + if (rc) { + // TODO: Implement this + CompilationError("FCFID."); + } - // TODO: Set flags + // TODO: Set flags } void Compiler::UNK(const u32 code, const u32 opcode, const u32 gcode) { - CompilationError(fmt::Format("Unknown/Illegal opcode! (0x%08x : 0x%x : 0x%x)", code, opcode, gcode)); + CompilationError(fmt::Format("Unknown/Illegal opcode! 
(0x%08x : 0x%x : 0x%x)", code, opcode, gcode)); } std::string Compiler::GetBasicBlockNameFromAddress(u32 address, const std::string & suffix) const { - std::string name; + std::string name; - if (address == 0) { - name = "entry"; - } - else if (address == 0xFFFFFFFF) { - name = "default_exit"; - } - else { - name = fmt::Format("instr_0x%08X", address); - } + if (address == 0) { + name = "entry"; + } + else if (address == 0xFFFFFFFF) { + name = "default_exit"; + } + else { + name = fmt::Format("instr_0x%08X", address); + } - if (suffix != "") { - name += "_" + suffix; - } + if (suffix != "") { + name += "_" + suffix; + } - return name; + return name; } u32 Compiler::GetAddressFromBasicBlockName(const std::string & name) const { - if (name.compare(0, 6, "instr_") == 0) { - return strtoul(name.c_str() + 6, nullptr, 0); - } - else if (name == GetBasicBlockNameFromAddress(0)) { - return 0; - } - else if (name == GetBasicBlockNameFromAddress(0xFFFFFFFF)) { - return 0xFFFFFFFF; - } + if (name.compare(0, 6, "instr_") == 0) { + return strtoul(name.c_str() + 6, nullptr, 0); + } + else if (name == GetBasicBlockNameFromAddress(0)) { + return 0; + } + else if (name == GetBasicBlockNameFromAddress(0xFFFFFFFF)) { + return 0xFFFFFFFF; + } - return 0; + return 0; } BasicBlock * Compiler::GetBasicBlockFromAddress(u32 address, const std::string & suffix, bool create_if_not_exist) { - auto block_name = GetBasicBlockNameFromAddress(address, suffix); - BasicBlock * block = nullptr; - BasicBlock * next_block = nullptr; - for (auto i = m_state.function->begin(); i != m_state.function->end(); i++) { - if (i->getName() == block_name) { - block = &(*i); - break; - } + auto block_name = GetBasicBlockNameFromAddress(address, suffix); + BasicBlock * block = nullptr; + BasicBlock * next_block = nullptr; + for (auto i = m_state.function->begin(); i != m_state.function->end(); i++) { + if (i->getName() == block_name) { + block = &(*i); + break; + } - auto block_address = 
GetAddressFromBasicBlockName(i->getName()); - if (block_address > address) { - next_block = &(*i); - break; - } - } + auto block_address = GetAddressFromBasicBlockName(i->getName()); + if (block_address > address) { + next_block = &(*i); + break; + } + } - if (!block && create_if_not_exist) { - block = BasicBlock::Create(m_ir_builder->getContext(), block_name, m_state.function, next_block); - } + if (!block && create_if_not_exist) { + block = BasicBlock::Create(m_ir_builder->getContext(), block_name, m_state.function, next_block); + } - return block; + return block; } Value * Compiler::GetBit(Value * val, u32 n) { - Value * bit; + Value * bit; #ifdef PPU_LLVM_RECOMPILER_USE_BMI - if (val->getType()->isIntegerTy(32)) { - bit = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_bmi_pext_32), val, m_ir_builder->getInt32(1 << (31 - n))); - } - else if (val->getType()->isIntegerTy(64)) { - bit = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_bmi_pext_64), val, m_ir_builder->getInt64((u64)1 << (63 - n))); - } - else { + if (val->getType()->isIntegerTy(32)) { + bit = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_bmi_pext_32), val, m_ir_builder->getInt32(1 << (31 - n))); + } + else if (val->getType()->isIntegerTy(64)) { + bit = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_bmi_pext_64), val, m_ir_builder->getInt64((u64)1 << (63 - n))); + } + else { #endif - if (val->getType()->getIntegerBitWidth() != (n + 1)) { - bit = m_ir_builder->CreateLShr(val, val->getType()->getIntegerBitWidth() - n - 1); - } + if (val->getType()->getIntegerBitWidth() != (n + 1)) { + bit = m_ir_builder->CreateLShr(val, val->getType()->getIntegerBitWidth() - n - 1); + } - bit = m_ir_builder->CreateAnd(bit, 1); + bit = m_ir_builder->CreateAnd(bit, 1); #ifdef PPU_LLVM_RECOMPILER_USE_BMI - } + } #endif - return bit; + return bit; } Value * Compiler::ClrBit(Value * val, u32 n) { - return 
m_ir_builder->CreateAnd(val, ~((u64)1 << (val->getType()->getIntegerBitWidth() - n - 1))); + return m_ir_builder->CreateAnd(val, ~((u64)1 << (val->getType()->getIntegerBitWidth() - n - 1))); } Value * Compiler::SetBit(Value * val, u32 n, Value * bit, bool doClear) { - if (doClear) { - val = ClrBit(val, n); - } + if (doClear) { + val = ClrBit(val, n); + } - if (bit->getType()->getIntegerBitWidth() < val->getType()->getIntegerBitWidth()) { - bit = m_ir_builder->CreateZExt(bit, val->getType()); - } - else if (bit->getType()->getIntegerBitWidth() > val->getType()->getIntegerBitWidth()) { - bit = m_ir_builder->CreateTrunc(bit, val->getType()); - } + if (bit->getType()->getIntegerBitWidth() < val->getType()->getIntegerBitWidth()) { + bit = m_ir_builder->CreateZExt(bit, val->getType()); + } + else if (bit->getType()->getIntegerBitWidth() > val->getType()->getIntegerBitWidth()) { + bit = m_ir_builder->CreateTrunc(bit, val->getType()); + } - if (val->getType()->getIntegerBitWidth() != (n + 1)) { - bit = m_ir_builder->CreateShl(bit, bit->getType()->getIntegerBitWidth() - n - 1); - } + if (val->getType()->getIntegerBitWidth() != (n + 1)) { + bit = m_ir_builder->CreateShl(bit, bit->getType()->getIntegerBitWidth() - n - 1); + } - return m_ir_builder->CreateOr(val, bit); + return m_ir_builder->CreateOr(val, bit); } Value * Compiler::GetNibble(Value * val, u32 n) { - Value * nibble; + Value * nibble; #ifdef PPU_LLVM_RECOMPILER_USE_BMI - if (val->getType()->isIntegerTy(32)) { - nibble = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_bmi_pext_32), val, m_ir_builder->getInt32((u64)0xF << ((7 - n) * 4))); - } - else if (val->getType()->isIntegerTy(64)) { - nibble = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_bmi_pext_64), val, m_ir_builder->getInt64((u64)0xF << ((15 - n) * 4))); - } - else { + if (val->getType()->isIntegerTy(32)) { + nibble = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, 
Intrinsic::x86_bmi_pext_32), val, m_ir_builder->getInt32((u64)0xF << ((7 - n) * 4))); + } + else if (val->getType()->isIntegerTy(64)) { + nibble = m_ir_builder->CreateCall2(Intrinsic::getDeclaration(m_module, Intrinsic::x86_bmi_pext_64), val, m_ir_builder->getInt64((u64)0xF << ((15 - n) * 4))); + } + else { #endif - if ((val->getType()->getIntegerBitWidth() >> 2) != (n + 1)) { - val = m_ir_builder->CreateLShr(val, (((val->getType()->getIntegerBitWidth() >> 2) - 1) - n) * 4); - } + if ((val->getType()->getIntegerBitWidth() >> 2) != (n + 1)) { + val = m_ir_builder->CreateLShr(val, (((val->getType()->getIntegerBitWidth() >> 2) - 1) - n) * 4); + } - nibble = m_ir_builder->CreateAnd(val, 0xF); + nibble = m_ir_builder->CreateAnd(val, 0xF); #ifdef PPU_LLVM_RECOMPILER_USE_BMI - } + } #endif - return nibble; + return nibble; } Value * Compiler::ClrNibble(Value * val, u32 n) { - return m_ir_builder->CreateAnd(val, ~((u64)0xF << ((((val->getType()->getIntegerBitWidth() >> 2) - 1) - n) * 4))); + return m_ir_builder->CreateAnd(val, ~((u64)0xF << ((((val->getType()->getIntegerBitWidth() >> 2) - 1) - n) * 4))); } Value * Compiler::SetNibble(Value * val, u32 n, Value * nibble, bool doClear) { - if (doClear) { - val = ClrNibble(val, n); - } + if (doClear) { + val = ClrNibble(val, n); + } - if (nibble->getType()->getIntegerBitWidth() < val->getType()->getIntegerBitWidth()) { - nibble = m_ir_builder->CreateZExt(nibble, val->getType()); - } - else if (nibble->getType()->getIntegerBitWidth() > val->getType()->getIntegerBitWidth()) { - nibble = m_ir_builder->CreateTrunc(nibble, val->getType()); - } + if (nibble->getType()->getIntegerBitWidth() < val->getType()->getIntegerBitWidth()) { + nibble = m_ir_builder->CreateZExt(nibble, val->getType()); + } + else if (nibble->getType()->getIntegerBitWidth() > val->getType()->getIntegerBitWidth()) { + nibble = m_ir_builder->CreateTrunc(nibble, val->getType()); + } - if ((val->getType()->getIntegerBitWidth() >> 2) != (n + 1)) { - nibble = 
m_ir_builder->CreateShl(nibble, (((val->getType()->getIntegerBitWidth() >> 2) - 1) - n) * 4); - } + if ((val->getType()->getIntegerBitWidth() >> 2) != (n + 1)) { + nibble = m_ir_builder->CreateShl(nibble, (((val->getType()->getIntegerBitWidth() >> 2) - 1) - n) * 4); + } - return m_ir_builder->CreateOr(val, nibble); + return m_ir_builder->CreateOr(val, nibble); } Value * Compiler::SetNibble(Value * val, u32 n, Value * b0, Value * b1, Value * b2, Value * b3, bool doClear) { - if (doClear) { - val = ClrNibble(val, n); - } + if (doClear) { + val = ClrNibble(val, n); + } - if (b0) { - val = SetBit(val, n * 4, b0, false); - } + if (b0) { + val = SetBit(val, n * 4, b0, false); + } - if (b1) { - val = SetBit(val, (n * 4) + 1, b1, false); - } + if (b1) { + val = SetBit(val, (n * 4) + 1, b1, false); + } - if (b2) { - val = SetBit(val, (n * 4) + 2, b2, false); - } + if (b2) { + val = SetBit(val, (n * 4) + 2, b2, false); + } - if (b3) { - val = SetBit(val, (n * 4) + 3, b3, false); - } + if (b3) { + val = SetBit(val, (n * 4) + 3, b3, false); + } - return val; + return val; } Value * Compiler::GetPc() { - auto pc_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, PC)); - auto pc_i32_ptr = m_ir_builder->CreateBitCast(pc_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); - return m_ir_builder->CreateAlignedLoad(pc_i32_ptr, 4); + auto pc_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, PC)); + auto pc_i32_ptr = m_ir_builder->CreateBitCast(pc_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); + return m_ir_builder->CreateAlignedLoad(pc_i32_ptr, 4); } void Compiler::SetPc(Value * val_ix) { - auto pc_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, PC)); - auto pc_i32_ptr = m_ir_builder->CreateBitCast(pc_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); - auto val_i32 
= m_ir_builder->CreateZExtOrTrunc(val_ix, m_ir_builder->getInt32Ty()); - m_ir_builder->CreateAlignedStore(val_i32, pc_i32_ptr, 4); + auto pc_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, PC)); + auto pc_i32_ptr = m_ir_builder->CreateBitCast(pc_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); + auto val_i32 = m_ir_builder->CreateZExtOrTrunc(val_ix, m_ir_builder->getInt32Ty()); + m_ir_builder->CreateAlignedStore(val_i32, pc_i32_ptr, 4); } Value * Compiler::GetGpr(u32 r, u32 num_bits) { - auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, GPR[r])); - auto r_ix_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getIntNTy(num_bits)->getPointerTo()); - return m_ir_builder->CreateAlignedLoad(r_ix_ptr, 8); + auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, GPR[r])); + auto r_ix_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getIntNTy(num_bits)->getPointerTo()); + return m_ir_builder->CreateAlignedLoad(r_ix_ptr, 8); } void Compiler::SetGpr(u32 r, Value * val_x64) { - auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, GPR[r])); - auto r_i64_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); - auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); - m_ir_builder->CreateAlignedStore(val_i64, r_i64_ptr, 8); + auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, GPR[r])); + auto r_i64_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); + auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); + m_ir_builder->CreateAlignedStore(val_i64, r_i64_ptr, 8); } Value 
* Compiler::GetCr() { - auto cr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, CR)); - auto cr_i32_ptr = m_ir_builder->CreateBitCast(cr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); - return m_ir_builder->CreateAlignedLoad(cr_i32_ptr, 4); + auto cr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, CR)); + auto cr_i32_ptr = m_ir_builder->CreateBitCast(cr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); + return m_ir_builder->CreateAlignedLoad(cr_i32_ptr, 4); } Value * Compiler::GetCrField(u32 n) { - return GetNibble(GetCr(), n); + return GetNibble(GetCr(), n); } void Compiler::SetCr(Value * val_x32) { - auto val_i32 = m_ir_builder->CreateBitCast(val_x32, m_ir_builder->getInt32Ty()); - auto cr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, CR)); - auto cr_i32_ptr = m_ir_builder->CreateBitCast(cr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); - m_ir_builder->CreateAlignedStore(val_i32, cr_i32_ptr, 4); + auto val_i32 = m_ir_builder->CreateBitCast(val_x32, m_ir_builder->getInt32Ty()); + auto cr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, CR)); + auto cr_i32_ptr = m_ir_builder->CreateBitCast(cr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); + m_ir_builder->CreateAlignedStore(val_i32, cr_i32_ptr, 4); } void Compiler::SetCrField(u32 n, Value * field) { - SetCr(SetNibble(GetCr(), n, field)); + SetCr(SetNibble(GetCr(), n, field)); } void Compiler::SetCrField(u32 n, Value * b0, Value * b1, Value * b2, Value * b3) { - SetCr(SetNibble(GetCr(), n, b0, b1, b2, b3)); + SetCr(SetNibble(GetCr(), n, b0, b1, b2, b3)); } void Compiler::SetCrFieldSignedCmp(u32 n, Value * a, Value * b) { - auto lt_i1 = m_ir_builder->CreateICmpSLT(a, b); - auto gt_i1 = 
m_ir_builder->CreateICmpSGT(a, b); - auto eq_i1 = m_ir_builder->CreateICmpEQ(a, b); - auto cr_i32 = GetCr(); - cr_i32 = SetNibble(cr_i32, n, lt_i1, gt_i1, eq_i1, GetXerSo()); - SetCr(cr_i32); + auto lt_i1 = m_ir_builder->CreateICmpSLT(a, b); + auto gt_i1 = m_ir_builder->CreateICmpSGT(a, b); + auto eq_i1 = m_ir_builder->CreateICmpEQ(a, b); + auto cr_i32 = GetCr(); + cr_i32 = SetNibble(cr_i32, n, lt_i1, gt_i1, eq_i1, GetXerSo()); + SetCr(cr_i32); } void Compiler::SetCrFieldUnsignedCmp(u32 n, Value * a, Value * b) { - auto lt_i1 = m_ir_builder->CreateICmpULT(a, b); - auto gt_i1 = m_ir_builder->CreateICmpUGT(a, b); - auto eq_i1 = m_ir_builder->CreateICmpEQ(a, b); - auto cr_i32 = GetCr(); - cr_i32 = SetNibble(cr_i32, n, lt_i1, gt_i1, eq_i1, GetXerSo()); - SetCr(cr_i32); + auto lt_i1 = m_ir_builder->CreateICmpULT(a, b); + auto gt_i1 = m_ir_builder->CreateICmpUGT(a, b); + auto eq_i1 = m_ir_builder->CreateICmpEQ(a, b); + auto cr_i32 = GetCr(); + cr_i32 = SetNibble(cr_i32, n, lt_i1, gt_i1, eq_i1, GetXerSo()); + SetCr(cr_i32); } void Compiler::SetCr6AfterVectorCompare(u32 vr) { - auto vr_v16i8 = GetVrAsIntVec(vr, 8); - auto vr_mask_i32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmovmskb_128), vr_v16i8); - auto cmp0_i1 = m_ir_builder->CreateICmpEQ(vr_mask_i32, m_ir_builder->getInt32(0)); - auto cmp1_i1 = m_ir_builder->CreateICmpEQ(vr_mask_i32, m_ir_builder->getInt32(0xFFFF)); - auto cr_i32 = GetCr(); - cr_i32 = SetNibble(cr_i32, 6, cmp1_i1, nullptr, cmp0_i1, nullptr); - SetCr(cr_i32); + auto vr_v16i8 = GetVrAsIntVec(vr, 8); + auto vr_mask_i32 = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::x86_sse2_pmovmskb_128), vr_v16i8); + auto cmp0_i1 = m_ir_builder->CreateICmpEQ(vr_mask_i32, m_ir_builder->getInt32(0)); + auto cmp1_i1 = m_ir_builder->CreateICmpEQ(vr_mask_i32, m_ir_builder->getInt32(0xFFFF)); + auto cr_i32 = GetCr(); + cr_i32 = SetNibble(cr_i32, 6, cmp1_i1, nullptr, cmp0_i1, nullptr); + SetCr(cr_i32); } 
Value * Compiler::GetLr() { - auto lr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, LR)); - auto lr_i64_ptr = m_ir_builder->CreateBitCast(lr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); - return m_ir_builder->CreateAlignedLoad(lr_i64_ptr, 8); + auto lr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, LR)); + auto lr_i64_ptr = m_ir_builder->CreateBitCast(lr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); + return m_ir_builder->CreateAlignedLoad(lr_i64_ptr, 8); } void Compiler::SetLr(Value * val_x64) { - auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); - auto lr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, LR)); - auto lr_i64_ptr = m_ir_builder->CreateBitCast(lr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); - m_ir_builder->CreateAlignedStore(val_i64, lr_i64_ptr, 8); + auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); + auto lr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, LR)); + auto lr_i64_ptr = m_ir_builder->CreateBitCast(lr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); + m_ir_builder->CreateAlignedStore(val_i64, lr_i64_ptr, 8); } Value * Compiler::GetCtr() { - auto ctr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, CTR)); - auto ctr_i64_ptr = m_ir_builder->CreateBitCast(ctr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); - return m_ir_builder->CreateAlignedLoad(ctr_i64_ptr, 8); + auto ctr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, CTR)); + auto ctr_i64_ptr = m_ir_builder->CreateBitCast(ctr_i8_ptr, 
m_ir_builder->getInt64Ty()->getPointerTo()); + return m_ir_builder->CreateAlignedLoad(ctr_i64_ptr, 8); } void Compiler::SetCtr(Value * val_x64) { - auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); - auto ctr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, CTR)); - auto ctr_i64_ptr = m_ir_builder->CreateBitCast(ctr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); - m_ir_builder->CreateAlignedStore(val_i64, ctr_i64_ptr, 8); + auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); + auto ctr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, CTR)); + auto ctr_i64_ptr = m_ir_builder->CreateBitCast(ctr_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); + m_ir_builder->CreateAlignedStore(val_i64, ctr_i64_ptr, 8); } Value * Compiler::GetXer() { - auto xer_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, XER)); - auto xer_i64_ptr = m_ir_builder->CreateBitCast(xer_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); - return m_ir_builder->CreateAlignedLoad(xer_i64_ptr, 8); + auto xer_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, XER)); + auto xer_i64_ptr = m_ir_builder->CreateBitCast(xer_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); + return m_ir_builder->CreateAlignedLoad(xer_i64_ptr, 8); } Value * Compiler::GetXerCa() { - return GetBit(GetXer(), 34); + return GetBit(GetXer(), 34); } Value * Compiler::GetXerSo() { - return GetBit(GetXer(), 32); + return GetBit(GetXer(), 32); } void Compiler::SetXer(Value * val_x64) { - auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); - auto xer_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned 
int)offsetof(PPUThread, XER)); - auto xer_i64_ptr = m_ir_builder->CreateBitCast(xer_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); - m_ir_builder->CreateAlignedStore(val_i64, xer_i64_ptr, 8); + auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); + auto xer_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, XER)); + auto xer_i64_ptr = m_ir_builder->CreateBitCast(xer_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); + m_ir_builder->CreateAlignedStore(val_i64, xer_i64_ptr, 8); } void Compiler::SetXerCa(Value * ca) { - auto xer_i64 = GetXer(); - xer_i64 = SetBit(xer_i64, 34, ca); - SetXer(xer_i64); + auto xer_i64 = GetXer(); + xer_i64 = SetBit(xer_i64, 34, ca); + SetXer(xer_i64); } void Compiler::SetXerSo(Value * so) { - auto xer_i64 = GetXer(); - xer_i64 = SetBit(xer_i64, 32, so); - SetXer(xer_i64); + auto xer_i64 = GetXer(); + xer_i64 = SetBit(xer_i64, 32, so); + SetXer(xer_i64); } Value * Compiler::GetVrsave() { - auto vrsave_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VRSAVE)); - auto vrsave_i32_ptr = m_ir_builder->CreateBitCast(vrsave_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); - auto val_i32 = m_ir_builder->CreateAlignedLoad(vrsave_i32_ptr, 4); - return m_ir_builder->CreateZExtOrTrunc(val_i32, m_ir_builder->getInt64Ty()); + auto vrsave_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VRSAVE)); + auto vrsave_i32_ptr = m_ir_builder->CreateBitCast(vrsave_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); + auto val_i32 = m_ir_builder->CreateAlignedLoad(vrsave_i32_ptr, 4); + return m_ir_builder->CreateZExtOrTrunc(val_i32, m_ir_builder->getInt64Ty()); } void Compiler::SetVrsave(Value * val_x64) { - auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); - auto val_i32 = 
m_ir_builder->CreateZExtOrTrunc(val_i64, m_ir_builder->getInt32Ty()); - auto vrsave_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VRSAVE)); - auto vrsave_i32_ptr = m_ir_builder->CreateBitCast(vrsave_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); - m_ir_builder->CreateAlignedStore(val_i32, vrsave_i32_ptr, 8); + auto val_i64 = m_ir_builder->CreateBitCast(val_x64, m_ir_builder->getInt64Ty()); + auto val_i32 = m_ir_builder->CreateZExtOrTrunc(val_i64, m_ir_builder->getInt32Ty()); + auto vrsave_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VRSAVE)); + auto vrsave_i32_ptr = m_ir_builder->CreateBitCast(vrsave_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); + m_ir_builder->CreateAlignedStore(val_i32, vrsave_i32_ptr, 8); } Value * Compiler::GetFpscr() { - auto fpscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, FPSCR)); - auto fpscr_i32_ptr = m_ir_builder->CreateBitCast(fpscr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); - return m_ir_builder->CreateAlignedLoad(fpscr_i32_ptr, 4); + auto fpscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, FPSCR)); + auto fpscr_i32_ptr = m_ir_builder->CreateBitCast(fpscr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); + return m_ir_builder->CreateAlignedLoad(fpscr_i32_ptr, 4); } void Compiler::SetFpscr(Value * val_x32) { - auto val_i32 = m_ir_builder->CreateBitCast(val_x32, m_ir_builder->getInt32Ty()); - auto fpscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, FPSCR)); - auto fpscr_i32_ptr = m_ir_builder->CreateBitCast(fpscr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); - m_ir_builder->CreateAlignedStore(val_i32, fpscr_i32_ptr, 4); + auto val_i32 
= m_ir_builder->CreateBitCast(val_x32, m_ir_builder->getInt32Ty()); + auto fpscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, FPSCR)); + auto fpscr_i32_ptr = m_ir_builder->CreateBitCast(fpscr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); + m_ir_builder->CreateAlignedStore(val_i32, fpscr_i32_ptr, 4); } Value * Compiler::GetFpr(u32 r, u32 bits, bool as_int) { - auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, FPR[r])); - if (!as_int) { - auto r_f64_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getDoubleTy()->getPointerTo()); - auto r_f64 = m_ir_builder->CreateAlignedLoad(r_f64_ptr, 8); - if (bits == 32) { - return m_ir_builder->CreateFPTrunc(r_f64, m_ir_builder->getFloatTy()); - } - else { - return r_f64; - } - } - else { - auto r_i64_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); - auto r_i64 = m_ir_builder->CreateAlignedLoad(r_i64_ptr, 8); - if (bits == 32) { - return m_ir_builder->CreateTrunc(r_i64, m_ir_builder->getInt32Ty()); - } - else { - return r_i64; - } - } + auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, FPR[r])); + if (!as_int) { + auto r_f64_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getDoubleTy()->getPointerTo()); + auto r_f64 = m_ir_builder->CreateAlignedLoad(r_f64_ptr, 8); + if (bits == 32) { + return m_ir_builder->CreateFPTrunc(r_f64, m_ir_builder->getFloatTy()); + } + else { + return r_f64; + } + } + else { + auto r_i64_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getInt64Ty()->getPointerTo()); + auto r_i64 = m_ir_builder->CreateAlignedLoad(r_i64_ptr, 8); + if (bits == 32) { + return m_ir_builder->CreateTrunc(r_i64, m_ir_builder->getInt32Ty()); + } + else { + return r_i64; + } + } } void Compiler::SetFpr(u32 r, Value * val) { - 
auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, FPR[r])); - auto r_f64_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getDoubleTy()->getPointerTo()); + auto r_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, FPR[r])); + auto r_f64_ptr = m_ir_builder->CreateBitCast(r_i8_ptr, m_ir_builder->getDoubleTy()->getPointerTo()); - Value* val_f64; - if (val->getType()->isDoubleTy() || val->getType()->isIntegerTy(64)) { - val_f64 = m_ir_builder->CreateBitCast(val, m_ir_builder->getDoubleTy()); - } - else if (val->getType()->isFloatTy() || val->getType()->isIntegerTy(32)) { - auto val_f32 = m_ir_builder->CreateBitCast(val, m_ir_builder->getFloatTy()); - val_f64 = m_ir_builder->CreateFPExt(val_f32, m_ir_builder->getDoubleTy()); - } - else { - assert(0); - } + Value* val_f64; + if (val->getType()->isDoubleTy() || val->getType()->isIntegerTy(64)) { + val_f64 = m_ir_builder->CreateBitCast(val, m_ir_builder->getDoubleTy()); + } + else if (val->getType()->isFloatTy() || val->getType()->isIntegerTy(32)) { + auto val_f32 = m_ir_builder->CreateBitCast(val, m_ir_builder->getFloatTy()); + val_f64 = m_ir_builder->CreateFPExt(val_f32, m_ir_builder->getDoubleTy()); + } + else { + assert(0); + } - m_ir_builder->CreateAlignedStore(val_f64, r_f64_ptr, 8); + m_ir_builder->CreateAlignedStore(val_f64, r_f64_ptr, 8); } Value * Compiler::GetVscr() { - auto vscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VSCR)); - auto vscr_i32_ptr = m_ir_builder->CreateBitCast(vscr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); - return m_ir_builder->CreateAlignedLoad(vscr_i32_ptr, 4); + auto vscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VSCR)); + auto vscr_i32_ptr = 
m_ir_builder->CreateBitCast(vscr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); + return m_ir_builder->CreateAlignedLoad(vscr_i32_ptr, 4); } void Compiler::SetVscr(Value * val_x32) { - auto val_i32 = m_ir_builder->CreateBitCast(val_x32, m_ir_builder->getInt32Ty()); - auto vscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VSCR)); - auto vscr_i32_ptr = m_ir_builder->CreateBitCast(vscr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); - m_ir_builder->CreateAlignedStore(val_i32, vscr_i32_ptr, 4); + auto val_i32 = m_ir_builder->CreateBitCast(val_x32, m_ir_builder->getInt32Ty()); + auto vscr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VSCR)); + auto vscr_i32_ptr = m_ir_builder->CreateBitCast(vscr_i8_ptr, m_ir_builder->getInt32Ty()->getPointerTo()); + m_ir_builder->CreateAlignedStore(val_i32, vscr_i32_ptr, 4); } Value * Compiler::GetVr(u32 vr) { - auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VPR[vr])); - auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); - return m_ir_builder->CreateAlignedLoad(vr_i128_ptr, 16); + auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VPR[vr])); + auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); + return m_ir_builder->CreateAlignedLoad(vr_i128_ptr, 16); } Value * Compiler::GetVrAsIntVec(u32 vr, u32 vec_elt_num_bits) { - auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VPR[vr])); - auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); - auto vr_vec_ptr = 
m_ir_builder->CreateBitCast(vr_i128_ptr, VectorType::get(m_ir_builder->getIntNTy(vec_elt_num_bits), 128 / vec_elt_num_bits)->getPointerTo()); - return m_ir_builder->CreateAlignedLoad(vr_vec_ptr, 16); + auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VPR[vr])); + auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); + auto vr_vec_ptr = m_ir_builder->CreateBitCast(vr_i128_ptr, VectorType::get(m_ir_builder->getIntNTy(vec_elt_num_bits), 128 / vec_elt_num_bits)->getPointerTo()); + return m_ir_builder->CreateAlignedLoad(vr_vec_ptr, 16); } Value * Compiler::GetVrAsFloatVec(u32 vr) { - auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VPR[vr])); - auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); - auto vr_v4f32_ptr = m_ir_builder->CreateBitCast(vr_i128_ptr, VectorType::get(m_ir_builder->getFloatTy(), 4)->getPointerTo()); - return m_ir_builder->CreateAlignedLoad(vr_v4f32_ptr, 16); + auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VPR[vr])); + auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); + auto vr_v4f32_ptr = m_ir_builder->CreateBitCast(vr_i128_ptr, VectorType::get(m_ir_builder->getFloatTy(), 4)->getPointerTo()); + return m_ir_builder->CreateAlignedLoad(vr_v4f32_ptr, 16); } Value * Compiler::GetVrAsDoubleVec(u32 vr) { - auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VPR[vr])); - auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); - auto vr_v2f64_ptr = m_ir_builder->CreateBitCast(vr_i128_ptr, VectorType::get(m_ir_builder->getDoubleTy(), 
2)->getPointerTo()); - return m_ir_builder->CreateAlignedLoad(vr_v2f64_ptr, 16); + auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VPR[vr])); + auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); + auto vr_v2f64_ptr = m_ir_builder->CreateBitCast(vr_i128_ptr, VectorType::get(m_ir_builder->getDoubleTy(), 2)->getPointerTo()); + return m_ir_builder->CreateAlignedLoad(vr_v2f64_ptr, 16); } void Compiler::SetVr(u32 vr, Value * val_x128) { - auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VPR[vr])); - auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); - auto val_i128 = m_ir_builder->CreateBitCast(val_x128, m_ir_builder->getIntNTy(128)); - m_ir_builder->CreateAlignedStore(val_i128, vr_i128_ptr, 16); + auto vr_i8_ptr = m_ir_builder->CreateConstGEP1_32(m_state.args[CompileTaskState::Args::State], (unsigned int)offsetof(PPUThread, VPR[vr])); + auto vr_i128_ptr = m_ir_builder->CreateBitCast(vr_i8_ptr, m_ir_builder->getIntNTy(128)->getPointerTo()); + auto val_i128 = m_ir_builder->CreateBitCast(val_x128, m_ir_builder->getIntNTy(128)); + m_ir_builder->CreateAlignedStore(val_i128, vr_i128_ptr, 16); } Value * Compiler::CheckBranchCondition(u32 bo, u32 bi) { - bool bo0 = bo & 0x10 ? true : false; - bool bo1 = bo & 0x08 ? true : false; - bool bo2 = bo & 0x04 ? true : false; - bool bo3 = bo & 0x02 ? true : false; + bool bo0 = bo & 0x10 ? true : false; + bool bo1 = bo & 0x08 ? true : false; + bool bo2 = bo & 0x04 ? true : false; + bool bo3 = bo & 0x02 ? 
true : false; - auto ctr_i64 = GetCtr(); - if (!bo2) { - ctr_i64 = m_ir_builder->CreateSub(ctr_i64, m_ir_builder->getInt64(1)); - SetCtr(ctr_i64); - } + auto ctr_i64 = GetCtr(); + if (!bo2) { + ctr_i64 = m_ir_builder->CreateSub(ctr_i64, m_ir_builder->getInt64(1)); + SetCtr(ctr_i64); + } - Value * ctr_ok_i1 = nullptr; - if (!bo2) { - // TODO: Check if we should compare all bits or just the lower 32 bits. This depends on MSR[SF]. Not sure what it is for PS3. - ctr_ok_i1 = m_ir_builder->CreateICmpNE(ctr_i64, m_ir_builder->getInt64(0)); - if (bo3) { - ctr_ok_i1 = m_ir_builder->CreateXor(ctr_ok_i1, m_ir_builder->getInt1(bo3)); - } - } + Value * ctr_ok_i1 = nullptr; + if (!bo2) { + // TODO: Check if we should compare all bits or just the lower 32 bits. This depends on MSR[SF]. Not sure what it is for PS3. + ctr_ok_i1 = m_ir_builder->CreateICmpNE(ctr_i64, m_ir_builder->getInt64(0)); + if (bo3) { + ctr_ok_i1 = m_ir_builder->CreateXor(ctr_ok_i1, m_ir_builder->getInt1(bo3)); + } + } - Value * cond_ok_i1 = nullptr; - if (!bo0) { - auto cr_bi_i32 = GetBit(GetCr(), bi); - cond_ok_i1 = m_ir_builder->CreateTrunc(cr_bi_i32, m_ir_builder->getInt1Ty()); - if (!bo1) { - cond_ok_i1 = m_ir_builder->CreateXor(cond_ok_i1, m_ir_builder->getInt1(!bo1)); - } - } + Value * cond_ok_i1 = nullptr; + if (!bo0) { + auto cr_bi_i32 = GetBit(GetCr(), bi); + cond_ok_i1 = m_ir_builder->CreateTrunc(cr_bi_i32, m_ir_builder->getInt1Ty()); + if (!bo1) { + cond_ok_i1 = m_ir_builder->CreateXor(cond_ok_i1, m_ir_builder->getInt1(!bo1)); + } + } - Value * cmp_i1 = nullptr; - if (ctr_ok_i1 && cond_ok_i1) { - cmp_i1 = m_ir_builder->CreateAnd(ctr_ok_i1, cond_ok_i1); - } - else if (ctr_ok_i1) { - cmp_i1 = ctr_ok_i1; - } - else if (cond_ok_i1) { - cmp_i1 = cond_ok_i1; - } + Value * cmp_i1 = nullptr; + if (ctr_ok_i1 && cond_ok_i1) { + cmp_i1 = m_ir_builder->CreateAnd(ctr_ok_i1, cond_ok_i1); + } + else if (ctr_ok_i1) { + cmp_i1 = ctr_ok_i1; + } + else if (cond_ok_i1) { + cmp_i1 = cond_ok_i1; + } - return cmp_i1; + 
return cmp_i1; } void Compiler::CreateBranch(llvm::Value * cmp_i1, llvm::Value * target_i32, bool lk, bool target_is_lr) { - if (lk) - SetLr(m_ir_builder->getInt64(m_state.current_instruction_address + 4)); + if (lk) + SetLr(m_ir_builder->getInt64(m_state.current_instruction_address + 4)); - BasicBlock *current_block = m_ir_builder->GetInsertBlock(); + BasicBlock *current_block = m_ir_builder->GetInsertBlock(); - BasicBlock * target_block = nullptr; - if (dyn_cast(target_i32)) { - // Target address is an immediate value. - u32 target_address = (u32)(dyn_cast(target_i32)->getLimitedValue()); - if (lk) { - // Function call - if (cmp_i1) { // There is no need to create a new block for an unconditional jump - target_block = GetBasicBlockFromAddress(m_state.current_instruction_address, "target"); - m_ir_builder->SetInsertPoint(target_block); - } + BasicBlock * target_block = nullptr; + if (dyn_cast(target_i32)) { + // Target address is an immediate value. + u32 target_address = (u32)(dyn_cast(target_i32)->getLimitedValue()); + if (lk) { + // Function call + if (cmp_i1) { // There is no need to create a new block for an unconditional jump + target_block = GetBasicBlockFromAddress(m_state.current_instruction_address, "target"); + m_ir_builder->SetInsertPoint(target_block); + } - SetPc(target_i32); - IndirectCall(target_address, m_ir_builder->getInt64(0), true); - m_ir_builder->CreateBr(GetBasicBlockFromAddress(m_state.current_instruction_address + 4)); - } - else { - // Local branch - target_block = GetBasicBlockFromAddress(target_address); - } - } - else { - // Target address is in a register - if (cmp_i1) { // There is no need to create a new block for an unconditional jump - target_block = GetBasicBlockFromAddress(m_state.current_instruction_address, "target"); - m_ir_builder->SetInsertPoint(target_block); - } + SetPc(target_i32); + IndirectCall(target_address, m_ir_builder->getInt64(0), true); + 
m_ir_builder->CreateBr(GetBasicBlockFromAddress(m_state.current_instruction_address + 4)); + } + else { + // Local branch + target_block = GetBasicBlockFromAddress(target_address); + } + } + else { + // Target address is in a register + if (cmp_i1) { // There is no need to create a new block for an unconditional jump + target_block = GetBasicBlockFromAddress(m_state.current_instruction_address, "target"); + m_ir_builder->SetInsertPoint(target_block); + } - SetPc(target_i32); - if (target_is_lr && !lk) { - // Return from this function - m_ir_builder->CreateRet(m_ir_builder->getInt32(0)); - } - else if (lk) { - BasicBlock *next_block = GetBasicBlockFromAddress(m_state.current_instruction_address + 4); - BasicBlock *unknown_function_block = GetBasicBlockFromAddress(m_state.current_instruction_address, "unknown_function"); + SetPc(target_i32); + if (target_is_lr && !lk) { + // Return from this function + m_ir_builder->CreateRet(m_ir_builder->getInt32(0)); + } + else if (lk) { + BasicBlock *next_block = GetBasicBlockFromAddress(m_state.current_instruction_address + 4); + BasicBlock *unknown_function_block = GetBasicBlockFromAddress(m_state.current_instruction_address, "unknown_function"); - auto switch_instr = m_ir_builder->CreateSwitch(target_i32, unknown_function_block); - m_ir_builder->SetInsertPoint(unknown_function_block); - m_ir_builder->CreateCall2(m_execute_unknown_function, m_state.args[CompileTaskState::Args::State], m_ir_builder->getInt64(0)); - m_ir_builder->CreateBr(next_block); + auto switch_instr = m_ir_builder->CreateSwitch(target_i32, unknown_function_block); + m_ir_builder->SetInsertPoint(unknown_function_block); + m_ir_builder->CreateCall2(m_execute_unknown_function, m_state.args[CompileTaskState::Args::State], m_ir_builder->getInt64(0)); + m_ir_builder->CreateBr(next_block); - auto call_i = m_state.cfg->calls.find(m_state.current_instruction_address); - if (call_i != m_state.cfg->calls.end()) { - for (auto function_i = call_i->second.begin(); 
function_i != call_i->second.end(); function_i++) { - auto block = GetBasicBlockFromAddress(m_state.current_instruction_address, fmt::Format("0x%08X", *function_i)); - m_ir_builder->SetInsertPoint(block); - IndirectCall(*function_i, m_ir_builder->getInt64(0), true); - m_ir_builder->CreateBr(next_block); - switch_instr->addCase(m_ir_builder->getInt32(*function_i), block); - } - } - } - else { - auto switch_instr = m_ir_builder->CreateSwitch(target_i32, GetBasicBlockFromAddress(0xFFFFFFFF)); - auto branch_i = m_state.cfg->branches.find(m_state.current_instruction_address); - if (branch_i != m_state.cfg->branches.end()) { - for (auto next_instr_i = branch_i->second.begin(); next_instr_i != branch_i->second.end(); next_instr_i++) { - switch_instr->addCase(m_ir_builder->getInt32(*next_instr_i), GetBasicBlockFromAddress(*next_instr_i)); - } - } - } - } + auto call_i = m_state.cfg->calls.find(m_state.current_instruction_address); + if (call_i != m_state.cfg->calls.end()) { + for (auto function_i = call_i->second.begin(); function_i != call_i->second.end(); function_i++) { + auto block = GetBasicBlockFromAddress(m_state.current_instruction_address, fmt::Format("0x%08X", *function_i)); + m_ir_builder->SetInsertPoint(block); + IndirectCall(*function_i, m_ir_builder->getInt64(0), true); + m_ir_builder->CreateBr(next_block); + switch_instr->addCase(m_ir_builder->getInt32(*function_i), block); + } + } + } + else { + auto switch_instr = m_ir_builder->CreateSwitch(target_i32, GetBasicBlockFromAddress(0xFFFFFFFF)); + auto branch_i = m_state.cfg->branches.find(m_state.current_instruction_address); + if (branch_i != m_state.cfg->branches.end()) { + for (auto next_instr_i = branch_i->second.begin(); next_instr_i != branch_i->second.end(); next_instr_i++) { + switch_instr->addCase(m_ir_builder->getInt32(*next_instr_i), GetBasicBlockFromAddress(*next_instr_i)); + } + } + } + } - if (cmp_i1) { - // Conditional branch - auto next_block = 
GetBasicBlockFromAddress(m_state.current_instruction_address + 4); - m_ir_builder->SetInsertPoint(current_block); - m_ir_builder->CreateCondBr(cmp_i1, target_block, next_block); - } - else { - // Unconditional branch - if (target_block) { - m_ir_builder->SetInsertPoint(current_block); - m_ir_builder->CreateBr(target_block); - } - } + if (cmp_i1) { + // Conditional branch + auto next_block = GetBasicBlockFromAddress(m_state.current_instruction_address + 4); + m_ir_builder->SetInsertPoint(current_block); + m_ir_builder->CreateCondBr(cmp_i1, target_block, next_block); + } + else { + // Unconditional branch + if (target_block) { + m_ir_builder->SetInsertPoint(current_block); + m_ir_builder->CreateBr(target_block); + } + } - m_state.hit_branch_instruction = true; + m_state.hit_branch_instruction = true; } Value * Compiler::ReadMemory(Value * addr_i64, u32 bits, u32 alignment, bool bswap, bool could_be_mmio) { - addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFF); - auto eaddr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0))); - auto eaddr_ix_ptr = m_ir_builder->CreateIntToPtr(eaddr_i64, m_ir_builder->getIntNTy(bits)->getPointerTo()); - auto val_ix = (Value *)m_ir_builder->CreateLoad(eaddr_ix_ptr, alignment); - if (bits > 8 && bswap) { - val_ix = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, m_ir_builder->getIntNTy(bits)), val_ix); - } + addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFF); + auto eaddr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0))); + auto eaddr_ix_ptr = m_ir_builder->CreateIntToPtr(eaddr_i64, m_ir_builder->getIntNTy(bits)->getPointerTo()); + auto val_ix = (Value *)m_ir_builder->CreateLoad(eaddr_ix_ptr, alignment); + if (bits > 8 && bswap) { + val_ix = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, m_ir_builder->getIntNTy(bits)), val_ix); + } - return val_ix; + return val_ix; } void 
Compiler::WriteMemory(Value * addr_i64, Value * val_ix, u32 alignment, bool bswap, bool could_be_mmio) { - if (val_ix->getType()->getIntegerBitWidth() > 8 && bswap) { - val_ix = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, val_ix->getType()), val_ix); - } + if (val_ix->getType()->getIntegerBitWidth() > 8 && bswap) { + val_ix = m_ir_builder->CreateCall(Intrinsic::getDeclaration(m_module, Intrinsic::bswap, val_ix->getType()), val_ix); + } - addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFF); - auto eaddr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0))); - auto eaddr_ix_ptr = m_ir_builder->CreateIntToPtr(eaddr_i64, val_ix->getType()->getPointerTo()); - m_ir_builder->CreateAlignedStore(val_ix, eaddr_ix_ptr, alignment); + addr_i64 = m_ir_builder->CreateAnd(addr_i64, 0xFFFFFFFF); + auto eaddr_i64 = m_ir_builder->CreateAdd(addr_i64, m_ir_builder->getInt64((u64)vm::get_ptr(0))); + auto eaddr_ix_ptr = m_ir_builder->CreateIntToPtr(eaddr_i64, val_ix->getType()->getPointerTo()); + m_ir_builder->CreateAlignedStore(val_ix, eaddr_ix_ptr, alignment); } llvm::Value * Compiler::IndirectCall(u32 address, Value * context_i64, bool is_function) { - const Executable *functionPtr = m_recompilation_engine.GetExecutable(address, is_function); - auto location_i64 = m_ir_builder->getInt64((uint64_t)functionPtr); - auto location_i64_ptr = m_ir_builder->CreateIntToPtr(location_i64, m_ir_builder->getInt64Ty()->getPointerTo()); - auto executable_i64 = m_ir_builder->CreateLoad(location_i64_ptr); - auto executable_ptr = m_ir_builder->CreateIntToPtr(executable_i64, m_compiled_function_type->getPointerTo()); - auto ret_i32 = m_ir_builder->CreateCall2(executable_ptr, m_state.args[CompileTaskState::Args::State], context_i64); + const Executable *functionPtr = m_recompilation_engine.GetExecutable(address, is_function); + auto location_i64 = m_ir_builder->getInt64((uint64_t)functionPtr); + auto location_i64_ptr = 
m_ir_builder->CreateIntToPtr(location_i64, m_ir_builder->getInt64Ty()->getPointerTo()); + auto executable_i64 = m_ir_builder->CreateLoad(location_i64_ptr); + auto executable_ptr = m_ir_builder->CreateIntToPtr(executable_i64, m_compiled_function_type->getPointerTo()); + auto ret_i32 = m_ir_builder->CreateCall2(executable_ptr, m_state.args[CompileTaskState::Args::State], context_i64); - auto cmp_i1 = m_ir_builder->CreateICmpEQ(ret_i32, m_ir_builder->getInt32(0xFFFFFFFF)); - auto then_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "then_all_fs"); - auto merge_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "merge_all_fs"); - m_ir_builder->CreateCondBr(cmp_i1, then_bb, merge_bb); + auto cmp_i1 = m_ir_builder->CreateICmpEQ(ret_i32, m_ir_builder->getInt32(0xFFFFFFFF)); + auto then_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "then_all_fs"); + auto merge_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "merge_all_fs"); + m_ir_builder->CreateCondBr(cmp_i1, then_bb, merge_bb); - m_ir_builder->SetInsertPoint(then_bb); - m_ir_builder->CreateRet(m_ir_builder->getInt32(0)); - m_ir_builder->SetInsertPoint(merge_bb); - return ret_i32; + m_ir_builder->SetInsertPoint(then_bb); + m_ir_builder->CreateRet(m_ir_builder->getInt32(0)); + m_ir_builder->SetInsertPoint(merge_bb); + return ret_i32; } void Compiler::CompilationError(const std::string & error) { - LOG_ERROR(PPU, "[0x%08X] %s", m_state.current_instruction_address, error.c_str()); - Emu.Pause(); + LOG_ERROR(PPU, "[0x%08X] %s", m_state.current_instruction_address, error.c_str()); + Emu.Pause(); } void Compiler::InitRotateMask() { - for (u32 mb = 0; mb < 64; mb++) { - for (u32 me = 0; me < 64; me++) { - u64 mask = ((u64)-1 >> mb) ^ ((me >= 63) ? 0 : (u64)-1 >> (me + 1)); - s_rotate_mask[mb][me] = mb > me ? ~mask : mask; - } - } + for (u32 mb = 0; mb < 64; mb++) { + for (u32 me = 0; me < 64; me++) { + u64 mask = ((u64)-1 >> mb) ^ ((me >= 63) ? 
0 : (u64)-1 >> (me + 1)); + s_rotate_mask[mb][me] = mb > me ? ~mask : mask; + } + } } -#endif +#endif \ No newline at end of file diff --git a/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp b/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp index 9e5e05e4ce..007dbd07ca 100644 --- a/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp +++ b/rpcs3/Emu/Cell/PPULLVMRecompilerTests.cpp @@ -12,7 +12,7 @@ #include -#define PPU_LLVM_RECOMPILER_UNIT_TESTS 1 // Uncomment to enable tests +//#define PPU_LLVM_RECOMPILER_UNIT_TESTS 1 // Uncomment to enable tests //#define PPU_LLVM_RECOMPILER_UNIT_TESTS_VERBOSE 1 // Uncomment to print everything (even for passed tests) using namespace llvm; @@ -31,351 +31,349 @@ VerifyInstructionAgainstInterpreter(fmt::Format("%s.%d", #fn, tc).c_str(), &Comp /// Register state of a PPU struct ppu_recompiler_llvm::PPUState { - /// Floating point registers - PPCdouble FPR[32]; + /// Floating point registers + PPCdouble FPR[32]; - ///Floating point status and control register - FPSCRhdr FPSCR; + ///Floating point status and control register + FPSCRhdr FPSCR; - /// General purpose reggisters - u64 GPR[32]; + /// General purpose reggisters + u64 GPR[32]; - /// Vector purpose registers - u128 VPR[32]; + /// Vector purpose registers + u128 VPR[32]; - /// Condition register - CRhdr CR; + /// Condition register + CRhdr CR; - /// Fixed point exception register - XERhdr XER; + /// Fixed point exception register + XERhdr XER; - /// Vector status and control register - VSCRhdr VSCR; + /// Vector status and control register + VSCRhdr VSCR; - /// Link register - u64 LR; + /// Link register + u64 LR; - /// Count register - u64 CTR; + /// Count register + u64 CTR; - /// SPR general purpose registers - u64 SPRG[8]; + /// SPR general purpose registers + u64 SPRG[8]; - /// Time base register - u64 TB; + /// Time base register + u64 TB; - /// Memory block - u32 address; - u64 mem_block[64]; + /// Memory block + u32 address; + u64 mem_block[64]; - void Load(PPUThread & ppu, u32 addr) { - 
for (int i = 0; i < 32; i++) { - FPR[i] = ppu.FPR[i]; - GPR[i] = ppu.GPR[i]; - VPR[i] = ppu.VPR[i]; + void Load(PPUThread & ppu, u32 addr) { + for (int i = 0; i < 32; i++) { + FPR[i] = ppu.FPR[i]; + GPR[i] = ppu.GPR[i]; + VPR[i] = ppu.VPR[i]; - if (i < 8) { - SPRG[i] = ppu.SPRG[i]; - } - } + if (i < 8) { + SPRG[i] = ppu.SPRG[i]; + } + } - FPSCR = ppu.FPSCR; - CR = ppu.CR; - XER = ppu.XER; - VSCR = ppu.VSCR; - LR = ppu.LR; - CTR = ppu.CTR; - TB = ppu.TB; + FPSCR = ppu.FPSCR; + CR = ppu.CR; + XER = ppu.XER; + VSCR = ppu.VSCR; + LR = ppu.LR; + CTR = ppu.CTR; + TB = ppu.TB; - address = addr; - for (int i = 0; i < (sizeof(mem_block) / 8); i++) { - mem_block[i] = vm::read64(address + (i * 8)); - } - } + address = addr; + for (int i = 0; i < (sizeof(mem_block) / 8); i++) { + mem_block[i] = vm::read64(address + (i * 8)); + } + } - void Store(PPUThread & ppu) { - for (int i = 0; i < 32; i++) { - ppu.FPR[i] = FPR[i]; - ppu.GPR[i] = GPR[i]; - ppu.VPR[i] = VPR[i]; + void Store(PPUThread & ppu) { + for (int i = 0; i < 32; i++) { + ppu.FPR[i] = FPR[i]; + ppu.GPR[i] = GPR[i]; + ppu.VPR[i] = VPR[i]; - if (i < 8) { - ppu.SPRG[i] = SPRG[i]; - } - } + if (i < 8) { + ppu.SPRG[i] = SPRG[i]; + } + } - ppu.FPSCR = FPSCR; - ppu.CR = CR; - ppu.XER = XER; - ppu.VSCR = VSCR; - ppu.LR = LR; - ppu.CTR = CTR; - ppu.TB = TB; + ppu.FPSCR = FPSCR; + ppu.CR = CR; + ppu.XER = XER; + ppu.VSCR = VSCR; + ppu.LR = LR; + ppu.CTR = CTR; + ppu.TB = TB; - for (int i = 0; i < (sizeof(mem_block) / 8); i++) { - vm::write64(address + (i * 8), mem_block[i]); - } - } + for (int i = 0; i < (sizeof(mem_block) / 8); i++) { + vm::write64(address + (i * 8), mem_block[i]); + } + } - void SetRandom(u32 addr) { - std::mt19937_64 rng; + void SetRandom(u32 addr) { + std::mt19937_64 rng; - rng.seed((u32)std::chrono::high_resolution_clock::now().time_since_epoch().count()); - for (int i = 0; i < 32; i++) { - FPR[i] = (double)rng(); - GPR[i] = rng(); - VPR[i]._f[0] = (float)rng(); - VPR[i]._f[1] = (float)(rng() & 0x7FFFFFFF); 
- VPR[i]._f[2] = -(float)(rng() & 0x7FFFFFFF); - VPR[i]._f[3] = -(float)rng(); + rng.seed((u32)std::chrono::high_resolution_clock::now().time_since_epoch().count()); + for (int i = 0; i < 32; i++) { + FPR[i] = (double)rng(); + GPR[i] = rng(); + VPR[i]._f[0] = (float)rng(); + VPR[i]._f[1] = (float)(rng() & 0x7FFFFFFF); + VPR[i]._f[2] = -(float)(rng() & 0x7FFFFFFF); + VPR[i]._f[3] = -(float)rng(); - if (i < 8) { - SPRG[i] = rng(); - } - } + if (i < 8) { + SPRG[i] = rng(); + } + } - FPSCR.FPSCR = (u32)rng(); - CR.CR = (u32)rng(); - XER.XER = 0; - XER.CA = (u32)rng(); - XER.SO = (u32)rng(); - XER.OV = (u32)rng(); - VSCR.VSCR = (u32)rng(); - VSCR.X = 0; - VSCR.Y = 0; - LR = rng(); - CTR = rng(); - TB = rng(); + FPSCR.FPSCR = (u32)rng(); + CR.CR = (u32)rng(); + XER.XER = 0; + XER.CA = (u32)rng(); + XER.SO = (u32)rng(); + XER.OV = (u32)rng(); + VSCR.VSCR = (u32)rng(); + VSCR.X = 0; + VSCR.Y = 0; + LR = rng(); + CTR = rng(); + TB = rng(); - address = addr; - for (int i = 0; i < (sizeof(mem_block) / 8); i++) { - mem_block[i] = rng(); - } - } + address = addr; + for (int i = 0; i < (sizeof(mem_block) / 8); i++) { + mem_block[i] = rng(); + } + } - std::string ToString() const { - std::string ret; + std::string ToString() const { + std::string ret; - for (int i = 0; i < 32; i++) { - ret += fmt::Format("GPR[%02d] = 0x%016llx FPR[%02d] = %16g (0x%016llx) VPR[%02d] = 0x%s [%s]\n", i, GPR[i], i, FPR[i]._double, FPR[i]._u64, i, VPR[i].to_hex().c_str(), VPR[i].to_xyzw().c_str()); - } + for (int i = 0; i < 32; i++) { + ret += fmt::Format("GPR[%02d] = 0x%016llx FPR[%02d] = %16g (0x%016llx) VPR[%02d] = 0x%s [%s]\n", i, GPR[i], i, FPR[i]._double, FPR[i]._u64, i, VPR[i].to_hex().c_str(), VPR[i].to_xyzw().c_str()); + } - for (int i = 0; i < 8; i++) { - ret += fmt::Format("SPRG[%d] = 0x%016llx\n", i, SPRG[i]); - } + for (int i = 0; i < 8; i++) { + ret += fmt::Format("SPRG[%d] = 0x%016llx\n", i, SPRG[i]); + } - ret += fmt::Format("CR = 0x%08x LR = 0x%016llx CTR = 0x%016llx TB=0x%016llx\n", 
CR.CR, LR, CTR, TB); - ret += fmt::Format("XER = 0x%016llx [CA=%d | OV=%d | SO=%d]\n", XER.XER, fmt::by_value(XER.CA), fmt::by_value(XER.OV), fmt::by_value(XER.SO)); - //ret += fmt::Format("FPSCR = 0x%08x " // TODO: Uncomment after implementing FPSCR - // "[RN=%d | NI=%d | XE=%d | ZE=%d | UE=%d | OE=%d | VE=%d | " - // "VXCVI=%d | VXSQRT=%d | VXSOFT=%d | FPRF=%d | " - // "FI=%d | FR=%d | VXVC=%d | VXIMZ=%d | " - // "VXZDZ=%d | VXIDI=%d | VXISI=%d | VXSNAN=%d | " - // "XX=%d | ZX=%d | UX=%d | OX=%d | VX=%d | FEX=%d | FX=%d]\n", - // FPSCR.FPSCR, - // fmt::by_value(FPSCR.RN), - // fmt::by_value(FPSCR.NI), fmt::by_value(FPSCR.XE), fmt::by_value(FPSCR.ZE), fmt::by_value(FPSCR.UE), fmt::by_value(FPSCR.OE), fmt::by_value(FPSCR.VE), - // fmt::by_value(FPSCR.VXCVI), fmt::by_value(FPSCR.VXSQRT), fmt::by_value(FPSCR.VXSOFT), fmt::by_value(FPSCR.FPRF), - // fmt::by_value(FPSCR.FI), fmt::by_value(FPSCR.FR), fmt::by_value(FPSCR.VXVC), fmt::by_value(FPSCR.VXIMZ), - // fmt::by_value(FPSCR.VXZDZ), fmt::by_value(FPSCR.VXIDI), fmt::by_value(FPSCR.VXISI), fmt::by_value(FPSCR.VXSNAN), - // fmt::by_value(FPSCR.XX), fmt::by_value(FPSCR.ZX), fmt::by_value(FPSCR.UX), fmt::by_value(FPSCR.OX), fmt::by_value(FPSCR.VX), fmt::by_value(FPSCR.FEX), fmt::by_value(FPSCR.FX)); - //ret += fmt::Format("VSCR = 0x%08x [NJ=%d | SAT=%d]\n", VSCR.VSCR, fmt::by_value(VSCR.NJ), fmt::by_value(VSCR.SAT)); // TODO: Uncomment after implementing VSCR.SAT + ret += fmt::Format("CR = 0x%08x LR = 0x%016llx CTR = 0x%016llx TB=0x%016llx\n", CR.CR, LR, CTR, TB); + ret += fmt::Format("XER = 0x%016llx [CA=%d | OV=%d | SO=%d]\n", XER.XER, fmt::by_value(XER.CA), fmt::by_value(XER.OV), fmt::by_value(XER.SO)); + //ret += fmt::Format("FPSCR = 0x%08x " // TODO: Uncomment after implementing FPSCR + // "[RN=%d | NI=%d | XE=%d | ZE=%d | UE=%d | OE=%d | VE=%d | " + // "VXCVI=%d | VXSQRT=%d | VXSOFT=%d | FPRF=%d | " + // "FI=%d | FR=%d | VXVC=%d | VXIMZ=%d | " + // "VXZDZ=%d | VXIDI=%d | VXISI=%d | VXSNAN=%d | " + // "XX=%d | ZX=%d 
| UX=%d | OX=%d | VX=%d | FEX=%d | FX=%d]\n", + // FPSCR.FPSCR, + // fmt::by_value(FPSCR.RN), + // fmt::by_value(FPSCR.NI), fmt::by_value(FPSCR.XE), fmt::by_value(FPSCR.ZE), fmt::by_value(FPSCR.UE), fmt::by_value(FPSCR.OE), fmt::by_value(FPSCR.VE), + // fmt::by_value(FPSCR.VXCVI), fmt::by_value(FPSCR.VXSQRT), fmt::by_value(FPSCR.VXSOFT), fmt::by_value(FPSCR.FPRF), + // fmt::by_value(FPSCR.FI), fmt::by_value(FPSCR.FR), fmt::by_value(FPSCR.VXVC), fmt::by_value(FPSCR.VXIMZ), + // fmt::by_value(FPSCR.VXZDZ), fmt::by_value(FPSCR.VXIDI), fmt::by_value(FPSCR.VXISI), fmt::by_value(FPSCR.VXSNAN), + // fmt::by_value(FPSCR.XX), fmt::by_value(FPSCR.ZX), fmt::by_value(FPSCR.UX), fmt::by_value(FPSCR.OX), fmt::by_value(FPSCR.VX), fmt::by_value(FPSCR.FEX), fmt::by_value(FPSCR.FX)); + //ret += fmt::Format("VSCR = 0x%08x [NJ=%d | SAT=%d]\n", VSCR.VSCR, fmt::by_value(VSCR.NJ), fmt::by_value(VSCR.SAT)); // TODO: Uncomment after implementing VSCR.SAT - for (int i = 0; i < (sizeof(mem_block) / 8); i += 2) { - ret += fmt::Format("mem_block[%d] = 0x%016llx mem_block[%d] = 0x%016llx\n", i, mem_block[i], i + 1, mem_block[i + 1]); - } + for (int i = 0; i < (sizeof(mem_block) / 8); i += 2) { + ret += fmt::Format("mem_block[%d] = 0x%016llx mem_block[%d] = 0x%016llx\n", i, mem_block[i], i + 1, mem_block[i + 1]); + } - return ret; - } + return ret; + } }; #ifdef PPU_LLVM_RECOMPILER_UNIT_TESTS static std::string StateDiff(PPUState const & recomp, PPUState const & interp) { - std::string ret; + std::string ret; - for (int i = 0; i < 32; i++) { - if (recomp.GPR[i] != interp.GPR[i]) { - ret += fmt::Format("recomp: GPR[%02d] = 0x%016llx interp: GPR[%02d] = 0x%016llx\n", i, recomp.GPR[i], i, interp.GPR[i]); - } - if (recomp.FPR[i]._u64 != interp.FPR[i]._u64) { - ret += fmt::Format("recomp: FPR[%02d] = %16g (0x%016llx) interp: FPR[%02d] = %16g (0x%016llx)\n", i, recomp.FPR[i]._double, recomp.FPR[i]._u64, i, interp.FPR[i]._double, interp.FPR[i]._u64); - } - if (recomp.VPR[i] != interp.VPR[i]) { - ret += 
fmt::Format("recomp: VPR[%02d] = 0x%s [%s]\n", i, recomp.VPR[i].to_hex().c_str(), recomp.VPR[i].to_xyzw().c_str()); - ret += fmt::Format("interp: VPR[%02d] = 0x%s [%s]\n", i, interp.VPR[i].to_hex().c_str(), interp.VPR[i].to_xyzw().c_str()); - } - } + for (int i = 0; i < 32; i++) { + if (recomp.GPR[i] != interp.GPR[i]) { + ret += fmt::Format("recomp: GPR[%02d] = 0x%016llx interp: GPR[%02d] = 0x%016llx\n", i, recomp.GPR[i], i, interp.GPR[i]); + } + if (recomp.FPR[i]._u64 != interp.FPR[i]._u64) { + ret += fmt::Format("recomp: FPR[%02d] = %16g (0x%016llx) interp: FPR[%02d] = %16g (0x%016llx)\n", i, recomp.FPR[i]._double, recomp.FPR[i]._u64, i, interp.FPR[i]._double, interp.FPR[i]._u64); + } + if (recomp.VPR[i] != interp.VPR[i]) { + ret += fmt::Format("recomp: VPR[%02d] = 0x%s [%s]\n", i, recomp.VPR[i].to_hex().c_str(), recomp.VPR[i].to_xyzw().c_str()); + ret += fmt::Format("interp: VPR[%02d] = 0x%s [%s]\n", i, interp.VPR[i].to_hex().c_str(), interp.VPR[i].to_xyzw().c_str()); + } + } - for (int i = 0; i < 8; i++) { - if (recomp.SPRG[i] != interp.SPRG[i]) - ret += fmt::Format("recomp: SPRG[%d] = 0x%016llx interp: SPRG[%d] = 0x%016llx\n", i, recomp.SPRG[i], i, interp.SPRG[i]); - } + for (int i = 0; i < 8; i++) { + if (recomp.SPRG[i] != interp.SPRG[i]) + ret += fmt::Format("recomp: SPRG[%d] = 0x%016llx interp: SPRG[%d] = 0x%016llx\n", i, recomp.SPRG[i], i, interp.SPRG[i]); + } - if (recomp.CR.CR != interp.CR.CR) { - ret += fmt::Format("recomp: CR = 0x%08x\n", recomp.CR.CR); - ret += fmt::Format("interp: CR = 0x%08x\n", interp.CR.CR); - } - if (recomp.LR != interp.LR) { - ret += fmt::Format("recomp: LR = 0x%016llx\n", recomp.LR); - ret += fmt::Format("interp: LR = 0x%016llx\n", interp.LR); - } - if (recomp.CTR != interp.CTR) { - ret += fmt::Format("recomp: CTR = 0x%016llx\n", recomp.CTR); - ret += fmt::Format("interp: CTR = 0x%016llx\n", interp.CTR); - } - if (recomp.TB != interp.TB) { - ret += fmt::Format("recomp: TB = 0x%016llx\n", recomp.TB); - ret += 
fmt::Format("interp: TB = 0x%016llx\n", interp.TB); - } + if (recomp.CR.CR != interp.CR.CR) { + ret += fmt::Format("recomp: CR = 0x%08x\n", recomp.CR.CR); + ret += fmt::Format("interp: CR = 0x%08x\n", interp.CR.CR); + } + if (recomp.LR != interp.LR) { + ret += fmt::Format("recomp: LR = 0x%016llx\n", recomp.LR); + ret += fmt::Format("interp: LR = 0x%016llx\n", interp.LR); + } + if (recomp.CTR != interp.CTR) { + ret += fmt::Format("recomp: CTR = 0x%016llx\n", recomp.CTR); + ret += fmt::Format("interp: CTR = 0x%016llx\n", interp.CTR); + } + if (recomp.TB != interp.TB) { + ret += fmt::Format("recomp: TB = 0x%016llx\n", recomp.TB); + ret += fmt::Format("interp: TB = 0x%016llx\n", interp.TB); + } - if (recomp.XER.XER != interp.XER.XER) { - ret += fmt::Format("recomp: XER = 0x%016llx [CA=%d | OV=%d | SO=%d]\n", recomp.XER.XER, fmt::by_value(recomp.XER.CA), fmt::by_value(recomp.XER.OV), fmt::by_value(recomp.XER.SO)); - ret += fmt::Format("interp: XER = 0x%016llx [CA=%d | OV=%d | SO=%d]\n", interp.XER.XER, fmt::by_value(interp.XER.CA), fmt::by_value(interp.XER.OV), fmt::by_value(interp.XER.SO)); - } + if (recomp.XER.XER != interp.XER.XER) { + ret += fmt::Format("recomp: XER = 0x%016llx [CA=%d | OV=%d | SO=%d]\n", recomp.XER.XER, fmt::by_value(recomp.XER.CA), fmt::by_value(recomp.XER.OV), fmt::by_value(recomp.XER.SO)); + ret += fmt::Format("interp: XER = 0x%016llx [CA=%d | OV=%d | SO=%d]\n", interp.XER.XER, fmt::by_value(interp.XER.CA), fmt::by_value(interp.XER.OV), fmt::by_value(interp.XER.SO)); + } - for (int i = 0; i < (sizeof(recomp.mem_block) / 8); i++) { - if (recomp.mem_block[i] != interp.mem_block[i]) { - ret += fmt::Format("recomp: mem_block[%d] = 0x%016llx\n", i, recomp.mem_block[i]); - ret += fmt::Format("interp: mem_block[%d] = 0x%016llx\n", i, interp.mem_block[i]); - } - } + for (int i = 0; i < (sizeof(recomp.mem_block) / 8); i++) { + if (recomp.mem_block[i] != interp.mem_block[i]) { + ret += fmt::Format("recomp: mem_block[%d] = 0x%016llx\n", i, 
recomp.mem_block[i]); + ret += fmt::Format("interp: mem_block[%d] = 0x%016llx\n", i, interp.mem_block[i]); + } + } - return ret; - } + return ret; +} #endif // PPU_LLVM_RECOMPILER_UNIT_TESTS - #ifdef PPU_LLVM_RECOMPILER_UNIT_TESTS -static PPUThread * s_ppu_state = nullptr; +static PPUThread * s_ppu_state = nullptr; static PPUInterpreter * s_interpreter = nullptr; #endif // PPU_LLVM_RECOMPILER_UNIT_TESTS template void Compiler::VerifyInstructionAgainstInterpreter(const char * name, void (Compiler::*recomp_fn)(Args...), void (PPUInterpreter::*interp_fn)(Args...), PPUState & input_state, Args... args) { #ifdef PPU_LLVM_RECOMPILER_UNIT_TESTS - auto test_case = [&]() { - (this->*recomp_fn)(args...); - }; - auto input = [&]() { - input_state.Store(*s_ppu_state); - }; - auto check_result = [&](std::string & msg) { - PPUState recomp_output_state; - PPUState interp_output_state; + auto test_case = [&]() { + (this->*recomp_fn)(args...); + }; + auto input = [&]() { + input_state.Store(*s_ppu_state); + }; + auto check_result = [&](std::string & msg) { + PPUState recomp_output_state; + PPUState interp_output_state; - recomp_output_state.Load(*s_ppu_state, input_state.address); - input_state.Store(*s_ppu_state); - (s_interpreter->*interp_fn)(args...); - interp_output_state.Load(*s_ppu_state, input_state.address); + recomp_output_state.Load(*s_ppu_state, input_state.address); + input_state.Store(*s_ppu_state); + (s_interpreter->*interp_fn)(args...); + interp_output_state.Load(*s_ppu_state, input_state.address); - if (interp_output_state.ToString() != recomp_output_state.ToString()) { - msg = std::string("Input state:\n") + input_state.ToString() + + if (interp_output_state.ToString() != recomp_output_state.ToString()) { + msg = std::string("Input state:\n") + input_state.ToString() + #ifdef PPU_LLVM_RECOMPILER_UNIT_TESTS_VERBOSE - std::string("\nOutput state:\n") + recomp_output_state.ToString() + - std::string("\nInterpreter output state:\n") + interp_output_state.ToString() + + 
std::string("\nOutput state:\n") + recomp_output_state.ToString() + + std::string("\nInterpreter output state:\n") + interp_output_state.ToString() + #endif // PPU_LLVM_RECOMPILER_UNIT_TESTS_VERBOSE - std::string("\nState diff:\n") + StateDiff(recomp_output_state, interp_output_state); - return false; - } + std::string("\nState diff:\n") + StateDiff(recomp_output_state, interp_output_state); + return false; + } - return true; - }; - RunTest(name, test_case, input, check_result); + return true; + }; + RunTest(name, test_case, input, check_result); #endif // PPU_LLVM_RECOMPILER_UNIT_TESTS } void Compiler::RunTest(const char * name, std::function test_case, std::function input, std::function check_result) { #ifdef PPU_LLVM_RECOMPILER_UNIT_TESTS - m_recompilation_engine.Log() << "Running test " << name << '\n'; + m_recompilation_engine.Log() << "Running test " << name << '\n'; auto fpmexec = getFpmAndExec(); auto fpm = fpmexec.first; auto execution_engine = fpmexec.second; - // Create the function - m_state.function = (Function *)m_module->getOrInsertFunction(name, m_compiled_function_type); - m_state.function->setCallingConv(CallingConv::X86_64_Win64); - auto arg_i = m_state.function->arg_begin(); - arg_i->setName("ppu_state"); - m_state.args[CompileTaskState::Args::State] = arg_i; - (++arg_i)->setName("context"); - m_state.args[CompileTaskState::Args::Context] = arg_i; - m_state.current_instruction_address = s_ppu_state->PC; + // Create the function + m_state.function = (Function *)m_module->getOrInsertFunction(name, m_compiled_function_type); + m_state.function->setCallingConv(CallingConv::X86_64_Win64); + auto arg_i = m_state.function->arg_begin(); + arg_i->setName("ppu_state"); + m_state.args[CompileTaskState::Args::State] = arg_i; + (++arg_i)->setName("context"); + m_state.args[CompileTaskState::Args::Context] = arg_i; + m_state.current_instruction_address = s_ppu_state->PC; - auto block = BasicBlock::Create(*m_llvm_context, "start", m_state.function); - 
m_ir_builder->SetInsertPoint(block); + auto block = BasicBlock::Create(*m_llvm_context, "start", m_state.function); + m_ir_builder->SetInsertPoint(block); - test_case(); + test_case(); - m_ir_builder->CreateRet(m_ir_builder->getInt32(0)); + m_ir_builder->CreateRet(m_ir_builder->getInt32(0)); - std::stringstream logmsg; + std::stringstream logmsg; - // Print the IR - std::string ir; - raw_string_ostream ir_ostream(ir); - m_state.function->print(ir_ostream); - //m_recompilation_engine.Log() << "LLVM IR:" << ir; - logmsg << "LLVM IR:" << ir; + // Print the IR + std::string ir; + raw_string_ostream ir_ostream(ir); + m_state.function->print(ir_ostream); + //m_recompilation_engine.Log() << "LLVM IR:" << ir; + logmsg << "LLVM IR:" << ir; - std::string verify_results; - raw_string_ostream verify_results_ostream(verify_results); - if (verifyFunction(*m_state.function, &verify_results_ostream)) { - // m_recompilation_engine.Log() << "Verification Failed:\n" << verify_results << '\n'; - logmsg << "Verification Failed:\n" << verify_results << '\n'; - return; - } + std::string verify_results; + raw_string_ostream verify_results_ostream(verify_results); + if (verifyFunction(*m_state.function, &verify_results_ostream)) { + // m_recompilation_engine.Log() << "Verification Failed:\n" << verify_results << '\n'; + logmsg << "Verification Failed:\n" << verify_results << '\n'; + return; + } + // Optimize + fpm->run(*m_state.function); + // Print the optimized IR + ir = ""; + m_state.function->print(ir_ostream); + //m_recompilation_engine.Log() << "Optimized LLVM IR:" << ir; + logmsg << "Optimized LLVM IR:" << ir; - // Optimize - fpm->run(*m_state.function); - - // Print the optimized IR - ir = ""; - m_state.function->print(ir_ostream); - //m_recompilation_engine.Log() << "Optimized LLVM IR:" << ir; - logmsg << "Optimized LLVM IR:" << ir; - - // Generate the function - //MachineCodeInfo mci; + // Generate the function + //MachineCodeInfo mci; execution_engine->finalizeObject(); /* - // 
Disassemble the generated function - auto disassembler = LLVMCreateDisasm(sys::getProcessTriple().c_str(), nullptr, 0, nullptr, nullptr); + // Disassemble the generated function + auto disassembler = LLVMCreateDisasm(sys::getProcessTriple().c_str(), nullptr, 0, nullptr, nullptr); - //m_recompilation_engine.Log() << "Disassembly:\n"; - logmsg << "Disassembly:\n"; - for (uint64_t pc = 0; pc < mci.size();) { - char str[1024]; + //m_recompilation_engine.Log() << "Disassembly:\n"; + logmsg << "Disassembly:\n"; + for (uint64_t pc = 0; pc < mci.size();) { + char str[1024]; - auto size = LLVMDisasmInstruction(disassembler, (uint8_t *)mci.address() + pc, mci.size() - pc, (uint64_t)((uint8_t *)mci.address() + pc), str, sizeof(str)); - //m_recompilation_engine.Log() << ((uint8_t *)mci.address() + pc) << ':' << str << '\n'; + auto size = LLVMDisasmInstruction(disassembler, (uint8_t *)mci.address() + pc, mci.size() - pc, (uint64_t)((uint8_t *)mci.address() + pc), str, sizeof(str)); + //m_recompilation_engine.Log() << ((uint8_t *)mci.address() + pc) << ':' << str << '\n'; logmsg << "0x" << static_cast((uint8_t *)mci.address() + pc) << ':' << str << '\n'; - pc += size; - } + pc += size; + } - LLVMDisasmDispose(disassembler); + LLVMDisasmDispose(disassembler); */ - // Run the test - input(); - auto executable = (Executable)execution_engine->getPointerToFunction(m_state.function); - executable(s_ppu_state, 0); + // Run the test + input(); + auto executable = (Executable)execution_engine->getPointerToFunction(m_state.function); + executable(s_ppu_state, 0); - // Verify results - std::string msg; - bool pass = check_result(msg); - if (pass) { + // Verify results + std::string msg; + bool pass = check_result(msg); + if (pass) { #ifdef PPU_LLVM_RECOMPILER_UNIT_TESTS_VERBOSE - m_recompilation_engine.Log() << logmsg.str() << "Test " << name << " passed\n" << msg << "\n"; + m_recompilation_engine.Log() << logmsg.str() << "Test " << name << " passed\n" << msg << "\n"; #else - 
m_recompilation_engine.Log() << "Test " << name << " passed\n"; + m_recompilation_engine.Log() << "Test " << name << " passed\n"; #endif // PPU_LLVM_RECOMPILER_UNIT_TESTS_VERBOSE - } else { - m_recompilation_engine.Log() << logmsg.str() << "Test " << name << " failed\n" << msg << "\n"; - } + } + else { + m_recompilation_engine.Log() << logmsg.str() << "Test " << name << " failed\n" << msg << "\n"; + } delete fpm; #endif // PPU_LLVM_RECOMPILER_UNIT_TESTS @@ -383,575 +381,575 @@ void Compiler::RunTest(const char * name, std::function test_case, std:: void Compiler::RunAllTests() { #ifdef PPU_LLVM_RECOMPILER_UNIT_TESTS - s_ppu_state = Emu.GetIdManager().make_ptr("Test Thread").get(); + s_ppu_state = Emu.GetIdManager().make_ptr("Test Thread").get(); PPUInterpreter interpreter(*s_ppu_state); - s_interpreter = &interpreter; + s_interpreter = &interpreter; - m_recompilation_engine.Log() << "Starting Unit Tests\n"; + m_recompilation_engine.Log() << "Starting Unit Tests\n"; - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFVSCR, 0, 5, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTVSCR, 0, 5, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDCUW, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDFP, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDSBS, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDSHS, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDSWS, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDUBM, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDUBS, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDUHM, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDUHS, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDUWM, 
0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDUWS, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VAND, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VANDC, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VAVGSB, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VAVGSH, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VAVGSW, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VAVGUB, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VAVGUH, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VAVGUW, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCFSX, 0, 5, 0u, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCFSX, 5, 5, 0u, 3u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCFUX, 0, 5, 0u, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCFUX, 5, 5, 0u, 2u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPBFP, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPBFP, 5, 5, 0u, 1u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPBFP_, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPBFP_, 5, 5, 0u, 1u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQFP, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQFP, 5, 5, 0u, 1u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQFP_, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQFP_, 5, 5, 0u, 1u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUB, 0, 5, 0u, 1u, 2u); - 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUB, 5, 5, 0u, 1u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUB_, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUB_, 5, 5, 0u, 1u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUH, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUH, 5, 5, 0u, 1u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUH_, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUH_, 5, 5, 0u, 1u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUW, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUW, 5, 5, 0u, 1u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUW_, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUW_, 5, 5, 0u, 1u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGEFP, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGEFP, 5, 5, 0u, 1u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGEFP_, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGEFP_, 5, 5, 0u, 1u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTFP, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTFP, 5, 5, 0u, 1u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTFP_, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTFP_, 5, 5, 0u, 1u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSB, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSB, 5, 5, 0u, 1u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSB_, 0, 5, 0u, 1u, 2u); - 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSB_, 5, 5, 0u, 1u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSH, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSH, 5, 5, 0u, 1u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSH_, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSH_, 5, 5, 0u, 1u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSW, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSW, 5, 5, 0u, 1u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSW_, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSW_, 5, 5, 0u, 1u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUB, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUB, 5, 5, 0u, 1u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUB_, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUB_, 5, 5, 0u, 1u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUH, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUH, 5, 5, 0u, 1u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUH_, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUH_, 5, 5, 0u, 1u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUW, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUW, 5, 5, 0u, 1u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUW_, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUW_, 5, 5, 0u, 1u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCTSXS, 0, 5, 0u, 0u, 1u); - 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCTSXS, 5, 5, 0u, 3u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCTUXS, 0, 5, 0u, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCTUXS, 5, 5, 0u, 3u, 1u); - //VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VEXPTEFP, 0, 5, 0u, 1u); CRASH! - //VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VLOGEFP, 0, 5, 0u, 1u); CRASH! - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMADDFP, 0, 5, 0u, 1u, 2u, 3u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMAXFP, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMAXSB, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMAXSH, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMAXSW, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMAXUB, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMAXUH, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMAXUW, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMHADDSHS, 0, 5, 0u, 1u, 2u, 3u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMHRADDSHS, 0, 5, 0u, 1u, 2u, 3u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMINFP, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMINSB, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMINSH, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMINSW, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMINUB, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMINUH, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMINUW, 0, 5, 0u, 1u, 2u); - 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMLADDUHM, 0, 5, 0u, 1u, 2u, 3u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMRGHB, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMRGHH, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMRGHW, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMRGLB, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMRGLH, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMRGLW, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMMBM, 0, 5, 0u, 1u, 2u, 3u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMSHM, 0, 5, 0u, 1u, 2u, 3u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMSHS, 0, 5, 0u, 1u, 2u, 3u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMUBM, 0, 5, 0u, 1u, 2u, 3u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMUHM, 0, 5, 0u, 1u, 2u, 3u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMUHS, 0, 5, 0u, 1u, 2u, 3u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULESB, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULESH, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULEUB, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULEUH, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULOSB, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULOSH, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULOUB, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULOUH, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VNMSUBFP, 0, 5, 0u, 1u, 2u, 3u); - 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VNOR, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VOR, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPERM, 0, 5, 0u, 1u, 2u, 3u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKPX, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKSHSS, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKSHUS, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKSWSS, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKSWUS, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKUHUM, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKUHUS, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKUWUM, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKUWUS, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VREFP, 0, 5, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VRFIM, 0, 5, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VRFIN, 0, 5, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VRFIP, 0, 5, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VRFIZ, 0, 5, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VRLB, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VRLH, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VRLW, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VRSQRTEFP, 0, 5, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSEL, 0, 5, 0u, 1u, 2u, 3u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSL, 0, 5, 0u, 1u, 2u); - 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSLB, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSLDOI, 0, 5, 0u, 1u, 2u, 6u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSLH, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSLO, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSLW, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSPLTB, 0, 5, 0u, 3u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSPLTH, 0, 5, 0u, 3u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSPLTISB, 0, 5, 0u, 12345); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSPLTISH, 0, 5, 0u, 12345); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSPLTISW, 0, 5, 0u, -12345); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSPLTW, 0, 5, 0u, 3u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSR, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSRAB, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSRAH, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSRAW, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSRB, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSRH, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSRO, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSRW, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBFP, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBSBS, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBSHS, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBSWS, 0, 5, 0u, 1u, 2u); - 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUBM, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUBS, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUHM, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUHS, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUWM, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUWS, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUMSWS, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUM2SWS, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUM4SBS, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUM4SHS, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUM4UBS, 0, 5, 0u, 1u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKHPX, 0, 5, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKHSB, 0, 5, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKHSH, 0, 5, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKLPX, 0, 5, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKLSB, 0, 5, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKLSH, 0, 5, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VXOR, 0, 5, 0u, 1u, 2u); - // TODO: Rest of the vector instructions + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFVSCR, 0, 5, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTVSCR, 0, 5, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDCUW, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDFP, 0, 5, 0u, 1u, 2u); + 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDSBS, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDSHS, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDSWS, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDUBM, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDUBS, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDUHM, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDUHS, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDUWM, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VADDUWS, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VAND, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VANDC, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VAVGSB, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VAVGSH, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VAVGSW, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VAVGUB, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VAVGUH, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VAVGUW, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCFSX, 0, 5, 0u, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCFSX, 5, 5, 0u, 3u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCFUX, 0, 5, 0u, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCFUX, 5, 5, 0u, 2u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPBFP, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPBFP, 5, 5, 0u, 1u, 
1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPBFP_, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPBFP_, 5, 5, 0u, 1u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQFP, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQFP, 5, 5, 0u, 1u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQFP_, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQFP_, 5, 5, 0u, 1u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUB, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUB, 5, 5, 0u, 1u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUB_, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUB_, 5, 5, 0u, 1u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUH, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUH, 5, 5, 0u, 1u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUH_, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUH_, 5, 5, 0u, 1u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUW, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUW, 5, 5, 0u, 1u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUW_, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPEQUW_, 5, 5, 0u, 1u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGEFP, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGEFP, 5, 5, 0u, 1u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGEFP_, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGEFP_, 5, 5, 0u, 1u, 1u); + 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTFP, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTFP, 5, 5, 0u, 1u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTFP_, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTFP_, 5, 5, 0u, 1u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSB, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSB, 5, 5, 0u, 1u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSB_, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSB_, 5, 5, 0u, 1u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSH, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSH, 5, 5, 0u, 1u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSH_, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSH_, 5, 5, 0u, 1u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSW, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSW, 5, 5, 0u, 1u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSW_, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTSW_, 5, 5, 0u, 1u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUB, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUB, 5, 5, 0u, 1u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUB_, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUB_, 5, 5, 0u, 1u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUH, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUH, 5, 5, 0u, 1u, 1u); + 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUH_, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUH_, 5, 5, 0u, 1u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUW, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUW, 5, 5, 0u, 1u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUW_, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCMPGTUW_, 5, 5, 0u, 1u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCTSXS, 0, 5, 0u, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCTSXS, 5, 5, 0u, 3u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCTUXS, 0, 5, 0u, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VCTUXS, 5, 5, 0u, 3u, 1u); + //VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VEXPTEFP, 0, 5, 0u, 1u); CRASH! + //VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VLOGEFP, 0, 5, 0u, 1u); CRASH! 
+ VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMADDFP, 0, 5, 0u, 1u, 2u, 3u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMAXFP, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMAXSB, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMAXSH, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMAXSW, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMAXUB, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMAXUH, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMAXUW, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMHADDSHS, 0, 5, 0u, 1u, 2u, 3u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMHRADDSHS, 0, 5, 0u, 1u, 2u, 3u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMINFP, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMINSB, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMINSH, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMINSW, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMINUB, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMINUH, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMINUW, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMLADDUHM, 0, 5, 0u, 1u, 2u, 3u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMRGHB, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMRGHH, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMRGHW, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMRGLB, 0, 5, 0u, 1u, 2u); + 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMRGLH, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMRGLW, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMMBM, 0, 5, 0u, 1u, 2u, 3u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMSHM, 0, 5, 0u, 1u, 2u, 3u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMSHS, 0, 5, 0u, 1u, 2u, 3u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMUBM, 0, 5, 0u, 1u, 2u, 3u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMUHM, 0, 5, 0u, 1u, 2u, 3u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMSUMUHS, 0, 5, 0u, 1u, 2u, 3u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULESB, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULESH, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULEUB, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULEUH, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULOSB, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULOSH, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULOUB, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VMULOUH, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VNMSUBFP, 0, 5, 0u, 1u, 2u, 3u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VNOR, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VOR, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPERM, 0, 5, 0u, 1u, 2u, 3u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKPX, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKSHSS, 0, 5, 0u, 1u, 2u); + 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKSHUS, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKSWSS, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKSWUS, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKUHUM, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKUHUS, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKUWUM, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VPKUWUS, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VREFP, 0, 5, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VRFIM, 0, 5, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VRFIN, 0, 5, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VRFIP, 0, 5, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VRFIZ, 0, 5, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VRLB, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VRLH, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VRLW, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VRSQRTEFP, 0, 5, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSEL, 0, 5, 0u, 1u, 2u, 3u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSL, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSLB, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSLDOI, 0, 5, 0u, 1u, 2u, 6u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSLH, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSLO, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSLW, 0, 5, 0u, 1u, 2u); + 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSPLTB, 0, 5, 0u, 3u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSPLTH, 0, 5, 0u, 3u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSPLTISB, 0, 5, 0u, 12345); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSPLTISH, 0, 5, 0u, 12345); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSPLTISW, 0, 5, 0u, -12345); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSPLTW, 0, 5, 0u, 3u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSR, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSRAB, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSRAH, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSRAW, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSRB, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSRH, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSRO, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSRW, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBFP, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBSBS, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBSHS, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBSWS, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUBM, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUBS, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUHM, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUHS, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUWM, 0, 5, 0u, 1u, 2u); + 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUBUWS, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUMSWS, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUM2SWS, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUM4SBS, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUM4SHS, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VSUM4UBS, 0, 5, 0u, 1u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKHPX, 0, 5, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKHSB, 0, 5, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKHSH, 0, 5, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKLPX, 0, 5, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKLSB, 0, 5, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VUPKLSH, 0, 5, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(VXOR, 0, 5, 0u, 1u, 2u); + // TODO: Rest of the vector instructions - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULLI, 0, 5, 1u, 2u, 12345); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFIC, 0, 5, 1u, 2u, 12345); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMPLI, 0, 5, 1u, 0u, 7u, 12345u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMPLI, 5, 5, 1u, 1u, 7u, 12345u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMPI, 0, 5, 5u, 0u, 7u, -12345); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMPI, 5, 5, 5u, 1u, 7u, -12345); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDIC, 0, 5, 1u, 2u, 12345); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDIC_, 0, 5, 1u, 2u, 12345); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDI, 0, 5, 1u, 2u, 12345); - 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDI, 5, 5, 0u, 2u, 12345); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDIS, 0, 5, 1u, 2u, -12345); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDIS, 5, 5, 0u, 2u, -12345); - // TODO: BC - // TODO: SC - // TODO: B - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MCRF, 0, 5, 0u, 7u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MCRF, 5, 5, 6u, 2u); - // TODO: BCLR - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CRNOR, 0, 5, 0u, 7u, 3u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CRANDC, 0, 5, 5u, 6u, 7u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ISYNC, 0, 5); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CRXOR, 0, 5, 7u, 7u, 7u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CRNAND, 0, 5, 3u, 4u, 5u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CRAND, 0, 5, 1u, 2u, 3u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CREQV, 0, 5, 2u, 1u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CRORC, 0, 5, 3u, 4u, 5u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CROR, 0, 5, 6u, 7u, 0u); - // TODO: BCCTR - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLWIMI, 0, 5, 7u, 8u, 9u, 12u, 25u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLWIMI, 5, 5, 21u, 22u, 21u, 18u, 24u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLWINM, 0, 5, 7u, 8u, 9u, 12u, 25u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLWINM, 5, 5, 21u, 22u, 21u, 18u, 24u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLWNM, 0, 5, 7u, 8u, 9u, 12u, 25u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLWNM, 5, 5, 21u, 22u, 21u, 18u, 24u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ORI, 0, 5, 25u, 29u, 12345u); - 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ORIS, 0, 5, 7u, 31u, 12345u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(XORI, 0, 5, 0u, 19u, 12345u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(XORIS, 0, 5, 3u, 14u, 12345u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ANDI_, 0, 5, 16u, 7u, 12345u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ANDIS_, 0, 5, 23u, 21u, 12345u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDICL, 0, 5, 7u, 8u, 9u, 12u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDICL, 5, 5, 21u, 22u, 43u, 43u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDICR, 0, 5, 7u, 8u, 0u, 12u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDICR, 5, 5, 21u, 22u, 63u, 43u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDIC, 0, 5, 7u, 8u, 9u, 12u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDIC, 5, 5, 21u, 22u, 23u, 43u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDIMI, 0, 5, 7u, 8u, 9u, 12u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDIMI, 5, 5, 21u, 22u, 23u, 43u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDC_LR, 0, 5, 7u, 8u, 9u, 12u, 0u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDC_LR, 5, 5, 21u, 22u, 23u, 43u, 1u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMP, 0, 5, 3u, 0u, 9u, 31u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMP, 5, 5, 6u, 1u, 23u, 14u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFC, 0, 5, 0u, 1u, 2u, 0u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFC, 5, 5, 0u, 1u, 2u, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDC, 0, 5, 0u, 1u, 2u, 0u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDC, 5, 5, 0u, 1u, 2u, 0u, 1u); - 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHDU, 0, 5, 7u, 8u, 9u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHDU, 5, 5, 21u, 22u, 23u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHWU, 0, 5, 7u, 8u, 9u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHWU, 5, 5, 21u, 22u, 23u, 1u); - // TODO: MFOCRF - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SLW, 0, 5, 5u, 6u, 7u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SLW, 5, 5, 5u, 6u, 7u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CNTLZW, 0, 5, 5u, 6u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CNTLZW, 5, 5, 5u, 6u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SLD, 0, 5, 5u, 6u, 7u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SLD, 5, 5, 5u, 6u, 7u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(AND, 0, 5, 7u, 8u, 9u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(AND, 5, 5, 21u, 22u, 23u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMPL, 0, 5, 3u, 0u, 9u, 31u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMPL, 5, 5, 6u, 1u, 23u, 14u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBF, 0, 5, 7u, 8u, 9u, 0u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBF, 5, 5, 21u, 22u, 23u, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CNTLZD, 0, 5, 5u, 6u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CNTLZD, 5, 5, 5u, 6u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ANDC, 0, 5, 5u, 6u, 7u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ANDC, 5, 5, 5u, 6u, 7u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHD, 0, 5, 7u, 8u, 9u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHD, 5, 5, 21u, 22u, 23u, 1u); - 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHW, 0, 5, 7u, 8u, 9u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHW, 5, 5, 21u, 22u, 23u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(NEG, 0, 5, 7u, 8u, 0u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(NEG, 5, 5, 21u, 22u, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(NOR, 0, 5, 7u, 8u, 9u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(NOR, 5, 5, 21u, 22u, 23u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFE, 0, 5, 7u, 8u, 9u, 0u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFE, 5, 5, 21u, 22u, 23u, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDE, 0, 5, 7u, 8u, 9u, 0u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDE, 5, 5, 21u, 22u, 23u, 0u, 1u); - // TODO: MTOCRF - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDZE, 0, 5, 7u, 8u, 0u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDZE, 5, 5, 21u, 22u, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFZE, 0, 5, 7u, 8u, 0u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFZE, 5, 5, 21u, 22u, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFME, 0, 5, 7u, 8u, 0u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFME, 5, 5, 21u, 22u, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULLD, 0, 5, 7u, 8u, 9u, 0u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULLD, 5, 5, 21u, 22u, 23u, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDME, 0, 5, 7u, 8u, 0u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDME, 5, 5, 21u, 22u, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULLW, 0, 5, 7u, 8u, 9u, 0u, 0u); - 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULLW, 5, 5, 21u, 22u, 23u, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADD, 0, 5, 7u, 8u, 9u, 0u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADD, 5, 5, 21u, 22u, 23u, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EQV, 0, 5, 7u, 8u, 9u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EQV, 5, 5, 21u, 22u, 23u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(XOR, 0, 5, 7u, 8u, 9u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(XOR, 5, 5, 21u, 22u, 23u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFSPR, 0, 5, 5u, 0x20u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFSPR, 5, 5, 5u, 0x100u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFSPR, 10, 5, 5u, 0x120u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFSPR, 15, 5, 5u, 0x8u); - // TODO: MFTB - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ORC, 0, 5, 7u, 8u, 9u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ORC, 5, 5, 21u, 22u, 23u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(OR, 0, 5, 7u, 8u, 9u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(OR, 5, 5, 21u, 22u, 23u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVDU, 0, 5, 7u, 8u, 9u, 0u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVDU, 5, 5, 21u, 22u, 23u, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVWU, 0, 5, 7u, 8u, 9u, 0u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVWU, 5, 5, 21u, 22u, 23u, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTSPR, 0, 5, 0x20u, 5u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTSPR, 5, 5, 0x100u, 5u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTSPR, 10, 5, 0x120u, 5u); - 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTSPR, 15, 5, 0x8u, 5u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(NAND, 0, 5, 7u, 8u, 9u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(NAND, 5, 5, 21u, 22u, 23u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVD, 0, 5, 7u, 8u, 9u, 0u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVD, 5, 5, 21u, 22u, 23u, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVW, 0, 5, 7u, 8u, 9u, 0u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVW, 5, 5, 21u, 22u, 23u, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRW, 0, 5, 5u, 6u, 7u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRW, 5, 5, 5u, 6u, 7u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRD, 0, 5, 5u, 6u, 7u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRD, 5, 5, 5u, 6u, 7u, 1u); - // TODO: SYNC - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAW, 0, 5, 5u, 6u, 7u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAW, 5, 5, 5u, 6u, 7u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAD, 0, 5, 5u, 6u, 7u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAD, 5, 5, 5u, 6u, 7u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAWI, 0, 5, 5u, 6u, 0u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAWI, 5, 5, 5u, 6u, 12u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAWI, 10, 5, 5u, 6u, 22u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAWI, 15, 5, 5u, 6u, 31u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRADI1, 0, 5, 5u, 6u, 0u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRADI1, 5, 5, 5u, 6u, 12u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRADI1, 10, 5, 5u, 6u, 
48u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRADI1, 15, 5, 5u, 6u, 63u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EIEIO, 0, 5); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSH, 0, 5, 6u, 9u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSH, 5, 5, 6u, 9u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSB, 0, 5, 3u, 5u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSB, 5, 5, 3u, 5u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSW, 0, 5, 25u, 29u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSW, 5, 5, 25u, 29u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULLI, 0, 5, 1u, 2u, 12345); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFIC, 0, 5, 1u, 2u, 12345); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMPLI, 0, 5, 1u, 0u, 7u, 12345u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMPLI, 5, 5, 1u, 1u, 7u, 12345u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMPI, 0, 5, 5u, 0u, 7u, -12345); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMPI, 5, 5, 5u, 1u, 7u, -12345); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDIC, 0, 5, 1u, 2u, 12345); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDIC_, 0, 5, 1u, 2u, 12345); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDI, 0, 5, 1u, 2u, 12345); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDI, 5, 5, 0u, 2u, 12345); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDIS, 0, 5, 1u, 2u, -12345); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDIS, 5, 5, 0u, 2u, -12345); + // TODO: BC + // TODO: SC + // TODO: B + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MCRF, 0, 5, 0u, 7u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MCRF, 5, 5, 6u, 2u); + // TODO: 
BCLR + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CRNOR, 0, 5, 0u, 7u, 3u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CRANDC, 0, 5, 5u, 6u, 7u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ISYNC, 0, 5); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CRXOR, 0, 5, 7u, 7u, 7u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CRNAND, 0, 5, 3u, 4u, 5u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CRAND, 0, 5, 1u, 2u, 3u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CREQV, 0, 5, 2u, 1u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CRORC, 0, 5, 3u, 4u, 5u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CROR, 0, 5, 6u, 7u, 0u); + // TODO: BCCTR + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLWIMI, 0, 5, 7u, 8u, 9u, 12u, 25u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLWIMI, 5, 5, 21u, 22u, 21u, 18u, 24u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLWINM, 0, 5, 7u, 8u, 9u, 12u, 25u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLWINM, 5, 5, 21u, 22u, 21u, 18u, 24u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLWNM, 0, 5, 7u, 8u, 9u, 12u, 25u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLWNM, 5, 5, 21u, 22u, 21u, 18u, 24u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ORI, 0, 5, 25u, 29u, 12345u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ORIS, 0, 5, 7u, 31u, 12345u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(XORI, 0, 5, 0u, 19u, 12345u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(XORIS, 0, 5, 3u, 14u, 12345u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ANDI_, 0, 5, 16u, 7u, 12345u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ANDIS_, 0, 5, 23u, 21u, 12345u); + 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDICL, 0, 5, 7u, 8u, 9u, 12u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDICL, 5, 5, 21u, 22u, 43u, 43u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDICR, 0, 5, 7u, 8u, 0u, 12u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDICR, 5, 5, 21u, 22u, 63u, 43u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDIC, 0, 5, 7u, 8u, 9u, 12u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDIC, 5, 5, 21u, 22u, 23u, 43u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDIMI, 0, 5, 7u, 8u, 9u, 12u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDIMI, 5, 5, 21u, 22u, 23u, 43u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDC_LR, 0, 5, 7u, 8u, 9u, 12u, 0u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(RLDC_LR, 5, 5, 21u, 22u, 23u, 43u, 1u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMP, 0, 5, 3u, 0u, 9u, 31u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMP, 5, 5, 6u, 1u, 23u, 14u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFC, 0, 5, 0u, 1u, 2u, 0u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFC, 5, 5, 0u, 1u, 2u, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDC, 0, 5, 0u, 1u, 2u, 0u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDC, 5, 5, 0u, 1u, 2u, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHDU, 0, 5, 7u, 8u, 9u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHDU, 5, 5, 21u, 22u, 23u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHWU, 0, 5, 7u, 8u, 9u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHWU, 5, 5, 21u, 22u, 23u, 1u); + // TODO: MFOCRF + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SLW, 0, 5, 5u, 
6u, 7u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SLW, 5, 5, 5u, 6u, 7u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CNTLZW, 0, 5, 5u, 6u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CNTLZW, 5, 5, 5u, 6u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SLD, 0, 5, 5u, 6u, 7u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SLD, 5, 5, 5u, 6u, 7u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(AND, 0, 5, 7u, 8u, 9u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(AND, 5, 5, 21u, 22u, 23u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMPL, 0, 5, 3u, 0u, 9u, 31u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CMPL, 5, 5, 6u, 1u, 23u, 14u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBF, 0, 5, 7u, 8u, 9u, 0u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBF, 5, 5, 21u, 22u, 23u, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CNTLZD, 0, 5, 5u, 6u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(CNTLZD, 5, 5, 5u, 6u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ANDC, 0, 5, 5u, 6u, 7u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ANDC, 5, 5, 5u, 6u, 7u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHD, 0, 5, 7u, 8u, 9u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHD, 5, 5, 21u, 22u, 23u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHW, 0, 5, 7u, 8u, 9u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULHW, 5, 5, 21u, 22u, 23u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(NEG, 0, 5, 7u, 8u, 0u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(NEG, 5, 5, 21u, 22u, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(NOR, 0, 5, 7u, 8u, 9u, 0u); + 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(NOR, 5, 5, 21u, 22u, 23u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFE, 0, 5, 7u, 8u, 9u, 0u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFE, 5, 5, 21u, 22u, 23u, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDE, 0, 5, 7u, 8u, 9u, 0u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDE, 5, 5, 21u, 22u, 23u, 0u, 1u); + // TODO: MTOCRF + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDZE, 0, 5, 7u, 8u, 0u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDZE, 5, 5, 21u, 22u, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFZE, 0, 5, 7u, 8u, 0u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFZE, 5, 5, 21u, 22u, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFME, 0, 5, 7u, 8u, 0u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SUBFME, 5, 5, 21u, 22u, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULLD, 0, 5, 7u, 8u, 9u, 0u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULLD, 5, 5, 21u, 22u, 23u, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDME, 0, 5, 7u, 8u, 0u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADDME, 5, 5, 21u, 22u, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULLW, 0, 5, 7u, 8u, 9u, 0u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MULLW, 5, 5, 21u, 22u, 23u, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADD, 0, 5, 7u, 8u, 9u, 0u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ADD, 5, 5, 21u, 22u, 23u, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EQV, 0, 5, 7u, 8u, 9u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EQV, 5, 5, 21u, 22u, 23u, 1u); + 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(XOR, 0, 5, 7u, 8u, 9u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(XOR, 5, 5, 21u, 22u, 23u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFSPR, 0, 5, 5u, 0x20u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFSPR, 5, 5, 5u, 0x100u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFSPR, 10, 5, 5u, 0x120u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFSPR, 15, 5, 5u, 0x8u); + // TODO: MFTB + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ORC, 0, 5, 7u, 8u, 9u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(ORC, 5, 5, 21u, 22u, 23u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(OR, 0, 5, 7u, 8u, 9u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(OR, 5, 5, 21u, 22u, 23u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVDU, 0, 5, 7u, 8u, 9u, 0u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVDU, 5, 5, 21u, 22u, 23u, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVWU, 0, 5, 7u, 8u, 9u, 0u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVWU, 5, 5, 21u, 22u, 23u, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTSPR, 0, 5, 0x20u, 5u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTSPR, 5, 5, 0x100u, 5u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTSPR, 10, 5, 0x120u, 5u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTSPR, 15, 5, 0x8u, 5u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(NAND, 0, 5, 7u, 8u, 9u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(NAND, 5, 5, 21u, 22u, 23u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVD, 0, 5, 7u, 8u, 9u, 0u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVD, 5, 5, 21u, 22u, 23u, 0u, 1u); + 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVW, 0, 5, 7u, 8u, 9u, 0u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(DIVW, 5, 5, 21u, 22u, 23u, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRW, 0, 5, 5u, 6u, 7u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRW, 5, 5, 5u, 6u, 7u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRD, 0, 5, 5u, 6u, 7u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRD, 5, 5, 5u, 6u, 7u, 1u); + // TODO: SYNC + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAW, 0, 5, 5u, 6u, 7u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAW, 5, 5, 5u, 6u, 7u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAD, 0, 5, 5u, 6u, 7u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAD, 5, 5, 5u, 6u, 7u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAWI, 0, 5, 5u, 6u, 0u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAWI, 5, 5, 5u, 6u, 12u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAWI, 10, 5, 5u, 6u, 22u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRAWI, 15, 5, 5u, 6u, 31u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRADI1, 0, 5, 5u, 6u, 0u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRADI1, 5, 5, 5u, 6u, 12u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRADI1, 10, 5, 5u, 6u, 48u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(SRADI1, 15, 5, 5u, 6u, 63u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EIEIO, 0, 5); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSH, 0, 5, 6u, 9u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSH, 5, 5, 6u, 9u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSB, 0, 5, 3u, 5u, 0u); + 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSB, 5, 5, 3u, 5u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSW, 0, 5, 25u, 29u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(EXTSW, 5, 5, 25u, 29u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FDIVS, 0, 5, 0u, 1u, 2u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FSUBS, 0, 5, 0u, 1u, 2u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FADDS, 0, 5, 0u, 1u, 2u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FSQRTS, 0, 5, 0u, 1u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FRES, 0, 5, 0u, 1u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMULS, 0, 5, 0u, 1u, 2u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMADDS, 0, 5, 0u, 1u, 2u, 3u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMSUBS, 0, 5, 0u, 1u, 2u, 3u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNMSUBS, 0, 5, 0u, 1u, 2u, 3u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNMADDS, 0, 5, 0u, 1u, 2u, 3u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB1, 0, 5, 0u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB1, 5, 5, 3u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB1, 10, 5, 25u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB1, 15, 5, 31u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MCRFS, 0, 5, 0u, 7u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MCRFS, 5, 5, 7u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MCRFS, 10, 5, 5u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MCRFS, 15, 5, 5u, 3u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB0, 0, 5, 0u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB0, 5, 5, 
3u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB0, 10, 5, 25u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB0, 15, 5, 31u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSFI, 0, 5, 0u, 1u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSFI, 5, 5, 2u, 6u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSFI, 10, 5, 5u, 11u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSFI, 15, 5, 7u, 14u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFFS, 0, 5, 0u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSF, 0, 5, 0u, 0u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSF, 5, 5, 2u, 0u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSF, 10, 5, 5u, 0u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSF, 15, 5, 7u, 0u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCMPU, 0, 5, 5u, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FRSP, 0, 5, 0u, 1u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCTIW, 0, 5, 0u, 1u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCTIWZ, 0, 5, 0u, 1u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FDIV, 0, 5, 0u, 1u, 2u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FSUB, 0, 5, 0u, 1u, 2u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FADD, 0, 5, 0u, 1u, 2u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FSQRT, 0, 5, 0u, 1u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FSEL, 0, 5, 0u, 1u, 2u, 3u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMUL, 0, 5, 0u, 1u, 2u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FRSQRTE, 0, 5, 0u, 1u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMSUB, 0, 5, 
0u, 1u, 2u, 3u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMADD, 0, 5, 0u, 1u, 2u, 3u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNMSUB, 0, 5, 0u, 1u, 2u, 3u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNMADD, 0, 5, 0u, 1u, 2u, 3u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCMPO, 0, 5, 3u, 0u, 1u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNEG, 0, 5, 0u, 1u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMR, 0, 5, 0u, 1u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNABS, 0, 5, 0u, 1u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FABS, 0, 5, 0u, 1u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCTID, 0, 5, 0u, 1u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCFID, 0, 5, 0u, 1u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FDIVS, 0, 5, 0u, 1u, 2u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FSUBS, 0, 5, 0u, 1u, 2u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FADDS, 0, 5, 0u, 1u, 2u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FSQRTS, 0, 5, 0u, 1u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FRES, 0, 5, 0u, 1u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMULS, 0, 5, 0u, 1u, 2u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMADDS, 0, 5, 0u, 1u, 2u, 3u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMSUBS, 0, 5, 0u, 1u, 2u, 3u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNMSUBS, 0, 5, 0u, 1u, 2u, 3u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNMADDS, 0, 5, 0u, 1u, 2u, 3u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB1, 0, 5, 0u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB1, 5, 5, 3u, 0u); + 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB1, 10, 5, 25u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB1, 15, 5, 31u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MCRFS, 0, 5, 0u, 7u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MCRFS, 5, 5, 7u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MCRFS, 10, 5, 5u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MCRFS, 15, 5, 5u, 3u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB0, 0, 5, 0u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB0, 5, 5, 3u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB0, 10, 5, 25u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSB0, 15, 5, 31u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSFI, 0, 5, 0u, 1u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSFI, 5, 5, 2u, 6u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSFI, 10, 5, 5u, 11u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSFI, 15, 5, 7u, 14u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MFFS, 0, 5, 0u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSF, 0, 5, 0u, 0u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSF, 5, 5, 2u, 0u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSF, 10, 5, 5u, 0u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(MTFSF, 15, 5, 7u, 0u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCMPU, 0, 5, 5u, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FRSP, 0, 5, 0u, 1u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCTIW, 0, 5, 0u, 1u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCTIWZ, 0, 5, 0u, 1u, 0u); + 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FDIV, 0, 5, 0u, 1u, 2u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FSUB, 0, 5, 0u, 1u, 2u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FADD, 0, 5, 0u, 1u, 2u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FSQRT, 0, 5, 0u, 1u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FSEL, 0, 5, 0u, 1u, 2u, 3u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMUL, 0, 5, 0u, 1u, 2u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FRSQRTE, 0, 5, 0u, 1u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMSUB, 0, 5, 0u, 1u, 2u, 3u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMADD, 0, 5, 0u, 1u, 2u, 3u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNMSUB, 0, 5, 0u, 1u, 2u, 3u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNMADD, 0, 5, 0u, 1u, 2u, 3u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCMPO, 0, 5, 3u, 0u, 1u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNEG, 0, 5, 0u, 1u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FMR, 0, 5, 0u, 1u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FNABS, 0, 5, 0u, 1u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FABS, 0, 5, 0u, 1u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCTID, 0, 5, 0u, 1u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER_USING_RANDOM_INPUT(FCFID, 0, 5, 0u, 1u, 0u); - PPUState input; - input.SetRandom(0x10000); - input.GPR[14] = 10; - input.GPR[21] = 15; - input.GPR[23] = 0x10000; - input.mem_block[0] = 0x8877665544332211; + PPUState input; + input.SetRandom(0x10000); + input.GPR[14] = 10; + input.GPR[21] = 15; + input.GPR[23] = 0x10000; + input.mem_block[0] = 0x8877665544332211; - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LBZ, 0, input, 5u, 0u, 0x10000); - 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LBZ, 1, input, 5u, 14u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LBZU, 0, input, 5u, 14u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LBZX, 0, input, 5u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LBZX, 1, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LBZUX, 0, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHZ, 0, input, 5u, 0u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHZ, 1, input, 5u, 14u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHZU, 0, input, 5u, 14u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHZX, 0, input, 5u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHZX, 1, input, 5u, 14u, 23u); - //VERIFY_INSTRUCTION_AGAINST_INTERPRETER(ECIWX, 0, input, 5u, 0u, 23u); - //VERIFY_INSTRUCTION_AGAINST_INTERPRETER(ECIWX, 1, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHZUX, 0, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHA, 0, input, 5u, 0u, 0x100F0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHA, 1, input, 5u, 14u, 0x100F0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHAU, 0, input, 5u, 14u, 0x100F0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHAX, 0, input, 5u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHAX, 1, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHAUX, 0, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHBRX, 0, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWZ, 0, input, 5u, 0u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWZ, 1, input, 5u, 14u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWZU, 0, input, 5u, 14u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWZX, 0, input, 5u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWZX, 1, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWZUX, 0, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWA, 0, input, 5u, 0u, 0x100F0); - 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWA, 1, input, 5u, 14u, 0x100F0); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWAX, 0, input, 5u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWAX, 1, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWAUX, 0, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWBRX, 0, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LD, 0, input, 5u, 0u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LD, 1, input, 5u, 14u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LDU, 0, input, 5u, 14u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LDX, 0, input, 5u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LDX, 1, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LDUX, 0, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LDBRX, 0, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFS, 0, input, 5u, 0u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFS, 1, input, 5u, 14u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFSU, 0, input, 5u, 14u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFSX, 0, input, 5u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFSX, 1, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFSUX, 0, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFD, 0, input, 5u, 0u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFD, 1, input, 5u, 14u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFDU, 0, input, 5u, 14u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFDX, 0, input, 5u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFDX, 1, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFDUX, 0, input, 5u, 14u, 23u); - //VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWARX, 0, input, 5u, 0u, 23u); - //VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWARX, 1, input, 5u, 14u, 23u); - //VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LDARX, 0, input, 5u, 0u, 23u); - 
//VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LDARX, 1, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LSWI, 0, input, 5u, 23u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LSWI, 1, input, 5u, 23u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LSWI, 2, input, 5u, 23u, 7u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LSWI, 3, input, 5u, 23u, 25u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LMW, 0, input, 5u, 0u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LMW, 1, input, 16u, 14u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVX, 0, input, 5u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVX, 1, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVXL, 0, input, 5u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVXL, 1, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVSL, 0, input, 5u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVSL, 1, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVSL, 2, input, 5u, 21u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVSR, 0, input, 5u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVSR, 1, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVSR, 2, input, 5u, 21u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVEBX, 0, input, 5u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVEBX, 1, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVEBX, 2, input, 5u, 21u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVEHX, 0, input, 5u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVEHX, 1, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVEHX, 2, input, 5u, 21u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVEWX, 0, input, 5u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVEWX, 1, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVEWX, 2, input, 5u, 21u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVLX, 0, input, 5u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVLX, 1, 
input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVLX, 2, input, 5u, 21u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVRX, 0, input, 5u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVRX, 1, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVRX, 2, input, 5u, 21u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LBZ, 0, input, 5u, 0u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LBZ, 1, input, 5u, 14u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LBZU, 0, input, 5u, 14u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LBZX, 0, input, 5u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LBZX, 1, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LBZUX, 0, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHZ, 0, input, 5u, 0u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHZ, 1, input, 5u, 14u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHZU, 0, input, 5u, 14u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHZX, 0, input, 5u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHZX, 1, input, 5u, 14u, 23u); + //VERIFY_INSTRUCTION_AGAINST_INTERPRETER(ECIWX, 0, input, 5u, 0u, 23u); + //VERIFY_INSTRUCTION_AGAINST_INTERPRETER(ECIWX, 1, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHZUX, 0, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHA, 0, input, 5u, 0u, 0x100F0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHA, 1, input, 5u, 14u, 0x100F0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHAU, 0, input, 5u, 14u, 0x100F0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHAX, 0, input, 5u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHAX, 1, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHAUX, 0, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LHBRX, 0, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWZ, 0, input, 5u, 0u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWZ, 1, input, 5u, 14u, 
0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWZU, 0, input, 5u, 14u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWZX, 0, input, 5u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWZX, 1, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWZUX, 0, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWA, 0, input, 5u, 0u, 0x100F0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWA, 1, input, 5u, 14u, 0x100F0); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWAX, 0, input, 5u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWAX, 1, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWAUX, 0, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWBRX, 0, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LD, 0, input, 5u, 0u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LD, 1, input, 5u, 14u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LDU, 0, input, 5u, 14u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LDX, 0, input, 5u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LDX, 1, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LDUX, 0, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LDBRX, 0, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFS, 0, input, 5u, 0u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFS, 1, input, 5u, 14u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFSU, 0, input, 5u, 14u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFSX, 0, input, 5u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFSX, 1, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFSUX, 0, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFD, 0, input, 5u, 0u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFD, 1, input, 5u, 14u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFDU, 0, input, 5u, 14u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFDX, 0, input, 5u, 0u, 23u); + 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFDX, 1, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LFDUX, 0, input, 5u, 14u, 23u); + //VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWARX, 0, input, 5u, 0u, 23u); + //VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LWARX, 1, input, 5u, 14u, 23u); + //VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LDARX, 0, input, 5u, 0u, 23u); + //VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LDARX, 1, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LSWI, 0, input, 5u, 23u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LSWI, 1, input, 5u, 23u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LSWI, 2, input, 5u, 23u, 7u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LSWI, 3, input, 5u, 23u, 25u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LMW, 0, input, 5u, 0u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LMW, 1, input, 16u, 14u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVX, 0, input, 5u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVX, 1, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVXL, 0, input, 5u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVXL, 1, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVSL, 0, input, 5u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVSL, 1, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVSL, 2, input, 5u, 21u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVSR, 0, input, 5u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVSR, 1, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVSR, 2, input, 5u, 21u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVEBX, 0, input, 5u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVEBX, 1, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVEBX, 2, input, 5u, 21u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVEHX, 0, input, 5u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVEHX, 1, input, 5u, 14u, 23u); + 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVEHX, 2, input, 5u, 21u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVEWX, 0, input, 5u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVEWX, 1, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVEWX, 2, input, 5u, 21u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVLX, 0, input, 5u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVLX, 1, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVLX, 2, input, 5u, 21u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVRX, 0, input, 5u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVRX, 1, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(LVRX, 2, input, 5u, 21u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STB, 0, input, 3u, 0u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STB, 1, input, 3u, 14u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STBU, 0, input, 3u, 14u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDCX_, 0, input, 3u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDCX_, 1, input, 3u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STBX, 0, input, 3u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STBX, 1, input, 3u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STBUX, 0, input, 3u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STH, 0, input, 3u, 0u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STH, 1, input, 3u, 14u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STHU, 0, input, 3u, 14u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STHX, 0, input, 3u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STHX, 1, input, 3u, 14u, 23u); - //VERIFY_INSTRUCTION_AGAINST_INTERPRETER(ECOWX, 0, input, 3u, 0u, 23u); - //VERIFY_INSTRUCTION_AGAINST_INTERPRETER(ECOWX, 1, input, 3u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STHUX, 0, input, 3u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STHBRX, 0, input, 3u, 14u, 23u); - 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STW, 0, input, 3u, 0u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STW, 1, input, 3u, 14u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWU, 0, input, 3u, 14u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWX, 0, input, 3u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWX, 1, input, 3u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWUX, 0, input, 3u, 14u, 23u); - //VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVLX, 0, input, 0u, 0u, 23u); CRASH - //VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVLX, 1, input, 0u, 14u, 23u); CRASH - //VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVLX, 2, input, 0u, 21u, 23u); CRASH - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWBRX, 0, input, 3u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STD, 0, input, 3u, 0u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STD, 1, input, 3u, 14u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDU, 0, input, 3u, 14u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDX, 0, input, 3u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDX, 1, input, 3u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWCX_, 0, input, 3u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWCX_, 1, input, 3u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDUX, 0, input, 3u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFS, 0, input, 3u, 0u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFS, 1, input, 3u, 14u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFSU, 0, input, 3u, 14u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFSX, 0, input, 3u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFSX, 1, input, 3u, 14u, 23u); - //VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVRX, 0, input, 0u, 0u, 23u); CRASH - //VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVRX, 1, input, 0u, 14u, 23u); CRASH - //VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVRX, 2, input, 0u, 21u, 23u); CRASH - 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFSUX, 0, input, 3u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFD, 0, input, 3u, 0u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFD, 1, input, 3u, 14u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFDU, 0, input, 3u, 14u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFDX, 0, input, 3u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFDX, 1, input, 3u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFDUX, 0, input, 3u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFIWX, 0, input, 3u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVX, 0, input, 5u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVX, 1, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVXL, 0, input, 5u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVXL, 1, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVEBX, 0, input, 5u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVEBX, 1, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVEHX, 0, input, 5u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVEHX, 1, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVEWX, 0, input, 5u, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVEWX, 1, input, 5u, 14u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STMW, 0, input, 5u, 0u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STMW, 1, input, 16u, 14u, 0x10000); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STSWI, 0, input, 5u, 23u, 0u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STSWI, 1, input, 5u, 23u, 2u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STSWI, 2, input, 5u, 23u, 7u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STSWI, 3, input, 5u, 23u, 25u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(DCBZ, 0, input, 0u, 23u); - VERIFY_INSTRUCTION_AGAINST_INTERPRETER(DCBZ, 1, input, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STB, 0, input, 3u, 0u, 0x10000); + 
VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STB, 1, input, 3u, 14u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STBU, 0, input, 3u, 14u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDCX_, 0, input, 3u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDCX_, 1, input, 3u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STBX, 0, input, 3u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STBX, 1, input, 3u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STBUX, 0, input, 3u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STH, 0, input, 3u, 0u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STH, 1, input, 3u, 14u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STHU, 0, input, 3u, 14u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STHX, 0, input, 3u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STHX, 1, input, 3u, 14u, 23u); + //VERIFY_INSTRUCTION_AGAINST_INTERPRETER(ECOWX, 0, input, 3u, 0u, 23u); + //VERIFY_INSTRUCTION_AGAINST_INTERPRETER(ECOWX, 1, input, 3u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STHUX, 0, input, 3u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STHBRX, 0, input, 3u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STW, 0, input, 3u, 0u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STW, 1, input, 3u, 14u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWU, 0, input, 3u, 14u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWX, 0, input, 3u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWX, 1, input, 3u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWUX, 0, input, 3u, 14u, 23u); + //VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVLX, 0, input, 0u, 0u, 23u); CRASH + //VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVLX, 1, input, 0u, 14u, 23u); CRASH + //VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVLX, 2, input, 0u, 21u, 23u); CRASH + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWBRX, 0, input, 3u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STD, 0, input, 3u, 0u, 
0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STD, 1, input, 3u, 14u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDU, 0, input, 3u, 14u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDX, 0, input, 3u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDX, 1, input, 3u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWCX_, 0, input, 3u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STWCX_, 1, input, 3u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STDUX, 0, input, 3u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFS, 0, input, 3u, 0u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFS, 1, input, 3u, 14u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFSU, 0, input, 3u, 14u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFSX, 0, input, 3u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFSX, 1, input, 3u, 14u, 23u); + //VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVRX, 0, input, 0u, 0u, 23u); CRASH + //VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVRX, 1, input, 0u, 14u, 23u); CRASH + //VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVRX, 2, input, 0u, 21u, 23u); CRASH + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFSUX, 0, input, 3u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFD, 0, input, 3u, 0u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFD, 1, input, 3u, 14u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFDU, 0, input, 3u, 14u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFDX, 0, input, 3u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFDX, 1, input, 3u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFDUX, 0, input, 3u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STFIWX, 0, input, 3u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVX, 0, input, 5u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVX, 1, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVXL, 0, input, 5u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVXL, 1, 
input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVEBX, 0, input, 5u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVEBX, 1, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVEHX, 0, input, 5u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVEHX, 1, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVEWX, 0, input, 5u, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STVEWX, 1, input, 5u, 14u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STMW, 0, input, 5u, 0u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STMW, 1, input, 16u, 14u, 0x10000); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STSWI, 0, input, 5u, 23u, 0u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STSWI, 1, input, 5u, 23u, 2u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STSWI, 2, input, 5u, 23u, 7u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(STSWI, 3, input, 5u, 23u, 25u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(DCBZ, 0, input, 0u, 23u); + VERIFY_INSTRUCTION_AGAINST_INTERPRETER(DCBZ, 1, input, 14u, 23u); - m_recompilation_engine.Log() << "Finished Unit Tests\n"; + m_recompilation_engine.Log() << "Finished Unit Tests\n"; #endif // PPU_LLVM_RECOMPILER_UNIT_TESTS -} +} \ No newline at end of file