diff --git a/include/xenia/cpu/codegen/function_generator.h b/include/xenia/cpu/codegen/function_generator.h index 600a63403..635f713e4 100644 --- a/include/xenia/cpu/codegen/function_generator.h +++ b/include/xenia/cpu/codegen/function_generator.h @@ -91,7 +91,8 @@ public: private: void GenerateSharedBlocks(); - void GenerateBasicBlock(sdb::FunctionBlock* block, llvm::BasicBlock* bb); + void PrepareBasicBlock(sdb::FunctionBlock* block); + void GenerateBasicBlock(sdb::FunctionBlock* block); void setup_xer(); void setup_lr(); @@ -118,8 +119,9 @@ private: std::map bbs_; // Address of the instruction being generated. - uint32_t cia_; + uint32_t cia_; + ppc::InstrAccessBits access_bits_; struct { llvm::Value* indirection_target; llvm::Value* indirection_cia; diff --git a/include/xenia/cpu/ppc/instr.h b/include/xenia/cpu/ppc/instr.h index ac0200e56..af2fa2052 100644 --- a/include/xenia/cpu/ppc/instr.h +++ b/include/xenia/cpu/ppc/instr.h @@ -250,6 +250,25 @@ typedef struct { } InstrOperand; +class InstrAccessBits { +public: + InstrAccessBits() : spr(0), cr(0), gpr(0), fpr(0) {} + + // Bitmasks derived from the accesses to registers. + // Format is 2 bits for each register, even bits indicating reads and odds + // indicating writes. 
+ uint64_t spr; // fpscr/ctr/lr/xer + uint64_t cr; // cr7/6/5/4/3/2/1/0 + uint64_t gpr; // r31-0 + uint64_t fpr; // f31-0 + + void Clear(); + void Extend(InstrAccessBits& other); + void MarkAccess(InstrRegister& reg); + void Dump(std::string& out_str); +}; + + class InstrDisasm { public: enum Flags { @@ -263,6 +282,7 @@ public: char info[64]; std::vector operands; std::vector special_registers; + InstrAccessBits access_bits; void Init(std::string name, std::string info, uint32_t flags); void AddLR(InstrRegister::Access access); @@ -274,11 +294,6 @@ public: void AddUImmOperand(uint64_t value, size_t width, std::string display = ""); int Finish(); - // TODO(benvanik): fast checks - uint64_t reg_mask; - uint64_t gpr_mask; - uint64_t fpr_mask; - void Dump(std::string& str, size_t pad = 8); }; diff --git a/src/cpu/codegen/emit_memory.cc b/src/cpu/codegen/emit_memory.cc index dc8b6e119..903e38b22 100644 --- a/src/cpu/codegen/emit_memory.cc +++ b/src/cpu/codegen/emit_memory.cc @@ -845,12 +845,22 @@ XEEMITTER(dcbst, 0x7C00006C, X )(FunctionGenerator& g, IRBuilder<>& b, I return 1; } +XEDISASMR(dcbt, 0x7C00022C, X )(InstrData& i, InstrDisasm& d) { + d.Init("dcbt", "Data Cache Block Touch", 0); + // TODO + return d.Finish(); +} XEEMITTER(dcbt, 0x7C00022C, X )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { // No-op for now. // TODO(benvanik): use @llvm.prefetch return 0; } +XEDISASMR(dcbtst, 0x7C0001EC, X )(InstrData& i, InstrDisasm& d) { + d.Init("dcbtst", "Data Cache Block Touch for Store", 0); + // TODO + return d.Finish(); +} XEEMITTER(dcbtst, 0x7C0001EC, X )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { // No-op for now. 
// TODO(benvanik): use @llvm.prefetch @@ -947,8 +957,8 @@ void RegisterEmitCategoryMemory() { XEREGISTEREMITTER(stfsx, 0x7C00052E); XEREGISTEREMITTER(dcbf, 0x7C0000AC); XEREGISTEREMITTER(dcbst, 0x7C00006C); - XEREGISTEREMITTER(dcbt, 0x7C00022C); - XEREGISTEREMITTER(dcbtst, 0x7C0001EC); + XEREGISTERINSTR(dcbt, 0x7C00022C); + XEREGISTERINSTR(dcbtst, 0x7C0001EC); XEREGISTEREMITTER(dcbz, 0x7C0007EC); XEREGISTEREMITTER(icbi, 0x7C0007AC); } diff --git a/src/cpu/codegen/function_generator.cc b/src/cpu/codegen/function_generator.cc index 61d88db34..852e6f829 100644 --- a/src/cpu/codegen/function_generator.cc +++ b/src/cpu/codegen/function_generator.cc @@ -61,6 +61,8 @@ FunctionGenerator::FunctionGenerator( external_indirection_block_ = NULL; bb_ = NULL; + access_bits_.Clear(); + locals_.indirection_target = NULL; locals_.indirection_cia = NULL; @@ -146,21 +148,18 @@ void FunctionGenerator::GenerateBasicBlocks() { return_block_ = BasicBlock::Create(*context_, "return", gen_fn_); // Pass 1 creates all of the blocks - this way we can branch to them. + // We also track registers used so that we know which ones to fill/spill. for (std::map::iterator it = fn_->blocks.begin(); it != fn_->blocks.end(); ++it) { FunctionBlock* block = it->second; - - char name[32]; - xesnprintfa(name, XECOUNT(name), "loc_%.8X", block->start_address); - BasicBlock* bb = BasicBlock::Create(*context_, name, gen_fn_); - bbs_.insert(std::pair(block->start_address, bb)); + PrepareBasicBlock(block); } // Pass 2 fills in instructions. 
for (std::map::iterator it = fn_->blocks.begin(); it != fn_->blocks.end(); ++it) { FunctionBlock* block = it->second; - GenerateBasicBlock(block, GetBasicBlock(block->start_address)); + GenerateBasicBlock(block); } // Setup the shared return/indirection/etc blocks now that we know all the @@ -215,10 +214,49 @@ void FunctionGenerator::GenerateSharedBlocks() { } } -void FunctionGenerator::GenerateBasicBlock(FunctionBlock* block, - BasicBlock* bb) { +void FunctionGenerator::PrepareBasicBlock(FunctionBlock* block) { + // Create the basic block that will end up getting filled during + // generation. + char name[32]; + xesnprintfa(name, XECOUNT(name), "loc_%.8X", block->start_address); + BasicBlock* bb = BasicBlock::Create(*context_, name, gen_fn_); + bbs_.insert(std::pair(block->start_address, bb)); + + // Scan and disassemble each instruction in the block to get accurate + // register access bits. In the future we could do other optimization checks + // in this pass. + // TODO(benvanik): perhaps we want to stash this for each basic block? + // We could use this for faster checking of cr/ca checks/etc. + InstrAccessBits access_bits; + uint8_t* p = xe_memory_addr(memory_, 0); + for (uint32_t ia = block->start_address; ia <= block->end_address; ia += 4) { + InstrData i; + i.address = ia; + i.code = XEGETUINT32BE(p + ia); + i.type = ppc::GetInstrType(i.code); + + // Ignore unknown or ones with no disassembler fn. + if (!i.type || !i.type->disassemble) { + continue; + } + + ppc::InstrDisasm d; + i.type->disassemble(i, d); + + // Accumulate access bits. + access_bits.Extend(d.access_bits); + } + + // Add in access bits to function access bits. 
+ access_bits_.Extend(access_bits); +} + +void FunctionGenerator::GenerateBasicBlock(FunctionBlock* block) { IRBuilder<>& b = *builder_; + BasicBlock* bb = GetBasicBlock(block->start_address); + XEASSERTNOTNULL(bb); + printf(" bb %.8X-%.8X:\n", block->start_address, block->end_address); fn_block_ = block; @@ -268,7 +306,7 @@ void FunctionGenerator::GenerateBasicBlock(FunctionBlock* block, d.Dump(disasm); printf(" %.8X: %.8X %s\n", ia, i.code, disasm.c_str()); } else { - printf(" %.8X: %.8X %s\n", ia, i.code, i.type->name); + printf(" %.8X: %.8X %s ???\n", ia, i.code, i.type->name); } // TODO(benvanik): debugging information? source/etc? diff --git a/src/cpu/ppc/instr.cc b/src/cpu/ppc/instr.cc index 628af95d7..6bb3dcc4f 100644 --- a/src/cpu/ppc/instr.cc +++ b/src/cpu/ppc/instr.cc @@ -17,9 +17,136 @@ using namespace xe::cpu::ppc; +void InstrAccessBits::Clear() { + spr = cr = gpr = fpr = 0; +} + +void InstrAccessBits::Extend(InstrAccessBits& other) { + spr |= other.spr; + cr |= other.cr; + gpr |= other.gpr; + fpr |= other.fpr; + } + +void InstrAccessBits::MarkAccess(InstrRegister& reg) { + uint64_t bits = 0; + if (reg.access & InstrRegister::kRead) { + bits |= 0x1; + } + if (reg.access & InstrRegister::kWrite) { + bits |= 0x2; + } + + switch (reg.set) { + case InstrRegister::kXER: + spr |= bits << (2 * 0); + break; + case InstrRegister::kLR: + spr |= bits << (2 * 1); + break; + case InstrRegister::kCTR: + spr |= bits << (2 * 2); + break; + case InstrRegister::kCR: + cr |= bits << (2 * reg.ordinal); + break; + case InstrRegister::kFPSCR: + spr |= bits << (2 * 3); + break; + case InstrRegister::kGPR: + gpr |= bits << (2 * reg.ordinal); + break; + case InstrRegister::kFPR: + fpr |= bits << (2 * reg.ordinal); + break; + default: + case InstrRegister::kVMX: + XEASSERTALWAYS(); + break; + } +} + +void InstrAccessBits::Dump(std::string& out_str) { + std::stringstream str; + if (spr) { + uint64_t spr_t = spr; + if (spr_t & 0x3) { + str << "XER ["; + str << ((spr_t & 1) ? 
"R" : " "); + str << ((spr_t & 2) ? "W" : " "); + str << "] "; + } + spr_t >>= 2; + if (spr_t & 0x3) { + str << "LR ["; + str << ((spr_t & 1) ? "R" : " "); + str << ((spr_t & 2) ? "W" : " "); + str << "] "; + } + spr_t >>= 2; + if (spr_t & 0x3) { + str << "CTR ["; + str << ((spr_t & 1) ? "R" : " "); + str << ((spr_t & 2) ? "W" : " "); + str << "] "; + } + spr_t >>= 2; + if (spr_t & 0x3) { + str << "FPCSR ["; + str << ((spr_t & 1) ? "R" : " "); + str << ((spr_t & 2) ? "W" : " "); + str << "] "; + } + spr_t >>= 2; + } + + if (cr) { + uint64_t cr_t = cr; + for (size_t n = 0; n < 8; n++) { + if (cr_t & 0x3) { + str << "cr" << n << " ["; + str << ((cr_t & 1) ? "R" : " "); + str << ((cr_t & 2) ? "W" : " "); + str << "] "; + } + cr_t >>= 2; + } + } + + if (gpr) { + uint64_t gpr_t = gpr; + for (size_t n = 0; n < 32; n++) { + if (gpr_t & 0x3) { + str << "r" << n << " ["; + str << ((gpr_t & 1) ? "R" : " "); + str << ((gpr_t & 2) ? "W" : " "); + str << "] "; + } + gpr_t >>= 2; + } + } + + if (fpr) { + uint64_t fpr_t = fpr; + for (size_t n = 0; n < 32; n++) { + if (fpr_t & 0x3) { + str << "f" << n << " ["; + str << ((fpr_t & 1) ? "R" : " "); + str << ((fpr_t & 2) ? "W" : " "); + str << "] "; + } + fpr_t >>= 2; + } + } + + out_str = str.str(); +} + + void InstrDisasm::Init(std::string name, std::string info, uint32_t flags) { operands.clear(); special_registers.clear(); + access_bits.Clear(); if (flags & InstrDisasm::kOE) { name += "o"; @@ -173,11 +300,16 @@ void InstrDisasm::AddUImmOperand(uint64_t value, size_t width, } int InstrDisasm::Finish() { - // TODO(benvanik): setup fast checks - reg_mask = 0; - gpr_mask = 0; - fpr_mask = 0; - + for (std::vector::iterator it = operands.begin(); + it != operands.end(); ++it) { + if (it->type == InstrOperand::kRegister) { + access_bits.MarkAccess(it->reg); + } + } + for (std::vector::iterator it = special_registers.begin(); + it != special_registers.end(); ++it) { + access_bits.MarkAccess(*it); + } return 0; }