diff --git a/src/cpu/backend/interpreter/interpreter_backend.cc b/src/cpu/backend/interpreter/interpreter_backend.cc index 8ace32d8..17594dec 100644 --- a/src/cpu/backend/interpreter/interpreter_backend.cc +++ b/src/cpu/backend/interpreter/interpreter_backend.cc @@ -97,9 +97,6 @@ IntInstr *AssembleContext::TranslateInstr(Instr &ir_i) { i->fn = GetCallback(ir_i.op(), GetSignature(ir_i), GetAccessMask(ir_i)); - i->guest_addr = ir_i.guest_addr; - i->guest_op = ir_i.guest_op; - return i; } diff --git a/src/cpu/backend/interpreter/interpreter_backend.h b/src/cpu/backend/interpreter/interpreter_backend.h index 0d621f8f..66e36654 100644 --- a/src/cpu/backend/interpreter/interpreter_backend.h +++ b/src/cpu/backend/interpreter/interpreter_backend.h @@ -65,8 +65,6 @@ union IntValue { struct IntInstr { IntFn fn; IntValue arg[4]; - intptr_t guest_addr; - intptr_t guest_op; }; class AssembleContext { diff --git a/src/cpu/frontend/sh4/sh4_builder.cc b/src/cpu/frontend/sh4/sh4_builder.cc index d5bce63b..e8f0049a 100644 --- a/src/cpu/frontend/sh4/sh4_builder.cc +++ b/src/cpu/frontend/sh4/sh4_builder.cc @@ -11,7 +11,11 @@ using namespace dreavm::cpu::ir; using namespace dreavm::emu; SH4Builder::SH4Builder(Memory &memory) - : memory_(memory), has_delay_instr_(false), last_instr_(nullptr) {} + : memory_(memory), + has_delay_instr_(false), + preserve_offset_(-1), + preserve_mask_(0), + offset_preserved_(false) {} SH4Builder::~SH4Builder() {} @@ -36,15 +40,7 @@ void SH4Builder::Emit(uint32_t start_addr) { } // emit the current instruction - printf("SH4::Emit %s\n", instr.type->name); - (emit_callbacks[instr.type->op])(*this, instr); - - // find the first ir instruction emitted for this op - ir::Instr *emitted = GetFirstEmittedInstr(); - if (emitted) { - emitted->guest_addr = addr; - emitted->guest_op = instr.type->op; - } + (emit_callbacks[instr.type->op])(*this, fpu_state_, instr); // delayed instructions will be emitted already by the instructions handler addr += delayed ? 4 : 2; @@ -81,48 +77,13 @@ void SH4Builder::Emit(uint32_t start_addr) { SetMetadata(MD_GUEST_CYCLES, AllocConstant(guest_cycles)); } -void SH4Builder::DumpToFile(uint32_t start_addr) { - char filename[PATH_MAX]; - snprintf(filename, sizeof(filename), "../dreamcast/0x%x.bin", start_addr); - - printf("DUMPING 0x%x to %s\n", start_addr, filename); - - FILE *fp = fopen(filename, "wb"); - if (!fp) { - printf("FAILED TO OPEN FILE HANDLE\n"); - return; - } - - uint32_t addr = start_addr; - while (true) { - uint16_t opcode = memory_.R16(addr); - Instr instr(addr, opcode); - bool delayed = instr.type->flags & OP_FLAG_DELAYED; - - fwrite(&opcode, 2, 1, fp); - - if (delayed) { - uint16_t delay_opcode = memory_.R16(addr + 2); - fwrite(&delay_opcode, 2, 1, fp); - } - - if (instr.type->flags & OP_FLAG_BRANCH) { - break; - } - - addr += delayed ? 4 : 2; - } - - fclose(fp); -} - Value *SH4Builder::LoadRegister(int n, ValueTy type) { return LoadContext(offsetof(SH4Context, r[n]), type); } void SH4Builder::StoreRegister(int n, Value *v) { CHECK_EQ(v->type(), VALUE_I32); - return StoreContext(offsetof(SH4Context, r[n]), v); + return StoreAndPreserveContext(offsetof(SH4Context, r[n]), v); } Value *SH4Builder::LoadRegisterF(int n, ValueTy type) { @@ -130,7 +91,7 @@ Value *SH4Builder::LoadRegisterF(int n, ValueTy type) { } void SH4Builder::StoreRegisterF(int n, Value *v) { - return StoreContext(offsetof(SH4Context, fr[n]), v); + return StoreAndPreserveContext(offsetof(SH4Context, fr[n]), v); } Value *SH4Builder::LoadRegisterXF(int n, ValueTy type) { @@ -138,7 +99,7 @@ Value *SH4Builder::LoadRegisterXF(int n, ValueTy type) { } void SH4Builder::StoreRegisterXF(int n, Value *v) { - return StoreContext(offsetof(SH4Context, xf[n]), v); + return StoreAndPreserveContext(offsetof(SH4Context, xf[n]), v); } Value *SH4Builder::LoadSR() { @@ -147,7 +108,7 @@ Value *SH4Builder::LoadSR() { void SH4Builder::StoreSR(Value *v) { CHECK_EQ(v->type(), VALUE_I32); - StoreContext(offsetof(SH4Context, sr), v, IF_INVALIDATE_CONTEXT); + StoreAndPreserveContext(offsetof(SH4Context, sr), v, IF_INVALIDATE_CONTEXT); CallExternal((ExternalFn)&SRUpdated); } @@ -163,7 +124,7 @@ Value *SH4Builder::LoadGBR() { } void SH4Builder::StoreGBR(Value *v) { - StoreContext(offsetof(SH4Context, gbr), v); + StoreAndPreserveContext(offsetof(SH4Context, gbr), v); } ir::Value *SH4Builder::LoadFPSCR() { @@ -175,55 +136,75 @@ ir::Value *SH4Builder::LoadFPSCR() { void SH4Builder::StoreFPSCR(ir::Value *v) { CHECK_EQ(v->type(), VALUE_I32); v = And(v, AllocConstant(0x003fffff)); - StoreContext(offsetof(SH4Context, fpscr), v); + StoreAndPreserveContext(offsetof(SH4Context, fpscr), v); CallExternal((ExternalFn)&FPSCRUpdated); } +ir::Value *SH4Builder::LoadPR() { + return LoadContext(offsetof(SH4Context, pr), VALUE_I32); +} + +void SH4Builder::StorePR(ir::Value *v) { + CHECK_EQ(v->type(), VALUE_I32); + StoreAndPreserveContext(offsetof(SH4Context, pr), v); +} + +void SH4Builder::PreserveT() { + preserve_offset_ = offsetof(SH4Context, sr); + preserve_mask_ = T; +} + +void SH4Builder::PreservePR() { + preserve_offset_ = offsetof(SH4Context, pr); + preserve_mask_ = 0; +} + +void SH4Builder::PreserveRegister(int n) { + preserve_offset_ = offsetof(SH4Context, r[n]); + preserve_mask_ = 0; +} + +Value *SH4Builder::LoadPreserved() { + Value *v = offset_preserved_ + // if the offset had to be preserved, load it up + ? LoadContext(offsetof(SH4Context, preserve), VALUE_I32) + // else, load from its original location + : LoadContext(preserve_offset_, VALUE_I32); + + if (preserve_mask_) { + v = And(v, AllocConstant(preserve_mask_)); + } + + // reset preserve state + preserve_offset_ = -1; + preserve_mask_ = 0; + offset_preserved_ = false; + + return v; +} + void SH4Builder::EmitDelayInstr() { CHECK_EQ(has_delay_instr_, true) << "No delay instruction available"; has_delay_instr_ = false; - printf("SH4::Emit %s\n", delay_instr_.type->name); - (emit_callbacks[delay_instr_.type->op])(*this, delay_instr_); - - ir::Instr *emitted = GetFirstEmittedInstr(); - if (emitted) { - emitted->guest_addr = delay_instr_.addr; - emitted->guest_op = delay_instr_.type->op; - } + (emit_callbacks[delay_instr_.type->op])(*this, fpu_state_, delay_instr_); } -// FIXME simplify this -ir::Instr *SH4Builder::GetFirstEmittedInstr() { - ir::Instr *first = last_instr_; - - // find the first instruction since the tail of the last batch - if (!first && current_block_ && current_block_->instrs().head()) { - first = current_block_->instrs().head(); - } else if (first && first->next()) { - first = first->next(); - } else if (first && first->block()->next() && - first->block()->next()->instrs().head()) { - first = first->block()->next()->instrs().head(); +// When emitting an instruction in the delay slot, it's possible that it will +// overwrite a register needed by the original branch instruction. The branch +// emitter can request that a register be preserved with PreserveT, etc. before +// the delay slot, and if it is overwritten, the register is cached off. After +// emitting the delay slot, the branch emitter can call LoadPreserved to load +// either the original value, or if that was overwritten, the cached value. +void SH4Builder::StoreAndPreserveContext(size_t offset, Value *v, + InstrFlag flags) { + // if a register that needs to be preserved is overwritten, cache it + if (offset == preserve_offset_) { + CHECK(!offset_preserved_) << "Can only preserve a single value"; + StoreContext(offsetof(SH4Context, preserve), + LoadContext(offset, VALUE_I32)); + offset_preserved_ = true; } - // nothing new emitted - if (first == last_instr_) { - return nullptr; - } - - // move to end of this batch of emitted instructions - last_instr_ = first; - if (last_instr_) { - while (last_instr_->block()->next() && - last_instr_->block()->next()->instrs().head()) { - last_instr_ = last_instr_->block()->next()->instrs().head(); - } - while (last_instr_->next()) { - last_instr_ = last_instr_->next(); - } - } - CHECK_NOTNULL(last_instr_); - - return first; + StoreContext(offset, v, flags); } diff --git a/src/cpu/frontend/sh4/sh4_builder.h b/src/cpu/frontend/sh4/sh4_builder.h index 62cbdd94..db38be1d 100644 --- a/src/cpu/frontend/sh4/sh4_builder.h +++ b/src/cpu/frontend/sh4/sh4_builder.h @@ -38,8 +38,7 @@ class SH4Builder : public ir::IRBuilder { SH4Builder(emu::Memory &memory); ~SH4Builder(); - void Emit(uint32_t start_addr); - void DumpToFile(uint32_t start_addr); + void Emit(uint32_t start_addr, const SH4Context &ctx); ir::Value *LoadRegister(int n, ir::ValueTy type); void StoreRegister(int n, ir::Value *v); @@ -55,16 +54,26 @@ class SH4Builder : public ir::IRBuilder { void StoreGBR(ir::Value *v); ir::Value *LoadFPSCR(); void StoreFPSCR(ir::Value *v); + ir::Value *LoadPR(); + void StorePR(ir::Value *v); + + void PreserveT(); + void PreservePR(); + void PreserveRegister(int n); + ir::Value *LoadPreserved(); void EmitDelayInstr(); private: - ir::Instr *GetFirstEmittedInstr(); + void StoreAndPreserveContext(size_t offset, ir::Value *v, + ir::InstrFlag flags = ir::IF_NONE); emu::Memory &memory_; Instr delay_instr_; bool has_delay_instr_; - ir::Instr *last_instr_; + size_t preserve_offset_; + uint32_t preserve_mask_; + bool offset_preserved_; }; } } diff --git a/src/cpu/frontend/sh4/sh4_emit.cc b/src/cpu/frontend/sh4/sh4_emit.cc index b3fa46f6..08487d2f 100644 --- a/src/cpu/frontend/sh4/sh4_emit.cc +++ b/src/cpu/frontend/sh4/sh4_emit.cc @@ -1069,9 +1069,11 @@ EMITTER(BF) { // 1000 1111 dddd dddd 3/1 - // BFS disp EMITTER(BFS) { - uint32_t dest_addr = i.addr + 4 + (int8_t)i.disp * 2; - Value *cond = b.LoadT(); + b.PreserveT(); b.EmitDelayInstr(); + Value *cond = b.LoadPreserved(); + + uint32_t dest_addr = i.addr + 4 + (int8_t)i.disp * 2; b.BranchFalse(cond, b.AllocConstant(dest_addr)); } @@ -1088,9 +1090,11 @@ EMITTER(BT) { // 1000 1101 dddd dddd 2/1 - // BTS disp EMITTER(BTS) { - uint32_t dest_addr = i.addr + 4 + (int8_t)i.disp * 2; - Value *cond = b.LoadT(); + b.PreserveT(); b.EmitDelayInstr(); + Value *cond = b.LoadPreserved(); + + uint32_t dest_addr = i.addr + 4 + (int8_t)i.disp * 2; b.BranchTrue(cond, b.AllocConstant(dest_addr)); } @@ -1098,10 +1102,11 @@ EMITTER(BTS) { // 1010 dddd dddd dddd 2 - // BRA disp EMITTER(BRA) { + b.EmitDelayInstr(); + int32_t disp = ((i.disp & 0xfff) << 20) >> 20; // 12-bit displacement must be sign extended uint32_t dest_addr = i.addr + 4 + disp * 2; - b.EmitDelayInstr(); b.Branch(b.AllocConstant(dest_addr)); } @@ -1109,9 +1114,11 @@ EMITTER(BRA) { // 0000 mmmm 0010 0011 2 - // BRAF Rn EMITTER(BRAF) { - Value *rn = b.LoadRegister(i.Rn, VALUE_I32); - Value *dest_addr = b.Add(b.AllocConstant(i.addr + 4), rn); + b.PreserveRegister(i.Rn); b.EmitDelayInstr(); + Value *rn = b.LoadPreserved(); + + Value *dest_addr = b.Add(b.AllocConstant(i.addr + 4), rn); b.Branch(dest_addr); } @@ -1119,12 +1126,13 @@ EMITTER(BRAF) { // 1011 dddd dddd dddd 2 - // BSR disp EMITTER(BSR) { + b.EmitDelayInstr(); + int32_t disp = ((i.disp & 0xfff) << 20) >> 20; // 12-bit displacement must be sign extended uint32_t ret_addr = i.addr + 4; uint32_t dest_addr = ret_addr + disp * 2; - b.EmitDelayInstr(); - b.StoreContext(offsetof(SH4Context, pr), b.AllocConstant(ret_addr)); + b.StorePR(b.AllocConstant(ret_addr)); b.Branch(b.AllocConstant(dest_addr)); } @@ -1132,34 +1140,42 @@ EMITTER(BSR) { // 0000 mmmm 0000 0011 2 - // BSRF Rn EMITTER(BSRF) { - Value *rn = b.LoadRegister(i.Rn, VALUE_I32); + b.PreserveRegister(i.Rn); + b.EmitDelayInstr(); + Value *rn = b.LoadPreserved(); + Value *ret_addr = b.AllocConstant(i.addr + 4); Value *dest_addr = b.Add(rn, ret_addr); - b.EmitDelayInstr(); - b.StoreContext(offsetof(SH4Context, pr), ret_addr); + b.StorePR(ret_addr); b.Branch(dest_addr); } // JMP @Rm EMITTER(JMP) { - Value *dest_addr = b.LoadRegister(i.Rn, VALUE_I32); + b.PreserveRegister(i.Rn); b.EmitDelayInstr(); + Value *dest_addr = b.LoadPreserved(); + b.Branch(dest_addr); } // JSR @Rn EMITTER(JSR) { - Value *ret_addr = b.AllocConstant(i.addr + 4); - Value *dest_addr = b.LoadRegister(i.Rn, VALUE_I32); + b.PreserveRegister(i.Rn); b.EmitDelayInstr(); - b.StoreContext(offsetof(SH4Context, pr), ret_addr); + Value *dest_addr = b.LoadPreserved(); + + Value *ret_addr = b.AllocConstant(i.addr + 4); + b.StorePR(ret_addr); b.Branch(dest_addr); } // RTS EMITTER(RTS) { + b.PreservePR(); b.EmitDelayInstr(); - Value *dest_addr = b.LoadContext(offsetof(SH4Context, pr), VALUE_I32); + Value *dest_addr = b.LoadPreserved(); + b.Branch(dest_addr); } @@ -1337,7 +1353,7 @@ EMITTER(LDSMACL) { // LDS Rm,PR EMITTER(LDSPR) { Value *v = b.LoadRegister(i.Rm, VALUE_I32); - b.StoreContext(offsetof(SH4Context, pr), v); + b.StorePR(v); } // LDS.L @Rm+,MACH @@ -1360,7 +1376,7 @@ EMITTER(LDSMMACL) { EMITTER(LDSMPR) { Value *addr = b.LoadRegister(i.Rm, VALUE_I32); Value *v = b.Load(addr, VALUE_I32); - b.StoreContext(offsetof(SH4Context, pr), v); + b.StorePR(v); b.StoreRegister(i.Rm, b.Add(addr, b.AllocConstant(4))); } @@ -1620,7 +1636,7 @@ EMITTER(STSMACL) { // STS PR,Rn EMITTER(STSPR) { - Value *v = b.LoadContext(offsetof(SH4Context, pr), VALUE_I32); + Value *v = b.LoadPR(); b.StoreRegister(i.Rn, v); } @@ -1647,7 +1663,7 @@ EMITTER(STSMPR) { Value *addr = b.Sub(b.LoadRegister(i.Rn, VALUE_I32), b.AllocConstant(4)); b.StoreRegister(i.Rn, addr); - Value *pr = b.LoadContext(offsetof(SH4Context, pr), VALUE_I32); + Value *pr = b.LoadPR(); b.Store(addr, pr); } diff --git a/src/cpu/ir/ir_builder.h b/src/cpu/ir/ir_builder.h index 3de551ea..9c9cd54f 100644 --- a/src/cpu/ir/ir_builder.h +++ b/src/cpu/ir/ir_builder.h @@ -278,10 +278,6 @@ class Instr : public core::IntrusiveListNode { intptr_t tag() const { return tag_; } void set_tag(intptr_t tag) { tag_ = tag; } - // temp debug variables - intptr_t guest_addr; - intptr_t guest_op; - private: Block *set_block(Block *block) { return block_ = block; } diff --git a/src/cpu/ir/passes/validate_pass.cc b/src/cpu/ir/passes/validate_pass.cc index e0fb08bd..a6dd9a5c 100644 --- a/src/cpu/ir/passes/validate_pass.cc +++ b/src/cpu/ir/passes/validate_pass.cc @@ -8,37 +8,31 @@ using namespace dreavm::cpu::ir::passes; void ValidatePass::Run(IRBuilder &builder) { PROFILER_RUNTIME("ValidatePass::Run"); - int cnt = 0; - for (auto block : builder.blocks()) { - ValidateBlock(cnt, builder, block); + ValidateBlock(builder, block); } } -void ValidatePass::ValidateBlock(int &cnt, IRBuilder &builder, Block *block) { +void ValidatePass::ValidateBlock(IRBuilder &builder, Block *block) { Instr *tail = block->instrs().tail(); CHECK(tail && IRBuilder::IsTerminator(tail)) << "Block ends in a non-terminating instruction"; for (auto instr : block->instrs()) { - ValidateInstr(cnt, builder, block, instr); + ValidateInstr(builder, block, instr); } } -void ValidatePass::ValidateInstr(int &cnt, IRBuilder &builder, Block *block, +void ValidatePass::ValidateInstr(IRBuilder &builder, Block *block, Instr *instr) { - // Value *result = instr->result(); + Value *result = instr->result(); - // if (result) { - // for (auto ref : result->refs()) { - // CHECK_EQ(ref->instr()->block(), block) - // << "Instruction result is referenced by multiple blocks, values can - // " - // "only be used in the block they're declared in " - // << Opnames[ref->instr()->op()] << " " << cnt; - // } - // } - - // cnt++; + if (result) { + for (auto ref : result->refs()) { + CHECK_EQ(ref->instr()->block(), block) + << "Instruction result is referenced by multiple blocks, values can " + "only be used in the block they're declared in"; + } + } } diff --git a/src/cpu/ir/passes/validate_pass.h b/src/cpu/ir/passes/validate_pass.h index 139427ce..6cfdafd8 100644 --- a/src/cpu/ir/passes/validate_pass.h +++ b/src/cpu/ir/passes/validate_pass.h @@ -11,8 +11,8 @@ namespace passes { class ValidatePass : public Pass { public: void Run(IRBuilder &builder); - void ValidateBlock(int &cnt, IRBuilder &builder, Block *block); - void ValidateInstr(int &cnt, IRBuilder &builder, Block *block, Instr *instr); + void ValidateBlock(IRBuilder &builder, Block *block); + void ValidateInstr(IRBuilder &builder, Block *block, Instr *instr); }; } } diff --git a/src/cpu/sh4_context.h b/src/cpu/sh4_context.h index bc0aab1d..d82e54d9 100644 --- a/src/cpu/sh4_context.h +++ b/src/cpu/sh4_context.h @@ -51,6 +51,7 @@ struct SH4Context { uint32_t dbr; uint32_t sq[2][8]; uint32_t sq_ext_addr[2]; + uint32_t preserve; SR_T sr, ssr, old_sr; FPSCR_T fpscr, old_fpscr; };