preserve registers when emitting delay slots

This commit is contained in:
Anthony Pesch 2015-08-13 01:14:16 -07:00
parent 5a0642d0a5
commit 6f4aebf144
9 changed files with 136 additions and 144 deletions

View File

@ -97,9 +97,6 @@ IntInstr *AssembleContext::TranslateInstr(Instr &ir_i) {
i->fn = GetCallback(ir_i.op(), GetSignature(ir_i), GetAccessMask(ir_i));
i->guest_addr = ir_i.guest_addr;
i->guest_op = ir_i.guest_op;
return i;
}

View File

@ -65,8 +65,6 @@ union IntValue {
// One instruction in the form AssembleContext::TranslateInstr produces from an
// IR instruction.
struct IntInstr {
// callback chosen by GetCallback for the IR op, its signature and access mask
IntFn fn;
// four argument slots available to the callback
IntValue arg[4];
// copied verbatim from the source IR instruction's guest_addr / guest_op
intptr_t guest_addr;
intptr_t guest_op;
};
class AssembleContext {

View File

@ -11,7 +11,11 @@ using namespace dreavm::cpu::ir;
using namespace dreavm::emu;
// Constructs a builder that reads guest opcodes through `memory`.
//
// Initial state: no delay-slot instruction is pending, no emitted instruction
// has been recorded yet, and no register is marked for preservation.
SH4Builder::SH4Builder(Memory &memory)
    : memory_(memory),
      has_delay_instr_(false),
      last_instr_(nullptr),
      // preserve_offset_ is a size_t, so -1 wraps to SIZE_MAX; this is the
      // "nothing to preserve" value that LoadPreserved also resets to
      preserve_offset_(-1),
      preserve_mask_(0),
      offset_preserved_(false) {}
// Destructor; the body is intentionally empty.
SH4Builder::~SH4Builder() {}
@ -36,15 +40,7 @@ void SH4Builder::Emit(uint32_t start_addr) {
}
// emit the current instruction
printf("SH4::Emit %s\n", instr.type->name);
(emit_callbacks[instr.type->op])(*this, instr);
// find the first ir instruction emitted for this op
ir::Instr *emitted = GetFirstEmittedInstr();
if (emitted) {
emitted->guest_addr = addr;
emitted->guest_op = instr.type->op;
}
(emit_callbacks[instr.type->op])(*this, fpu_state_, instr);
// delayed instructions will be emitted already by the instructions handler
addr += delayed ? 4 : 2;
@ -81,48 +77,13 @@ void SH4Builder::Emit(uint32_t start_addr) {
SetMetadata(MD_GUEST_CYCLES, AllocConstant(guest_cycles));
}
void SH4Builder::DumpToFile(uint32_t start_addr) {
char filename[PATH_MAX];
snprintf(filename, sizeof(filename), "../dreamcast/0x%x.bin", start_addr);
printf("DUMPING 0x%x to %s\n", start_addr, filename);
FILE *fp = fopen(filename, "wb");
if (!fp) {
printf("FAILED TO OPEN FILE HANDLE\n");
return;
}
uint32_t addr = start_addr;
while (true) {
uint16_t opcode = memory_.R16(addr);
Instr instr(addr, opcode);
bool delayed = instr.type->flags & OP_FLAG_DELAYED;
fwrite(&opcode, 2, 1, fp);
if (delayed) {
uint16_t delay_opcode = memory_.R16(addr + 2);
fwrite(&delay_opcode, 2, 1, fp);
}
if (instr.type->flags & OP_FLAG_BRANCH) {
break;
}
addr += delayed ? 4 : 2;
}
fclose(fp);
}
// Emits a context load of general register r[n], typed as `type`.
Value *SH4Builder::LoadRegister(int n, ValueTy type) {
  const size_t reg_offset = offsetof(SH4Context, r[n]);
  return LoadContext(reg_offset, type);
}
// Stores the 32-bit value `v` to general register r[n] in the guest context.
//
// The write goes through StoreAndPreserveContext so that, if a branch emitter
// asked for this register to be preserved across its delay slot, the old
// value is cached before it is overwritten.
void SH4Builder::StoreRegister(int n, Value *v) {
  CHECK_EQ(v->type(), VALUE_I32);
  StoreAndPreserveContext(offsetof(SH4Context, r[n]), v);
}
Value *SH4Builder::LoadRegisterF(int n, ValueTy type) {
@ -130,7 +91,7 @@ Value *SH4Builder::LoadRegisterF(int n, ValueTy type) {
}
// Stores `v` to floating-point register fr[n] in the guest context, routed
// through StoreAndPreserveContext like every other register write.
void SH4Builder::StoreRegisterF(int n, Value *v) {
  StoreAndPreserveContext(offsetof(SH4Context, fr[n]), v);
}
Value *SH4Builder::LoadRegisterXF(int n, ValueTy type) {
@ -138,7 +99,7 @@ Value *SH4Builder::LoadRegisterXF(int n, ValueTy type) {
}
// Stores `v` to extended floating-point register xf[n] in the guest context,
// routed through StoreAndPreserveContext like every other register write.
void SH4Builder::StoreRegisterXF(int n, Value *v) {
  StoreAndPreserveContext(offsetof(SH4Context, xf[n]), v);
}
Value *SH4Builder::LoadSR() {
@ -147,7 +108,7 @@ Value *SH4Builder::LoadSR() {
// Stores the 32-bit value `v` to SR and then calls the external SRUpdated
// hook.
//
// SR must be written exactly once, and only via StoreAndPreserveContext: a
// raw StoreContext issued first would overwrite SR before the preserving
// store gets a chance to cache the old value (needed when a branch emitter
// has called PreserveT).
void SH4Builder::StoreSR(Value *v) {
  CHECK_EQ(v->type(), VALUE_I32);
  StoreAndPreserveContext(offsetof(SH4Context, sr), v, IF_INVALIDATE_CONTEXT);
  CallExternal((ExternalFn)&SRUpdated);
}
@ -163,7 +124,7 @@ Value *SH4Builder::LoadGBR() {
}
// Stores `v` to GBR in the guest context, routed through
// StoreAndPreserveContext like every other register write.
void SH4Builder::StoreGBR(Value *v) {
  StoreAndPreserveContext(offsetof(SH4Context, gbr), v);
}
ir::Value *SH4Builder::LoadFPSCR() {
@ -175,55 +136,75 @@ ir::Value *SH4Builder::LoadFPSCR() {
// Stores the 32-bit value `v` to FPSCR, masked with 0x003fffff, and then
// calls the external FPSCRUpdated hook. The register is written once, through
// StoreAndPreserveContext.
void SH4Builder::StoreFPSCR(ir::Value *v) {
  CHECK_EQ(v->type(), VALUE_I32);
  v = And(v, AllocConstant(0x003fffff));
  StoreAndPreserveContext(offsetof(SH4Context, fpscr), v);
  CallExternal((ExternalFn)&FPSCRUpdated);
}
// Emits a 32-bit context load of PR.
ir::Value *SH4Builder::LoadPR() {
  const size_t pr_offset = offsetof(SH4Context, pr);
  return LoadContext(pr_offset, VALUE_I32);
}
// Stores the 32-bit value `v` to PR via the preserving store path.
void SH4Builder::StorePR(ir::Value *v) {
  CHECK_EQ(v->type(), VALUE_I32);
  const size_t pr_offset = offsetof(SH4Context, pr);
  StoreAndPreserveContext(pr_offset, v);
}
// Requests preservation of SR; LoadPreserved will mask the loaded value with
// T before returning it.
void SH4Builder::PreserveT() {
  preserve_mask_ = T;
  preserve_offset_ = offsetof(SH4Context, sr);
}
// Requests preservation of PR; no mask is applied when it is loaded back.
void SH4Builder::PreservePR() {
  preserve_mask_ = 0;
  preserve_offset_ = offsetof(SH4Context, pr);
}
// Requests preservation of general register r[n]; no mask is applied when it
// is loaded back.
void SH4Builder::PreserveRegister(int n) {
  preserve_mask_ = 0;
  preserve_offset_ = offsetof(SH4Context, r[n]);
}
// Loads the value requested by the last PreserveT / PreservePR /
// PreserveRegister call and clears the preserve request.
//
// If the register was overwritten since the request (offset_preserved_), the
// cached copy in SH4Context::preserve is loaded; otherwise the register is
// loaded from its own context slot. A non-zero preserve_mask_ is ANDed into
// the result.
//
// Returns the loaded (and possibly masked) 32-bit value.
Value *SH4Builder::LoadPreserved() {
  // without a pending request preserve_offset_ holds the SIZE_MAX sentinel,
  // which is not a valid context offset to load from
  CHECK(offset_preserved_ || preserve_offset_ != static_cast<size_t>(-1))
      << "LoadPreserved called without a pending preserve request";

  Value *v = nullptr;
  if (offset_preserved_) {
    // the offset had to be preserved, load the cached copy
    v = LoadContext(offsetof(SH4Context, preserve), VALUE_I32);
  } else {
    // untouched, load from its original location
    v = LoadContext(preserve_offset_, VALUE_I32);
  }

  if (preserve_mask_) {
    v = And(v, AllocConstant(preserve_mask_));
  }

  // reset preserve state
  preserve_offset_ = -1;
  preserve_mask_ = 0;
  offset_preserved_ = false;

  return v;
}
// Emits the pending delay-slot instruction held in delay_instr_. The CHECK
// fails if no delay instruction is pending.
void SH4Builder::EmitDelayInstr() {
CHECK_EQ(has_delay_instr_, true) << "No delay instruction available";
// consume the pending instruction so a second call trips the CHECK above
has_delay_instr_ = false;
printf("SH4::Emit %s\n", delay_instr_.type->name);
// NOTE(review): the emit callback is invoked twice in this block — here with
// (builder, instr) and again below with (builder, fpu_state_, instr). This
// looks like the pre- and post-change lines of a diff shown together; confirm
// which call is intended and whether the guest_addr / guest_op tagging in
// between should remain.
(emit_callbacks[delay_instr_.type->op])(*this, delay_instr_);
// tag the first IR instruction produced for the delay slot with its guest
// address and opcode
ir::Instr *emitted = GetFirstEmittedInstr();
if (emitted) {
emitted->guest_addr = delay_instr_.addr;
emitted->guest_op = delay_instr_.type->op;
}
(emit_callbacks[delay_instr_.type->op])(*this, fpu_state_, delay_instr_);
}
// FIXME simplify this
ir::Instr *SH4Builder::GetFirstEmittedInstr() {
ir::Instr *first = last_instr_;
// find the first instruction since the tail of the last batch
if (!first && current_block_ && current_block_->instrs().head()) {
first = current_block_->instrs().head();
} else if (first && first->next()) {
first = first->next();
} else if (first && first->block()->next() &&
first->block()->next()->instrs().head()) {
first = first->block()->next()->instrs().head();
// When emitting an instruction in the delay slot, it's possible that it will
// overwrite a register needed by the original branch instruction. The branch
// emitter can request that a register be preserved with PreserveT, etc. before
// the delay slot, and if it is overwritten, the register is cached off. After
// emitting the delay slot, the branch emitter can call LoadPreserved to load
// either the original value, or if that was overwritten, the cached value.
void SH4Builder::StoreAndPreserveContext(size_t offset, Value *v,
InstrFlag flags) {
// if a register that needs to be preserved is overwritten, cache it
if (offset == preserve_offset_) {
CHECK(!offset_preserved_) << "Can only preserve a single value";
StoreContext(offsetof(SH4Context, preserve),
LoadContext(offset, VALUE_I32));
offset_preserved_ = true;
}
// nothing new emitted
if (first == last_instr_) {
return nullptr;
}
// move to end of this batch of emitted instructions
last_instr_ = first;
if (last_instr_) {
while (last_instr_->block()->next() &&
last_instr_->block()->next()->instrs().head()) {
last_instr_ = last_instr_->block()->next()->instrs().head();
}
while (last_instr_->next()) {
last_instr_ = last_instr_->next();
}
}
CHECK_NOTNULL(last_instr_);
return first;
StoreContext(offset, v, flags);
}

View File

@ -38,8 +38,7 @@ class SH4Builder : public ir::IRBuilder {
SH4Builder(emu::Memory &memory);
~SH4Builder();
void Emit(uint32_t start_addr);
void DumpToFile(uint32_t start_addr);
void Emit(uint32_t start_addr, const SH4Context &ctx);
ir::Value *LoadRegister(int n, ir::ValueTy type);
void StoreRegister(int n, ir::Value *v);
@ -55,16 +54,26 @@ class SH4Builder : public ir::IRBuilder {
void StoreGBR(ir::Value *v);
ir::Value *LoadFPSCR();
void StoreFPSCR(ir::Value *v);
ir::Value *LoadPR();
void StorePR(ir::Value *v);
void PreserveT();
void PreservePR();
void PreserveRegister(int n);
ir::Value *LoadPreserved();
void EmitDelayInstr();
private:
ir::Instr *GetFirstEmittedInstr();
void StoreAndPreserveContext(size_t offset, ir::Value *v,
ir::InstrFlag flags = ir::IF_NONE);
emu::Memory &memory_;
Instr delay_instr_;
bool has_delay_instr_;
ir::Instr *last_instr_;
size_t preserve_offset_;
uint32_t preserve_mask_;
bool offset_preserved_;
};
}
}

View File

@ -1069,9 +1069,11 @@ EMITTER(BF) {
// 1000 1111 dddd dddd 3/1 -
// BFS disp
EMITTER(BFS) {
  // T is requested for preservation before the delay slot is emitted, so
  // LoadPreserved still yields the pre-delay-slot value if the slot
  // overwrites SR
  b.PreserveT();
  b.EmitDelayInstr();
  Value *cond = b.LoadPreserved();

  // target = address of this instruction + 4 + sign-extended 8-bit disp * 2
  uint32_t dest_addr = i.addr + 4 + (int8_t)i.disp * 2;
  b.BranchFalse(cond, b.AllocConstant(dest_addr));
}
@ -1088,9 +1090,11 @@ EMITTER(BT) {
// 1000 1101 dddd dddd 2/1 -
// BTS disp
EMITTER(BTS) {
  // T is requested for preservation before the delay slot is emitted, so
  // LoadPreserved still yields the pre-delay-slot value if the slot
  // overwrites SR
  b.PreserveT();
  b.EmitDelayInstr();
  Value *cond = b.LoadPreserved();

  // target = address of this instruction + 4 + sign-extended 8-bit disp * 2
  uint32_t dest_addr = i.addr + 4 + (int8_t)i.disp * 2;
  b.BranchTrue(cond, b.AllocConstant(dest_addr));
}
@ -1098,10 +1102,11 @@ EMITTER(BTS) {
// 1010 dddd dddd dddd 2 -
// BRA disp
EMITTER(BRA) {
  int32_t disp = ((i.disp & 0xfff) << 20) >>
                 20;  // 12-bit displacement must be sign extended
  uint32_t dest_addr = i.addr + 4 + disp * 2;

  // the delay slot is emitted exactly once; EmitDelayInstr CHECK-fails when
  // no delay instruction is pending
  b.EmitDelayInstr();
  b.Branch(b.AllocConstant(dest_addr));
}
@ -1109,9 +1114,11 @@ EMITTER(BRA) {
// 0000 mmmm 0010 0011 2 -
// BRAF Rn
EMITTER(BRAF) {
  // Rn is requested for preservation before the delay slot is emitted, so the
  // branch target uses its pre-delay-slot value even if the slot writes Rn
  b.PreserveRegister(i.Rn);
  b.EmitDelayInstr();
  Value *rn = b.LoadPreserved();

  // target = address of this instruction + 4 + Rn
  Value *dest_addr = b.Add(b.AllocConstant(i.addr + 4), rn);
  b.Branch(dest_addr);
}
@ -1119,12 +1126,13 @@ EMITTER(BRAF) {
// 1011 dddd dddd dddd 2 -
// BSR disp
EMITTER(BSR) {
  int32_t disp = ((i.disp & 0xfff) << 20) >>
                 20;  // 12-bit displacement must be sign extended
  uint32_t ret_addr = i.addr + 4;
  uint32_t dest_addr = ret_addr + disp * 2;

  // the delay slot is emitted exactly once; EmitDelayInstr CHECK-fails when
  // no delay instruction is pending
  b.EmitDelayInstr();

  // PR receives the return address, written once through StorePR
  b.StorePR(b.AllocConstant(ret_addr));
  b.Branch(b.AllocConstant(dest_addr));
}
@ -1132,34 +1140,42 @@ EMITTER(BSR) {
// 0000 mmmm 0000 0011 2 -
// BSRF Rn
EMITTER(BSRF) {
  // Rn is requested for preservation before the delay slot is emitted, so the
  // branch target uses its pre-delay-slot value even if the slot writes Rn
  b.PreserveRegister(i.Rn);
  b.EmitDelayInstr();
  Value *rn = b.LoadPreserved();

  // return address = address of this instruction + 4; target = Rn + that
  Value *ret_addr = b.AllocConstant(i.addr + 4);
  Value *dest_addr = b.Add(rn, ret_addr);

  b.StorePR(ret_addr);
  b.Branch(dest_addr);
}
// JMP @Rm
EMITTER(JMP) {
  // the target register is requested for preservation before the delay slot
  // is emitted, so the jump uses its pre-delay-slot value
  b.PreserveRegister(i.Rn);
  b.EmitDelayInstr();
  Value *dest_addr = b.LoadPreserved();
  b.Branch(dest_addr);
}
// JSR @Rn
EMITTER(JSR) {
  // the target register is requested for preservation before the delay slot
  // is emitted, so the jump uses its pre-delay-slot value
  b.PreserveRegister(i.Rn);
  b.EmitDelayInstr();
  Value *dest_addr = b.LoadPreserved();

  // PR receives the return address: address of this instruction + 4
  Value *ret_addr = b.AllocConstant(i.addr + 4);
  b.StorePR(ret_addr);
  b.Branch(dest_addr);
}
// RTS
EMITTER(RTS) {
  // PR is requested for preservation before the delay slot is emitted, so the
  // return target is the pre-delay-slot PR even if the slot writes PR
  b.PreservePR();
  b.EmitDelayInstr();
  Value *dest_addr = b.LoadPreserved();
  b.Branch(dest_addr);
}
@ -1337,7 +1353,7 @@ EMITTER(LDSMACL) {
// LDS Rm,PR
EMITTER(LDSPR) {
  // PR <- Rm, written once through StorePR so a pending PreservePR request
  // can cache the old PR first
  Value *v = b.LoadRegister(i.Rm, VALUE_I32);
  b.StorePR(v);
}
// LDS.L @Rm+,MACH
@ -1360,7 +1376,7 @@ EMITTER(LDSMMACL) {
EMITTER(LDSMPR) {
  // PR <- 32-bit value at [Rm], written once through StorePR so a pending
  // PreservePR request can cache the old PR first
  Value *addr = b.LoadRegister(i.Rm, VALUE_I32);
  Value *v = b.Load(addr, VALUE_I32);
  b.StorePR(v);

  // post-increment: Rm += 4
  b.StoreRegister(i.Rm, b.Add(addr, b.AllocConstant(4)));
}
@ -1620,7 +1636,7 @@ EMITTER(STSMACL) {
// STS PR,Rn
EMITTER(STSPR) {
  // Rn <- PR
  Value *v = b.LoadPR();
  b.StoreRegister(i.Rn, v);
}
@ -1647,7 +1663,7 @@ EMITTER(STSMPR) {
Value *addr = b.Sub(b.LoadRegister(i.Rn, VALUE_I32), b.AllocConstant(4));
b.StoreRegister(i.Rn, addr);
Value *pr = b.LoadContext(offsetof(SH4Context, pr), VALUE_I32);
Value *pr = b.LoadPR();
b.Store(addr, pr);
}

View File

@ -278,10 +278,6 @@ class Instr : public core::IntrusiveListNode<Instr> {
intptr_t tag() const { return tag_; }
void set_tag(intptr_t tag) { tag_ = tag; }
// temp debug variables
intptr_t guest_addr;
intptr_t guest_op;
private:
Block *set_block(Block *block) { return block_ = block; }

View File

@ -8,37 +8,31 @@ using namespace dreavm::cpu::ir::passes;
// Validates every block the builder currently holds, once each.
void ValidatePass::Run(IRBuilder &builder) {
  PROFILER_RUNTIME("ValidatePass::Run");

  for (auto block : builder.blocks()) {
    ValidateBlock(builder, block);
  }
}
void ValidatePass::ValidateBlock(int &cnt, IRBuilder &builder, Block *block) {
void ValidatePass::ValidateBlock(IRBuilder &builder, Block *block) {
Instr *tail = block->instrs().tail();
CHECK(tail && IRBuilder::IsTerminator(tail))
<< "Block ends in a non-terminating instruction";
for (auto instr : block->instrs()) {
ValidateInstr(cnt, builder, block, instr);
ValidateInstr(builder, block, instr);
}
}
void ValidatePass::ValidateInstr(int &cnt, IRBuilder &builder, Block *block,
void ValidatePass::ValidateInstr(IRBuilder &builder, Block *block,
Instr *instr) {
// Value *result = instr->result();
Value *result = instr->result();
// if (result) {
// for (auto ref : result->refs()) {
// CHECK_EQ(ref->instr()->block(), block)
// << "Instruction result is referenced by multiple blocks, values can
// "
// "only be used in the block they're declared in "
// << Opnames[ref->instr()->op()] << " " << cnt;
// }
// }
// cnt++;
if (result) {
for (auto ref : result->refs()) {
CHECK_EQ(ref->instr()->block(), block)
<< "Instruction result is referenced by multiple blocks, values can "
"only be used in the block they're declared in";
}
}
}

View File

@ -11,8 +11,8 @@ namespace passes {
// Pass that checks structural invariants of the IR held by a builder.
class ValidatePass : public Pass {
public:
// Entry point: validates every block in `builder`.
void Run(IRBuilder &builder);
// NOTE(review): the two overloads taking `int &cnt` look like the pre-change
// signatures of the pair below (a diff shown with old and new lines
// together) — confirm they can be dropped.
void ValidateBlock(int &cnt, IRBuilder &builder, Block *block);
void ValidateInstr(int &cnt, IRBuilder &builder, Block *block, Instr *instr);
// Validates one block / one instruction of that block.
void ValidateBlock(IRBuilder &builder, Block *block);
void ValidateInstr(IRBuilder &builder, Block *block, Instr *instr);
};
}
}

View File

@ -51,6 +51,7 @@ struct SH4Context {
uint32_t dbr;
uint32_t sq[2][8];
uint32_t sq_ext_addr[2];
uint32_t preserve;
SR_T sr, ssr, old_sr;
FPSCR_T fpscr, old_fpscr;
};