dynarec: skip single branch targets
This commit is contained in:
parent
3f4993979f
commit
2d8bc6d6ee
|
@ -13,6 +13,7 @@
|
||||||
#include "hw/sh4/sh4_opcode_list.h"
|
#include "hw/sh4/sh4_opcode_list.h"
|
||||||
#include "hw/sh4/sh4_core.h"
|
#include "hw/sh4/sh4_core.h"
|
||||||
#include "hw/sh4/sh4_mem.h"
|
#include "hw/sh4/sh4_mem.h"
|
||||||
|
#include "hw/sh4/modules/mmu.h"
|
||||||
#include "decoder_opcodes.h"
|
#include "decoder_opcodes.h"
|
||||||
|
|
||||||
#define BLOCK_MAX_SH_OPS_SOFT 500
|
#define BLOCK_MAX_SH_OPS_SOFT 500
|
||||||
|
@ -101,13 +102,13 @@ static void dec_DynamicSet(u32 regbase,u32 offs=0)
|
||||||
Emit(shop_jdyn,reg_pc_dyn,mk_reg((Sh4RegType)regbase),mk_imm(offs));
|
Emit(shop_jdyn,reg_pc_dyn,mk_reg((Sh4RegType)regbase),mk_imm(offs));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void dec_End(u32 dst,BlockEndType flags,bool delay)
|
static void dec_End(u32 dst, BlockEndType flags, bool delaySlot)
|
||||||
{
|
{
|
||||||
if (state.ngen.OnlyDynamicEnds && flags == BET_StaticJump)
|
if (state.ngen.OnlyDynamicEnds && flags == BET_StaticJump)
|
||||||
{
|
{
|
||||||
Emit(shop_mov32,mk_reg(reg_nextpc),mk_imm(dst));
|
Emit(shop_mov32, mk_reg(reg_nextpc), mk_imm(dst));
|
||||||
dec_DynamicSet(reg_nextpc);
|
dec_DynamicSet(reg_nextpc);
|
||||||
dec_End(0xFFFFFFFF,BET_DynamicJump,delay);
|
dec_End(NullAddress, BET_DynamicJump, delaySlot);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -116,11 +117,14 @@ static void dec_End(u32 dst,BlockEndType flags,bool delay)
|
||||||
verify(flags == BET_DynamicJump);
|
verify(flags == BET_DynamicJump);
|
||||||
}
|
}
|
||||||
|
|
||||||
state.BlockType=flags;
|
state.BlockType = flags;
|
||||||
state.NextOp=delay?NDO_Delayslot:NDO_End;
|
state.NextOp = delaySlot ? NDO_Delayslot : NDO_End;
|
||||||
state.DelayOp=NDO_End;
|
state.DelayOp = NDO_End;
|
||||||
state.JumpAddr=dst;
|
state.JumpAddr = dst;
|
||||||
state.NextAddr=state.cpu.rpc+2+(delay?2:0);
|
if (flags != BET_StaticCall && flags != BET_StaticJump)
|
||||||
|
state.NextAddr = state.cpu.rpc + 2 + (delaySlot ? 2 : 0);
|
||||||
|
else
|
||||||
|
verify(state.JumpAddr != NullAddress);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define GetN(str) ((str>>8) & 0xf)
|
#define GetN(str) ((str>>8) & 0xf)
|
||||||
|
@ -128,9 +132,6 @@ static void dec_End(u32 dst,BlockEndType flags,bool delay)
|
||||||
#define GetImm4(str) ((str>>0) & 0xf)
|
#define GetImm4(str) ((str>>0) & 0xf)
|
||||||
#define GetImm8(str) ((str>>0) & 0xff)
|
#define GetImm8(str) ((str>>0) & 0xff)
|
||||||
#define GetSImm8(str) ((s8)((str>>0) & 0xff))
|
#define GetSImm8(str) ((s8)((str>>0) & 0xff))
|
||||||
#define GetImm12(str) ((str>>0) & 0xfff)
|
|
||||||
#define GetSImm12(str) (((s16)((GetImm12(str))<<4))>>4)
|
|
||||||
|
|
||||||
|
|
||||||
#define SR_STATUS_MASK 0x700083F2
|
#define SR_STATUS_MASK 0x700083F2
|
||||||
#define SR_T_MASK 1
|
#define SR_T_MASK 1
|
||||||
|
@ -189,7 +190,7 @@ sh4dec(i0000_nnnn_0010_0011)
|
||||||
u32 n = GetN(op);
|
u32 n = GetN(op);
|
||||||
|
|
||||||
dec_DynamicSet(reg_r0+n,state.cpu.rpc + 4);
|
dec_DynamicSet(reg_r0+n,state.cpu.rpc + 4);
|
||||||
dec_End(0xFFFFFFFF,BET_DynamicJump,true);
|
dec_End(NullAddress, BET_DynamicJump, true);
|
||||||
}
|
}
|
||||||
//jmp @<REG_N>
|
//jmp @<REG_N>
|
||||||
sh4dec(i0100_nnnn_0010_1011)
|
sh4dec(i0100_nnnn_0010_1011)
|
||||||
|
@ -197,39 +198,36 @@ sh4dec(i0100_nnnn_0010_1011)
|
||||||
u32 n = GetN(op);
|
u32 n = GetN(op);
|
||||||
|
|
||||||
dec_DynamicSet(reg_r0+n);
|
dec_DynamicSet(reg_r0+n);
|
||||||
dec_End(0xFFFFFFFF,BET_DynamicJump,true);
|
dec_End(NullAddress, BET_DynamicJump, true);
|
||||||
}
|
}
|
||||||
//bsr <bdisp12>
|
//bsr <bdisp12>
|
||||||
sh4dec(i1011_iiii_iiii_iiii)
|
sh4dec(i1011_iiii_iiii_iiii)
|
||||||
{
|
{
|
||||||
//TODO: set PR
|
|
||||||
dec_set_pr();
|
dec_set_pr();
|
||||||
dec_End(dec_jump_simm12(op),BET_StaticCall,true);
|
dec_End(dec_jump_simm12(op), BET_StaticCall, true);
|
||||||
}
|
}
|
||||||
//bsrf <REG_N>
|
//bsrf <REG_N>
|
||||||
sh4dec(i0000_nnnn_0000_0011)
|
sh4dec(i0000_nnnn_0000_0011)
|
||||||
{
|
{
|
||||||
u32 n = GetN(op);
|
u32 n = GetN(op);
|
||||||
//TODO: set PR
|
|
||||||
u32 retaddr=dec_set_pr();
|
u32 retaddr=dec_set_pr();
|
||||||
dec_DynamicSet(reg_r0+n,retaddr);
|
dec_DynamicSet(reg_r0+n,retaddr);
|
||||||
dec_End(0xFFFFFFFF,BET_DynamicCall,true);
|
dec_End(NullAddress, BET_DynamicCall, true);
|
||||||
}
|
}
|
||||||
//jsr @<REG_N>
|
//jsr @<REG_N>
|
||||||
sh4dec(i0100_nnnn_0000_1011)
|
sh4dec(i0100_nnnn_0000_1011)
|
||||||
{
|
{
|
||||||
u32 n = GetN(op);
|
u32 n = GetN(op);
|
||||||
|
|
||||||
//TODO: Set pr
|
|
||||||
dec_set_pr();
|
dec_set_pr();
|
||||||
dec_DynamicSet(reg_r0+n);
|
dec_DynamicSet(reg_r0+n);
|
||||||
dec_End(0xFFFFFFFF,BET_DynamicCall,true);
|
dec_End(NullAddress, BET_DynamicCall, true);
|
||||||
}
|
}
|
||||||
//rts
|
//rts
|
||||||
sh4dec(i0000_0000_0000_1011)
|
sh4dec(i0000_0000_0000_1011)
|
||||||
{
|
{
|
||||||
dec_DynamicSet(reg_pr);
|
dec_DynamicSet(reg_pr);
|
||||||
dec_End(0xFFFFFFFF,BET_DynamicRet,true);
|
dec_End(NullAddress, BET_DynamicRet, true);
|
||||||
}
|
}
|
||||||
//rte
|
//rte
|
||||||
sh4dec(i0000_0000_0010_1011)
|
sh4dec(i0000_0000_0010_1011)
|
||||||
|
@ -238,7 +236,7 @@ sh4dec(i0000_0000_0010_1011)
|
||||||
dec_write_sr(reg_ssr);
|
dec_write_sr(reg_ssr);
|
||||||
Emit(shop_sync_sr);
|
Emit(shop_sync_sr);
|
||||||
dec_DynamicSet(reg_spc);
|
dec_DynamicSet(reg_spc);
|
||||||
dec_End(0xFFFFFFFF,BET_DynamicIntr,true);
|
dec_End(NullAddress, BET_DynamicIntr, true);
|
||||||
}
|
}
|
||||||
//trapa #<imm>
|
//trapa #<imm>
|
||||||
sh4dec(i1100_0011_iiii_iiii)
|
sh4dec(i1100_0011_iiii_iiii)
|
||||||
|
@ -246,7 +244,7 @@ sh4dec(i1100_0011_iiii_iiii)
|
||||||
//TODO: ifb
|
//TODO: ifb
|
||||||
dec_fallback(op);
|
dec_fallback(op);
|
||||||
dec_DynamicSet(reg_nextpc);
|
dec_DynamicSet(reg_nextpc);
|
||||||
dec_End(0xFFFFFFFF,BET_DynamicJump,false);
|
dec_End(NullAddress, BET_DynamicJump, false);
|
||||||
}
|
}
|
||||||
//sleep
|
//sleep
|
||||||
sh4dec(i0000_0000_0001_1011)
|
sh4dec(i0000_0000_0001_1011)
|
||||||
|
@ -254,7 +252,7 @@ sh4dec(i0000_0000_0001_1011)
|
||||||
//TODO: ifb
|
//TODO: ifb
|
||||||
dec_fallback(op);
|
dec_fallback(op);
|
||||||
dec_DynamicSet(reg_nextpc);
|
dec_DynamicSet(reg_nextpc);
|
||||||
dec_End(0xFFFFFFFF,BET_DynamicJump,false);
|
dec_End(NullAddress, BET_DynamicJump, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
//ldc.l @<REG_N>+,SR
|
//ldc.l @<REG_N>+,SR
|
||||||
|
@ -272,7 +270,7 @@ sh4dec(i0100_nnnn_0000_0111)
|
||||||
//FIXME only if interrupts got on .. :P
|
//FIXME only if interrupts got on .. :P
|
||||||
UpdateINTC();
|
UpdateINTC();
|
||||||
}
|
}
|
||||||
dec_End(0xFFFFFFFF,BET_StaticIntr,false);
|
dec_End(NullAddress,BET_StaticIntr,false);
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
@ -283,7 +281,7 @@ sh4dec(i0100_nnnn_0000_1110)
|
||||||
|
|
||||||
dec_write_sr((Sh4RegType)(reg_r0+n));
|
dec_write_sr((Sh4RegType)(reg_r0+n));
|
||||||
Emit(shop_sync_sr);
|
Emit(shop_sync_sr);
|
||||||
dec_End(0xFFFFFFFF,BET_StaticIntr,false);
|
dec_End(NullAddress, BET_StaticIntr, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
//nop !
|
//nop !
|
||||||
|
@ -975,16 +973,29 @@ static void state_Setup(u32 rpc,fpscr_t fpu_cfg)
|
||||||
//verify(fpu_cfg.RM<2); // Happens with many wince games (set to 3)
|
//verify(fpu_cfg.RM<2); // Happens with many wince games (set to 3)
|
||||||
//what about fp/fs ?
|
//what about fp/fs ?
|
||||||
|
|
||||||
state.NextOp=NDO_NextOp;
|
state.NextOp = NDO_NextOp;
|
||||||
state.BlockType=BET_SCL_Intr;
|
state.BlockType = BET_SCL_Intr;
|
||||||
state.JumpAddr=0xFFFFFFFF;
|
state.JumpAddr = NullAddress;
|
||||||
state.NextAddr=0xFFFFFFFF;
|
state.NextAddr = NullAddress;
|
||||||
|
|
||||||
state.info.has_readm=false;
|
state.info.has_readm=false;
|
||||||
state.info.has_writem=false;
|
state.info.has_writem=false;
|
||||||
state.info.has_fpu=false;
|
state.info.has_fpu=false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Adds the cycle cost of one guest opcode to block->guest_cycles.
//
// block: block whose cycle counter is updated.
// op:    raw 16-bit guest opcode, also used as the index into OpDesc.
void dec_updateBlockCycles(RuntimeBlockInfo *block, u16 op)
{
	if (mmu_enabled())
	{
		// MMU on: charge the latency listed in the opcode table,
		// but never less than one cycle.
		const int latency = (int)OpDesc[op]->LatencyCycles;
		block->guest_cycles += std::max(latency, 1);
		return;
	}

	// MMU off: opcodes below 0xF000 cost exactly one cycle,
	// opcodes at 0xF000 and above add nothing.
	if (op < 0xF000)
		block->guest_cycles++;
}
|
||||||
|
|
||||||
bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
|
bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
|
||||||
{
|
{
|
||||||
blk=rbi;
|
blk=rbi;
|
||||||
|
@ -1015,15 +1026,8 @@ bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
|
||||||
u32 op = IReadMem16(state.cpu.rpc);
|
u32 op = IReadMem16(state.cpu.rpc);
|
||||||
|
|
||||||
blk->guest_opcodes++;
|
blk->guest_opcodes++;
|
||||||
if (!mmu_enabled())
|
dec_updateBlockCycles(blk, op);
|
||||||
{
|
|
||||||
if (op < 0xF000)
|
|
||||||
blk->guest_cycles++;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
blk->guest_cycles += std::max((int)OpDesc[op]->LatencyCycles, 1);
|
|
||||||
}
|
|
||||||
if (OpDesc[op]->IsFloatingPoint())
|
if (OpDesc[op]->IsFloatingPoint())
|
||||||
{
|
{
|
||||||
if (sr.FD == 1)
|
if (sr.FD == 1)
|
||||||
|
@ -1045,11 +1049,11 @@ bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
|
||||||
if (OpDesc[op]->SetPC())
|
if (OpDesc[op]->SetPC())
|
||||||
{
|
{
|
||||||
dec_DynamicSet(reg_nextpc);
|
dec_DynamicSet(reg_nextpc);
|
||||||
dec_End(0xFFFFFFFF,BET_DynamicJump,false);
|
dec_End(NullAddress, BET_DynamicJump, false);
|
||||||
}
|
}
|
||||||
if (OpDesc[op]->SetFPSCR() && !state.cpu.is_delayslot)
|
else if (OpDesc[op]->SetFPSCR() && !state.cpu.is_delayslot)
|
||||||
{
|
{
|
||||||
dec_End(state.cpu.rpc+2,BET_StaticJump,false);
|
dec_End(state.cpu.rpc + 2, BET_StaticJump, false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1062,13 +1066,33 @@ bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case NDO_Jump:
|
|
||||||
die("Too old");
|
|
||||||
//state.NextOp=state.JumpOp;
|
|
||||||
//state.cpu.rpc=state.JumpAddr;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case NDO_End:
|
case NDO_End:
|
||||||
|
// Disabled for now since we need to know if the block is read-only,
|
||||||
|
// which isn't determined until after the decoding.
|
||||||
|
// This is a relatively rare optimization anyway
|
||||||
|
#if 0
|
||||||
|
// detect if calling an empty subroutine and skip it
|
||||||
|
if (state.BlockType == BET_StaticCall && blk->read_only)
|
||||||
|
{
|
||||||
|
if ((state.JumpAddr >> 12) == (blk->vaddr >> 12)
|
||||||
|
|| (state.JumpAddr >> 12) == ((blk->vaddr + (blk->guest_opcodes - 1) * 2) >> 12))
|
||||||
|
{
|
||||||
|
u32 op = IReadMem16(state.JumpAddr);
|
||||||
|
if (op == 0x000B) // rts
|
||||||
|
{
|
||||||
|
u16 delayOp = IReadMem16(state.JumpAddr + 2);
|
||||||
|
if (delayOp == 0x0000 || delayOp == 0x0009) // nop
|
||||||
|
{
|
||||||
|
state.NextOp = NDO_NextOp;
|
||||||
|
state.cpu.is_delayslot = false;
|
||||||
|
dec_updateBlockCycles(blk, op);
|
||||||
|
dec_updateBlockCycles(blk, delayOp);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
goto _end;
|
goto _end;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,7 +35,6 @@ enum NextDecoderOperation
|
||||||
NDO_NextOp, //pc+=2
|
NDO_NextOp, //pc+=2
|
||||||
NDO_End, //End the block, Type = BlockEndType
|
NDO_End, //End the block, Type = BlockEndType
|
||||||
NDO_Delayslot, //pc+=2, NextOp=DelayOp
|
NDO_Delayslot, //pc+=2, NextOp=DelayOp
|
||||||
NDO_Jump, //pc=JumpAddr,NextOp=JumpOp
|
|
||||||
};
|
};
|
||||||
//ngen features
|
//ngen features
|
||||||
struct ngen_features
|
struct ngen_features
|
||||||
|
@ -46,12 +45,12 @@ struct ngen_features
|
||||||
|
|
||||||
struct RuntimeBlockInfo;
|
struct RuntimeBlockInfo;
|
||||||
bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles);
|
bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles);
|
||||||
|
void dec_updateBlockCycles(RuntimeBlockInfo *block, u16 op);
|
||||||
|
|
||||||
struct state_t
|
struct state_t
|
||||||
{
|
{
|
||||||
NextDecoderOperation NextOp;
|
NextDecoderOperation NextOp;
|
||||||
NextDecoderOperation DelayOp;
|
NextDecoderOperation DelayOp;
|
||||||
NextDecoderOperation JumpOp;
|
|
||||||
u32 JumpAddr;
|
u32 JumpAddr;
|
||||||
u32 NextAddr;
|
u32 NextAddr;
|
||||||
BlockEndType BlockType;
|
BlockEndType BlockType;
|
||||||
|
@ -73,5 +72,8 @@ struct state_t
|
||||||
bool has_writem;
|
bool has_writem;
|
||||||
bool has_fpu;
|
bool has_fpu;
|
||||||
} info;
|
} info;
|
||||||
|
};
|
||||||
|
|
||||||
} ;
|
const u32 NullAddress = 0xFFFFFFFF;
|
||||||
|
// Low 12 bits of an opcode. The argument is parenthesized so that
// expressions such as GetImm12(a | b) mask the whole value.
#define GetImm12(str) (((str) >> 0) & 0xfff)
// Low 12 bits of an opcode, sign-extended to an int in [-2048, 2047].
// Flipping bit 11 and subtracting 0x800 sign-extends without relying on
// implementation-defined narrowing or right shifts of negative values.
#define GetSImm12(str) ((((int)GetImm12(str)) ^ 0x800) - 0x800)
|
||||||
|
|
|
@ -149,8 +149,8 @@ bool RuntimeBlockInfo::Setup(u32 rpc,fpscr_t rfpu_cfg)
|
||||||
pBranchBlock=pNextBlock=0;
|
pBranchBlock=pNextBlock=0;
|
||||||
code=0;
|
code=0;
|
||||||
has_jcond=false;
|
has_jcond=false;
|
||||||
BranchBlock = 0xFFFFFFFF;
|
BranchBlock = NullAddress;
|
||||||
NextBlock = 0xFFFFFFFF;
|
NextBlock = NullAddress;
|
||||||
BlockType = BET_SCL_Intr;
|
BlockType = BET_SCL_Intr;
|
||||||
has_fpu_op = false;
|
has_fpu_op = false;
|
||||||
temp_block = false;
|
temp_block = false;
|
||||||
|
|
|
@ -249,7 +249,7 @@ bool SSAOptimizer::ExecuteConstOp(shil_opcode* op)
|
||||||
block->BranchBlock = block->NextBlock;
|
block->BranchBlock = block->NextBlock;
|
||||||
}
|
}
|
||||||
block->BlockType = BET_StaticJump;
|
block->BlockType = BET_StaticJump;
|
||||||
block->NextBlock = 0xFFFFFFFF;
|
block->NextBlock = NullAddress;
|
||||||
block->has_jcond = false;
|
block->has_jcond = false;
|
||||||
// same remark regarding jdyn as in the previous case
|
// same remark regarding jdyn as in the previous case
|
||||||
block->oplist.erase(block->oplist.begin() + opnum);
|
block->oplist.erase(block->oplist.begin() + opnum);
|
||||||
|
|
|
@ -50,6 +50,7 @@ public:
|
||||||
CombineShiftsPass();
|
CombineShiftsPass();
|
||||||
DeadRegisterPass();
|
DeadRegisterPass();
|
||||||
IdentityMovePass();
|
IdentityMovePass();
|
||||||
|
SingleBranchTargetPass();
|
||||||
|
|
||||||
#if DEBUG
|
#if DEBUG
|
||||||
if (stats.prop_constants > 0 || stats.dead_code_ops > 0 || stats.constant_ops_replaced > 0
|
if (stats.prop_constants > 0 || stats.dead_code_ops > 0 || stats.constant_ops_replaced > 0
|
||||||
|
@ -714,6 +715,50 @@ private:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Follows a chain of "bra <target>" instructions whose delay slot is a nop,
// starting at addr, and moves addr forward to where the chain stops.
//
// addr:         in/out guest address of a branch target. Left untouched when
//               nothing can be skipped (including addr == NullAddress).
// updateCycles: when true, the cost of every skipped bra and of its delay
//               slot is added to the block through dec_updateBlockCycles().
// Returns true if at least one branch was skipped.
bool skipSingleBranchTarget(u32& addr, bool updateCycles)
{
	if (addr == NullAddress)
		return false;

	// A chain of branches can be circular (A -> B -> A). Checking for a branch
	// to itself does not catch that, so the number of hops is bounded.
	const int maxHops = 5;
	// Pages holding the first and the last opcode of the block.
	const u32 firstPage = block->vaddr >> 12;
	const u32 lastPage = (block->vaddr + (block->guest_opcodes - 1) * 2) >> 12;

	bool success = false;
	for (int hop = 0; hop < maxHops; hop++)
	{
		// Both the branch and its delay slot must lie in one of the block's
		// pages before they are read.
		// NOTE(review): presumably these are the only pages covered by the
		// block's read_only guarantee - confirm.
		const u32 opPage = addr >> 12;
		const u32 delayPage = (addr + 2) >> 12;
		if ((opPage != firstPage && opPage != lastPage)
				|| (delayPage != firstPage && delayPage != lastPage))
			break;

		u32 op = IReadMem16(addr);
		// Axxx: bra <bdisp12>
		if ((op & 0xF000) != 0xA000)
			break;

		// Only skip the branch when its delay slot does nothing
		u16 delayOp = IReadMem16(addr + 2);
		if (delayOp != 0x0000 && delayOp != 0x0009)	// treated as nop
			break;

		// Displacement relative to the address of the bra itself
		int disp = GetSImm12(op) * 2 + 4;
		if (disp == 0)
			// branch to itself: infinite loop in guest code, leave it alone
			break;
		addr += disp;
		if (updateCycles)
		{
			dec_updateBlockCycles(block, op);
			dec_updateBlockCycles(block, delayOp);
		}
		success = true;
	}
	return success;
}
|
||||||
|
|
||||||
|
void SingleBranchTargetPass()
|
||||||
|
{
|
||||||
|
if (block->read_only)
|
||||||
|
{
|
||||||
|
bool updateCycles = !skipSingleBranchTarget(block->BranchBlock, true);
|
||||||
|
skipSingleBranchTarget(block->NextBlock, updateCycles);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
RuntimeBlockInfo* block;
|
RuntimeBlockInfo* block;
|
||||||
std::set<RegValue> writeback_values;
|
std::set<RegValue> writeback_values;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue