dynarec: skip single branch targets
This commit is contained in:
parent
3f4993979f
commit
2d8bc6d6ee
|
@ -13,6 +13,7 @@
|
|||
#include "hw/sh4/sh4_opcode_list.h"
|
||||
#include "hw/sh4/sh4_core.h"
|
||||
#include "hw/sh4/sh4_mem.h"
|
||||
#include "hw/sh4/modules/mmu.h"
|
||||
#include "decoder_opcodes.h"
|
||||
|
||||
#define BLOCK_MAX_SH_OPS_SOFT 500
|
||||
|
@ -101,13 +102,13 @@ static void dec_DynamicSet(u32 regbase,u32 offs=0)
|
|||
Emit(shop_jdyn,reg_pc_dyn,mk_reg((Sh4RegType)regbase),mk_imm(offs));
|
||||
}
|
||||
|
||||
static void dec_End(u32 dst,BlockEndType flags,bool delay)
|
||||
static void dec_End(u32 dst, BlockEndType flags, bool delaySlot)
|
||||
{
|
||||
if (state.ngen.OnlyDynamicEnds && flags == BET_StaticJump)
|
||||
{
|
||||
Emit(shop_mov32,mk_reg(reg_nextpc),mk_imm(dst));
|
||||
Emit(shop_mov32, mk_reg(reg_nextpc), mk_imm(dst));
|
||||
dec_DynamicSet(reg_nextpc);
|
||||
dec_End(0xFFFFFFFF,BET_DynamicJump,delay);
|
||||
dec_End(NullAddress, BET_DynamicJump, delaySlot);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -116,11 +117,14 @@ static void dec_End(u32 dst,BlockEndType flags,bool delay)
|
|||
verify(flags == BET_DynamicJump);
|
||||
}
|
||||
|
||||
state.BlockType=flags;
|
||||
state.NextOp=delay?NDO_Delayslot:NDO_End;
|
||||
state.DelayOp=NDO_End;
|
||||
state.JumpAddr=dst;
|
||||
state.NextAddr=state.cpu.rpc+2+(delay?2:0);
|
||||
state.BlockType = flags;
|
||||
state.NextOp = delaySlot ? NDO_Delayslot : NDO_End;
|
||||
state.DelayOp = NDO_End;
|
||||
state.JumpAddr = dst;
|
||||
if (flags != BET_StaticCall && flags != BET_StaticJump)
|
||||
state.NextAddr = state.cpu.rpc + 2 + (delaySlot ? 2 : 0);
|
||||
else
|
||||
verify(state.JumpAddr != NullAddress);
|
||||
}
|
||||
|
||||
#define GetN(str) ((str>>8) & 0xf)
|
||||
|
@ -128,9 +132,6 @@ static void dec_End(u32 dst,BlockEndType flags,bool delay)
|
|||
#define GetImm4(str) ((str>>0) & 0xf)
|
||||
#define GetImm8(str) ((str>>0) & 0xff)
|
||||
#define GetSImm8(str) ((s8)((str>>0) & 0xff))
|
||||
#define GetImm12(str) ((str>>0) & 0xfff)
|
||||
#define GetSImm12(str) (((s16)((GetImm12(str))<<4))>>4)
|
||||
|
||||
|
||||
#define SR_STATUS_MASK 0x700083F2
|
||||
#define SR_T_MASK 1
|
||||
|
@ -189,7 +190,7 @@ sh4dec(i0000_nnnn_0010_0011)
|
|||
u32 n = GetN(op);
|
||||
|
||||
dec_DynamicSet(reg_r0+n,state.cpu.rpc + 4);
|
||||
dec_End(0xFFFFFFFF,BET_DynamicJump,true);
|
||||
dec_End(NullAddress, BET_DynamicJump, true);
|
||||
}
|
||||
//jmp @<REG_N>
|
||||
sh4dec(i0100_nnnn_0010_1011)
|
||||
|
@ -197,39 +198,36 @@ sh4dec(i0100_nnnn_0010_1011)
|
|||
u32 n = GetN(op);
|
||||
|
||||
dec_DynamicSet(reg_r0+n);
|
||||
dec_End(0xFFFFFFFF,BET_DynamicJump,true);
|
||||
dec_End(NullAddress, BET_DynamicJump, true);
|
||||
}
|
||||
//bsr <bdisp12>
|
||||
sh4dec(i1011_iiii_iiii_iiii)
|
||||
{
|
||||
//TODO: set PR
|
||||
dec_set_pr();
|
||||
dec_End(dec_jump_simm12(op),BET_StaticCall,true);
|
||||
dec_End(dec_jump_simm12(op), BET_StaticCall, true);
|
||||
}
|
||||
//bsrf <REG_N>
|
||||
sh4dec(i0000_nnnn_0000_0011)
|
||||
{
|
||||
u32 n = GetN(op);
|
||||
//TODO: set PR
|
||||
u32 retaddr=dec_set_pr();
|
||||
dec_DynamicSet(reg_r0+n,retaddr);
|
||||
dec_End(0xFFFFFFFF,BET_DynamicCall,true);
|
||||
dec_End(NullAddress, BET_DynamicCall, true);
|
||||
}
|
||||
//jsr @<REG_N>
|
||||
sh4dec(i0100_nnnn_0000_1011)
|
||||
{
|
||||
u32 n = GetN(op);
|
||||
|
||||
//TODO: Set pr
|
||||
dec_set_pr();
|
||||
dec_DynamicSet(reg_r0+n);
|
||||
dec_End(0xFFFFFFFF,BET_DynamicCall,true);
|
||||
dec_End(NullAddress, BET_DynamicCall, true);
|
||||
}
|
||||
//rts
|
||||
sh4dec(i0000_0000_0000_1011)
|
||||
{
|
||||
dec_DynamicSet(reg_pr);
|
||||
dec_End(0xFFFFFFFF,BET_DynamicRet,true);
|
||||
dec_End(NullAddress, BET_DynamicRet, true);
|
||||
}
|
||||
//rte
|
||||
sh4dec(i0000_0000_0010_1011)
|
||||
|
@ -238,7 +236,7 @@ sh4dec(i0000_0000_0010_1011)
|
|||
dec_write_sr(reg_ssr);
|
||||
Emit(shop_sync_sr);
|
||||
dec_DynamicSet(reg_spc);
|
||||
dec_End(0xFFFFFFFF,BET_DynamicIntr,true);
|
||||
dec_End(NullAddress, BET_DynamicIntr, true);
|
||||
}
|
||||
//trapa #<imm>
|
||||
sh4dec(i1100_0011_iiii_iiii)
|
||||
|
@ -246,7 +244,7 @@ sh4dec(i1100_0011_iiii_iiii)
|
|||
//TODO: ifb
|
||||
dec_fallback(op);
|
||||
dec_DynamicSet(reg_nextpc);
|
||||
dec_End(0xFFFFFFFF,BET_DynamicJump,false);
|
||||
dec_End(NullAddress, BET_DynamicJump, false);
|
||||
}
|
||||
//sleep
|
||||
sh4dec(i0000_0000_0001_1011)
|
||||
|
@ -254,7 +252,7 @@ sh4dec(i0000_0000_0001_1011)
|
|||
//TODO: ifb
|
||||
dec_fallback(op);
|
||||
dec_DynamicSet(reg_nextpc);
|
||||
dec_End(0xFFFFFFFF,BET_DynamicJump,false);
|
||||
dec_End(NullAddress, BET_DynamicJump, false);
|
||||
}
|
||||
|
||||
//ldc.l @<REG_N>+,SR
|
||||
|
@ -272,7 +270,7 @@ sh4dec(i0100_nnnn_0000_0111)
|
|||
//FIXME only if interrupts got on .. :P
|
||||
UpdateINTC();
|
||||
}
|
||||
dec_End(0xFFFFFFFF,BET_StaticIntr,false);
|
||||
dec_End(NullAddress,BET_StaticIntr,false);
|
||||
}
|
||||
*/
|
||||
|
||||
|
@ -283,7 +281,7 @@ sh4dec(i0100_nnnn_0000_1110)
|
|||
|
||||
dec_write_sr((Sh4RegType)(reg_r0+n));
|
||||
Emit(shop_sync_sr);
|
||||
dec_End(0xFFFFFFFF,BET_StaticIntr,false);
|
||||
dec_End(NullAddress, BET_StaticIntr, false);
|
||||
}
|
||||
|
||||
//nop !
|
||||
|
@ -975,16 +973,29 @@ static void state_Setup(u32 rpc,fpscr_t fpu_cfg)
|
|||
//verify(fpu_cfg.RM<2); // Happens with many wince games (set to 3)
|
||||
//what about fp/fs ?
|
||||
|
||||
state.NextOp=NDO_NextOp;
|
||||
state.BlockType=BET_SCL_Intr;
|
||||
state.JumpAddr=0xFFFFFFFF;
|
||||
state.NextAddr=0xFFFFFFFF;
|
||||
state.NextOp = NDO_NextOp;
|
||||
state.BlockType = BET_SCL_Intr;
|
||||
state.JumpAddr = NullAddress;
|
||||
state.NextAddr = NullAddress;
|
||||
|
||||
state.info.has_readm=false;
|
||||
state.info.has_writem=false;
|
||||
state.info.has_fpu=false;
|
||||
}
|
||||
|
||||
// Adds the cycle cost of one guest opcode to block->guest_cycles.
//
// block: block whose guest_cycles counter is updated.
// op:    16-bit opcode value, also used as the index into OpDesc.
//
// With the MMU enabled the cost is the opcode's LatencyCycles from OpDesc,
// but never less than 1. With the MMU disabled every opcode below 0xF000
// costs exactly one cycle and opcodes at or above 0xF000 cost nothing.
void dec_updateBlockCycles(RuntimeBlockInfo *block, u16 op)
{
	if (mmu_enabled())
	{
		// Table-driven cost, clamped so that an opcode always costs at least one cycle.
		const int latency = (int)OpDesc[op]->LatencyCycles;
		block->guest_cycles += (latency < 1 ? 1 : latency);
		return;
	}

	// MMU disabled: flat cost of one cycle, Fxxx opcodes are not counted.
	if (op < 0xF000)
		block->guest_cycles++;
}
|
||||
|
||||
bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
|
||||
{
|
||||
blk=rbi;
|
||||
|
@ -1015,15 +1026,8 @@ bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
|
|||
u32 op = IReadMem16(state.cpu.rpc);
|
||||
|
||||
blk->guest_opcodes++;
|
||||
if (!mmu_enabled())
|
||||
{
|
||||
if (op < 0xF000)
|
||||
blk->guest_cycles++;
|
||||
}
|
||||
else
|
||||
{
|
||||
blk->guest_cycles += std::max((int)OpDesc[op]->LatencyCycles, 1);
|
||||
}
|
||||
dec_updateBlockCycles(blk, op);
|
||||
|
||||
if (OpDesc[op]->IsFloatingPoint())
|
||||
{
|
||||
if (sr.FD == 1)
|
||||
|
@ -1045,11 +1049,11 @@ bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
|
|||
if (OpDesc[op]->SetPC())
|
||||
{
|
||||
dec_DynamicSet(reg_nextpc);
|
||||
dec_End(0xFFFFFFFF,BET_DynamicJump,false);
|
||||
dec_End(NullAddress, BET_DynamicJump, false);
|
||||
}
|
||||
if (OpDesc[op]->SetFPSCR() && !state.cpu.is_delayslot)
|
||||
else if (OpDesc[op]->SetFPSCR() && !state.cpu.is_delayslot)
|
||||
{
|
||||
dec_End(state.cpu.rpc+2,BET_StaticJump,false);
|
||||
dec_End(state.cpu.rpc + 2, BET_StaticJump, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1062,13 +1066,33 @@ bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
|
|||
}
|
||||
break;
|
||||
|
||||
case NDO_Jump:
|
||||
die("Too old");
|
||||
//state.NextOp=state.JumpOp;
|
||||
//state.cpu.rpc=state.JumpAddr;
|
||||
break;
|
||||
|
||||
case NDO_End:
|
||||
// Disabled for now since we need to know if the block is read-only,
|
||||
// which isn't determined until after the decoding.
|
||||
// This is a relatively rare optimization anyway
|
||||
#if 0
|
||||
// detect if calling an empty subroutine and skip it
|
||||
if (state.BlockType == BET_StaticCall && blk->read_only)
|
||||
{
|
||||
if ((state.JumpAddr >> 12) == (blk->vaddr >> 12)
|
||||
|| (state.JumpAddr >> 12) == ((blk->vaddr + (blk->guest_opcodes - 1) * 2) >> 12))
|
||||
{
|
||||
u32 op = IReadMem16(state.JumpAddr);
|
||||
if (op == 0x000B) // rts
|
||||
{
|
||||
u16 delayOp = IReadMem16(state.JumpAddr + 2);
|
||||
if (delayOp == 0x0000 || delayOp == 0x0009) // nop
|
||||
{
|
||||
state.NextOp = NDO_NextOp;
|
||||
state.cpu.is_delayslot = false;
|
||||
dec_updateBlockCycles(blk, op);
|
||||
dec_updateBlockCycles(blk, delayOp);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
goto _end;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -35,7 +35,6 @@ enum NextDecoderOperation
|
|||
NDO_NextOp, //pc+=2
|
||||
NDO_End, //End the block, Type = BlockEndType
|
||||
NDO_Delayslot, //pc+=2, NextOp=DelayOp
|
||||
NDO_Jump, //pc=JumpAddr,NextOp=JumpOp
|
||||
};
|
||||
//ngen features
|
||||
struct ngen_features
|
||||
|
@ -46,12 +45,12 @@ struct ngen_features
|
|||
|
||||
struct RuntimeBlockInfo;
|
||||
bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles);
|
||||
void dec_updateBlockCycles(RuntimeBlockInfo *block, u16 op);
|
||||
|
||||
struct state_t
|
||||
{
|
||||
NextDecoderOperation NextOp;
|
||||
NextDecoderOperation DelayOp;
|
||||
NextDecoderOperation JumpOp;
|
||||
u32 JumpAddr;
|
||||
u32 NextAddr;
|
||||
BlockEndType BlockType;
|
||||
|
@ -73,5 +72,8 @@ struct state_t
|
|||
bool has_writem;
|
||||
bool has_fpu;
|
||||
} info;
|
||||
};
|
||||
|
||||
} ;
|
||||
const u32 NullAddress = 0xFFFFFFFF;
|
||||
#define GetImm12(str) ((str>>0) & 0xfff)
|
||||
#define GetSImm12(str) (((short)((GetImm12(str))<<4))>>4)
|
||||
|
|
|
@ -149,8 +149,8 @@ bool RuntimeBlockInfo::Setup(u32 rpc,fpscr_t rfpu_cfg)
|
|||
pBranchBlock=pNextBlock=0;
|
||||
code=0;
|
||||
has_jcond=false;
|
||||
BranchBlock = 0xFFFFFFFF;
|
||||
NextBlock = 0xFFFFFFFF;
|
||||
BranchBlock = NullAddress;
|
||||
NextBlock = NullAddress;
|
||||
BlockType = BET_SCL_Intr;
|
||||
has_fpu_op = false;
|
||||
temp_block = false;
|
||||
|
|
|
@ -249,7 +249,7 @@ bool SSAOptimizer::ExecuteConstOp(shil_opcode* op)
|
|||
block->BranchBlock = block->NextBlock;
|
||||
}
|
||||
block->BlockType = BET_StaticJump;
|
||||
block->NextBlock = 0xFFFFFFFF;
|
||||
block->NextBlock = NullAddress;
|
||||
block->has_jcond = false;
|
||||
// same remark regarding jdyn as in the previous case
|
||||
block->oplist.erase(block->oplist.begin() + opnum);
|
||||
|
|
|
@ -50,6 +50,7 @@ public:
|
|||
CombineShiftsPass();
|
||||
DeadRegisterPass();
|
||||
IdentityMovePass();
|
||||
SingleBranchTargetPass();
|
||||
|
||||
#if DEBUG
|
||||
if (stats.prop_constants > 0 || stats.dead_code_ops > 0 || stats.constant_ops_replaced > 0
|
||||
|
@ -714,6 +715,50 @@ private:
|
|||
}
|
||||
}
|
||||
|
||||
// Follows a chain of "bra <target>; nop" trampolines starting at addr and
// rewrites addr to the address the chain finally lands on.
//
// addr:         in/out branch target. Left untouched when it is NullAddress
//               or when it does not point at a skippable bra.
// updateCycles: when true, the cycles of every skipped bra and of its delay
//               slot are added to the block through dec_updateBlockCycles().
//
// Returns true if at least one branch was skipped (addr was modified).
//
// A branch is skipped only when:
//  - both the bra and its delay slot are on a page touched by the first or
//    the last opcode of the block (the delay slot is read from addr + 2, so
//    it must be checked as well as addr itself),
//  - the delay slot is a nop (0x0009) or 0x0000,
//  - the bra does not jump to itself.
//
// The number of followed branches is bounded: two or more trampolines that
// jump to each other would otherwise never terminate, since only a branch to
// itself is detected directly. Stopping early is harmless because the
// remaining bra instructions are simply executed normally by the guest.
bool skipSingleBranchTarget(u32& addr, bool updateCycles)
{
	if (addr == NullAddress)
		return false;

	// Pages covered by the first and the last opcode of the block.
	const u32 firstPage = block->vaddr >> 12;
	const u32 lastPage = (block->vaddr + (block->guest_opcodes - 1) * 2) >> 12;
	// Upper bound on the chain length, guards against cyclic trampolines.
	const int maxChainLength = 5;

	bool success = false;
	for (int hops = 0; hops < maxChainLength; hops++)
	{
		const u32 opPage = addr >> 12;
		const u32 delayPage = (addr + 2) >> 12;
		if ((opPage != firstPage && opPage != lastPage)
				|| (delayPage != firstPage && delayPage != lastPage))
			break;

		u32 op = IReadMem16(addr);
		// Axxx: bra <bdisp12>
		if ((op & 0xF000) != 0xA000)
			break;

		u16 delayOp = IReadMem16(addr + 2);
		if (delayOp != 0x0000 && delayOp != 0x0009)	// nop
			break;

		// Target is relative to the bra address + 4, displacement counted in 16-bit units.
		int disp = GetSImm12(op) * 2 + 4;
		if (disp == 0)
			// bra to itself: infinite loop in the guest code, leave it alone
			break;
		addr += disp;
		if (updateCycles)
		{
			dec_updateBlockCycles(block, op);
			dec_updateBlockCycles(block, delayOp);
		}
		success = true;
	}
	return success;
}
|
||||
|
||||
void SingleBranchTargetPass()
|
||||
{
|
||||
if (block->read_only)
|
||||
{
|
||||
bool updateCycles = !skipSingleBranchTarget(block->BranchBlock, true);
|
||||
skipSingleBranchTarget(block->NextBlock, updateCycles);
|
||||
}
|
||||
}
|
||||
|
||||
RuntimeBlockInfo* block;
|
||||
std::set<RegValue> writeback_values;
|
||||
|
||||
|
|
Loading…
Reference in New Issue