dynarec: skip single branch targets

This commit is contained in:
Flyinghead 2021-02-14 18:49:40 +01:00
parent 3f4993979f
commit 2d8bc6d6ee
5 changed files with 124 additions and 53 deletions

View File

@ -13,6 +13,7 @@
#include "hw/sh4/sh4_opcode_list.h"
#include "hw/sh4/sh4_core.h"
#include "hw/sh4/sh4_mem.h"
#include "hw/sh4/modules/mmu.h"
#include "decoder_opcodes.h"
#define BLOCK_MAX_SH_OPS_SOFT 500
@ -101,13 +102,13 @@ static void dec_DynamicSet(u32 regbase,u32 offs=0)
Emit(shop_jdyn,reg_pc_dyn,mk_reg((Sh4RegType)regbase),mk_imm(offs));
}
static void dec_End(u32 dst,BlockEndType flags,bool delay)
static void dec_End(u32 dst, BlockEndType flags, bool delaySlot)
{
if (state.ngen.OnlyDynamicEnds && flags == BET_StaticJump)
{
Emit(shop_mov32,mk_reg(reg_nextpc),mk_imm(dst));
Emit(shop_mov32, mk_reg(reg_nextpc), mk_imm(dst));
dec_DynamicSet(reg_nextpc);
dec_End(0xFFFFFFFF,BET_DynamicJump,delay);
dec_End(NullAddress, BET_DynamicJump, delaySlot);
return;
}
@ -116,11 +117,14 @@ static void dec_End(u32 dst,BlockEndType flags,bool delay)
verify(flags == BET_DynamicJump);
}
state.BlockType=flags;
state.NextOp=delay?NDO_Delayslot:NDO_End;
state.DelayOp=NDO_End;
state.JumpAddr=dst;
state.NextAddr=state.cpu.rpc+2+(delay?2:0);
state.BlockType = flags;
state.NextOp = delaySlot ? NDO_Delayslot : NDO_End;
state.DelayOp = NDO_End;
state.JumpAddr = dst;
if (flags != BET_StaticCall && flags != BET_StaticJump)
state.NextAddr = state.cpu.rpc + 2 + (delaySlot ? 2 : 0);
else
verify(state.JumpAddr != NullAddress);
}
#define GetN(str) ((str>>8) & 0xf)
@ -128,9 +132,6 @@ static void dec_End(u32 dst,BlockEndType flags,bool delay)
#define GetImm4(str) ((str>>0) & 0xf)
#define GetImm8(str) ((str>>0) & 0xff)
#define GetSImm8(str) ((s8)((str>>0) & 0xff))
#define GetImm12(str) ((str>>0) & 0xfff)
#define GetSImm12(str) (((s16)((GetImm12(str))<<4))>>4)
#define SR_STATUS_MASK 0x700083F2
#define SR_T_MASK 1
@ -189,7 +190,7 @@ sh4dec(i0000_nnnn_0010_0011)
u32 n = GetN(op);
dec_DynamicSet(reg_r0+n,state.cpu.rpc + 4);
dec_End(0xFFFFFFFF,BET_DynamicJump,true);
dec_End(NullAddress, BET_DynamicJump, true);
}
//jmp @<REG_N>
sh4dec(i0100_nnnn_0010_1011)
@ -197,39 +198,36 @@ sh4dec(i0100_nnnn_0010_1011)
u32 n = GetN(op);
dec_DynamicSet(reg_r0+n);
dec_End(0xFFFFFFFF,BET_DynamicJump,true);
dec_End(NullAddress, BET_DynamicJump, true);
}
//bsr <bdisp12>
// Decoder handler for BSR: calls dec_set_pr() and then ends the block as a
// static call (BET_StaticCall) to the address returned by dec_jump_simm12(op).
// The final `true` tells dec_End that a delay slot follows.
// NOTE(review): the two dec_End lines below differ only in spacing and look like
// the before/after versions of one statement from a diff view — confirm that
// only one of them belongs in the real file.
sh4dec(i1011_iiii_iiii_iiii)
{
//TODO: set PR
// NOTE(review): the TODO above looks stale since dec_set_pr() is called right
// below; presumably dec_set_pr() records the return address in PR — confirm.
dec_set_pr();
dec_End(dec_jump_simm12(op),BET_StaticCall,true);
dec_End(dec_jump_simm12(op), BET_StaticCall, true);
}
//bsrf <REG_N>
// Decoder handler for BSRF: sets up a dynamic jump through register Rn, using
// the value returned by dec_set_pr() as the immediate offset, then ends the
// block as a dynamic call (BET_DynamicCall) with a delay slot.
// NOTE(review): the two dec_End lines below pass the same value (NullAddress is
// 0xFFFFFFFF) and look like the before/after versions of one statement from a
// diff view — confirm that only one of them belongs in the real file.
sh4dec(i0000_nnnn_0000_0011)
{
// Rn index, taken from bits 8..11 of the opcode
u32 n = GetN(op);
//TODO: set PR
// NOTE(review): the TODO above looks stale since dec_set_pr() is called right
// below; presumably it returns the return address — confirm.
u32 retaddr=dec_set_pr();
// Jump target is dynamic: register Rn plus the offset retaddr
dec_DynamicSet(reg_r0+n,retaddr);
dec_End(0xFFFFFFFF,BET_DynamicCall,true);
dec_End(NullAddress, BET_DynamicCall, true);
}
//jsr @<REG_N>
// Decoder handler for JSR: calls dec_set_pr(), sets up a dynamic jump to the
// address held in register Rn (no offset), then ends the block as a dynamic
// call (BET_DynamicCall) with a delay slot.
// NOTE(review): the two dec_End lines below pass the same value (NullAddress is
// 0xFFFFFFFF) and look like the before/after versions of one statement from a
// diff view — confirm that only one of them belongs in the real file.
sh4dec(i0100_nnnn_0000_1011)
{
// Rn index, taken from bits 8..11 of the opcode
u32 n = GetN(op);
//TODO: Set pr
// NOTE(review): the TODO above looks stale since dec_set_pr() is called right below.
dec_set_pr();
dec_DynamicSet(reg_r0+n);
dec_End(0xFFFFFFFF,BET_DynamicCall,true);
dec_End(NullAddress, BET_DynamicCall, true);
}
//rts
// Decoder handler for RTS: sets up a dynamic jump to the address held in
// reg_pr, then ends the block as a dynamic return (BET_DynamicRet) with a
// delay slot.
// NOTE(review): the two dec_End lines below pass the same value (NullAddress is
// 0xFFFFFFFF) and look like the before/after versions of one statement from a
// diff view — confirm that only one of them belongs in the real file.
sh4dec(i0000_0000_0000_1011)
{
dec_DynamicSet(reg_pr);
dec_End(0xFFFFFFFF,BET_DynamicRet,true);
dec_End(NullAddress, BET_DynamicRet, true);
}
//rte
sh4dec(i0000_0000_0010_1011)
@ -238,7 +236,7 @@ sh4dec(i0000_0000_0010_1011)
dec_write_sr(reg_ssr);
Emit(shop_sync_sr);
dec_DynamicSet(reg_spc);
dec_End(0xFFFFFFFF,BET_DynamicIntr,true);
dec_End(NullAddress, BET_DynamicIntr, true);
}
//trapa #<imm>
sh4dec(i1100_0011_iiii_iiii)
@ -246,7 +244,7 @@ sh4dec(i1100_0011_iiii_iiii)
//TODO: ifb
dec_fallback(op);
dec_DynamicSet(reg_nextpc);
dec_End(0xFFFFFFFF,BET_DynamicJump,false);
dec_End(NullAddress, BET_DynamicJump, false);
}
//sleep
sh4dec(i0000_0000_0001_1011)
@ -254,7 +252,7 @@ sh4dec(i0000_0000_0001_1011)
//TODO: ifb
dec_fallback(op);
dec_DynamicSet(reg_nextpc);
dec_End(0xFFFFFFFF,BET_DynamicJump,false);
dec_End(NullAddress, BET_DynamicJump, false);
}
//ldc.l @<REG_N>+,SR
@ -272,7 +270,7 @@ sh4dec(i0100_nnnn_0000_0111)
//FIXME only if interrupts got on .. :P
UpdateINTC();
}
dec_End(0xFFFFFFFF,BET_StaticIntr,false);
dec_End(NullAddress,BET_StaticIntr,false);
}
*/
@ -283,7 +281,7 @@ sh4dec(i0100_nnnn_0000_1110)
dec_write_sr((Sh4RegType)(reg_r0+n));
Emit(shop_sync_sr);
dec_End(0xFFFFFFFF,BET_StaticIntr,false);
dec_End(NullAddress, BET_StaticIntr, false);
}
//nop !
@ -975,16 +973,29 @@ static void state_Setup(u32 rpc,fpscr_t fpu_cfg)
//verify(fpu_cfg.RM<2); // Happens with many wince games (set to 3)
//what about fp/fs ?
state.NextOp=NDO_NextOp;
state.BlockType=BET_SCL_Intr;
state.JumpAddr=0xFFFFFFFF;
state.NextAddr=0xFFFFFFFF;
state.NextOp = NDO_NextOp;
state.BlockType = BET_SCL_Intr;
state.JumpAddr = NullAddress;
state.NextAddr = NullAddress;
state.info.has_readm=false;
state.info.has_writem=false;
state.info.has_fpu=false;
}
// Adds the cycle cost of one guest opcode to block->guest_cycles.
//
// block: block whose guest cycle counter is updated.
// op:    the 16-bit guest opcode being accounted for.
//
// With the MMU enabled, the cost is the opcode's LatencyCycles from OpDesc,
// but never less than 1. With the MMU disabled, every opcode below 0xF000
// costs exactly one cycle and opcodes at or above 0xF000 cost nothing.
void dec_updateBlockCycles(RuntimeBlockInfo *block, u16 op)
{
	if (mmu_enabled())
	{
		const int latency = (int)OpDesc[op]->LatencyCycles;
		block->guest_cycles += std::max(latency, 1);
		return;
	}

	// MMU disabled: flat cost, 0xFxxx opcodes are not counted
	const bool counted = op < 0xF000;
	if (counted)
		block->guest_cycles++;
}
bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
{
blk=rbi;
@ -1015,15 +1026,8 @@ bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
u32 op = IReadMem16(state.cpu.rpc);
blk->guest_opcodes++;
if (!mmu_enabled())
{
if (op < 0xF000)
blk->guest_cycles++;
}
else
{
blk->guest_cycles += std::max((int)OpDesc[op]->LatencyCycles, 1);
}
dec_updateBlockCycles(blk, op);
if (OpDesc[op]->IsFloatingPoint())
{
if (sr.FD == 1)
@ -1045,11 +1049,11 @@ bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
if (OpDesc[op]->SetPC())
{
dec_DynamicSet(reg_nextpc);
dec_End(0xFFFFFFFF,BET_DynamicJump,false);
dec_End(NullAddress, BET_DynamicJump, false);
}
if (OpDesc[op]->SetFPSCR() && !state.cpu.is_delayslot)
else if (OpDesc[op]->SetFPSCR() && !state.cpu.is_delayslot)
{
dec_End(state.cpu.rpc+2,BET_StaticJump,false);
dec_End(state.cpu.rpc + 2, BET_StaticJump, false);
}
}
}
@ -1062,13 +1066,33 @@ bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
}
break;
case NDO_Jump:
die("Too old");
//state.NextOp=state.JumpOp;
//state.cpu.rpc=state.JumpAddr;
break;
case NDO_End:
// Disabled for now since we need to know if the block is read-only,
// which isn't determined until after the decoding.
// This is a relatively rare optimization anyway
#if 0
// detect if calling an empty subroutine and skip it
if (state.BlockType == BET_StaticCall && blk->read_only)
{
if ((state.JumpAddr >> 12) == (blk->vaddr >> 12)
|| (state.JumpAddr >> 12) == ((blk->vaddr + (blk->guest_opcodes - 1) * 2) >> 12))
{
u32 op = IReadMem16(state.JumpAddr);
if (op == 0x000B) // rts
{
u16 delayOp = IReadMem16(state.JumpAddr + 2);
if (delayOp == 0x0000 || delayOp == 0x0009) // nop
{
state.NextOp = NDO_NextOp;
state.cpu.is_delayslot = false;
dec_updateBlockCycles(blk, op);
dec_updateBlockCycles(blk, delayOp);
continue;
}
}
}
}
#endif
goto _end;
}
}

View File

@ -35,7 +35,6 @@ enum NextDecoderOperation
NDO_NextOp, //pc+=2
NDO_End, //End the block, Type = BlockEndType
NDO_Delayslot, //pc+=2, NextOp=DelayOp
NDO_Jump, //pc=JumpAddr,NextOp=JumpOp
};
//ngen features
struct ngen_features
@ -46,12 +45,12 @@ struct ngen_features
struct RuntimeBlockInfo;
bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles);
void dec_updateBlockCycles(RuntimeBlockInfo *block, u16 op);
struct state_t
{
NextDecoderOperation NextOp;
NextDecoderOperation DelayOp;
NextDecoderOperation JumpOp;
u32 JumpAddr;
u32 NextAddr;
BlockEndType BlockType;
@ -73,5 +72,8 @@ struct state_t
bool has_writem;
bool has_fpu;
} info;
};
} ;
// Sentinel for "no address": stored in jump/next/branch target fields that
// have no static target, and tested for before a target is followed.
const u32 NullAddress = 0xFFFFFFFF;
// Extracts the low 12 bits of an opcode (unsigned 12-bit immediate).
// Note: the argument is not parenthesized, so pass a simple expression.
#define GetImm12(str) ((str>>0) & 0xfff)
// Sign-extended 12-bit immediate: moves the 12 bits to the top of a 16-bit
// value, then shifts right by 4 again. Relies on >> of a negative short being
// an arithmetic (sign-preserving) shift.
#define GetSImm12(str) (((short)((GetImm12(str))<<4))>>4)

View File

@ -149,8 +149,8 @@ bool RuntimeBlockInfo::Setup(u32 rpc,fpscr_t rfpu_cfg)
pBranchBlock=pNextBlock=0;
code=0;
has_jcond=false;
BranchBlock = 0xFFFFFFFF;
NextBlock = 0xFFFFFFFF;
BranchBlock = NullAddress;
NextBlock = NullAddress;
BlockType = BET_SCL_Intr;
has_fpu_op = false;
temp_block = false;

View File

@ -249,7 +249,7 @@ bool SSAOptimizer::ExecuteConstOp(shil_opcode* op)
block->BranchBlock = block->NextBlock;
}
block->BlockType = BET_StaticJump;
block->NextBlock = 0xFFFFFFFF;
block->NextBlock = NullAddress;
block->has_jcond = false;
// same remark regarding jdyn as in the previous case
block->oplist.erase(block->oplist.begin() + opnum);

View File

@ -50,6 +50,7 @@ public:
CombineShiftsPass();
DeadRegisterPass();
IdentityMovePass();
SingleBranchTargetPass();
#if DEBUG
if (stats.prop_constants > 0 || stats.dead_code_ops > 0 || stats.constant_ops_replaced > 0
@ -714,6 +715,50 @@ private:
}
}
// Follows a chain of trivial unconditional branches starting at `addr`.
//
// While `addr` points at a `bra <bdisp12>` whose delay slot is a nop
// (0x0000 or 0x0009), `addr` is replaced by that branch's target, so the
// block can jump straight to the final destination.
//
// addr:         in/out. Candidate target address; updated in place to the
//               address reached after skipping. Left untouched when it is
//               NullAddress or nothing can be skipped.
// updateCycles: when true, the cycles of every skipped bra and its delay
//               slot are added to the block through dec_updateBlockCycles.
//
// Returns true if at least one branch was skipped.
//
// The branch and its delay slot are only read from the pages holding the
// first and last opcode of the block (presumably the pages the caller's
// read_only check applies to — confirm).
bool skipSingleBranchTarget(u32& addr, bool updateCycles)
{
	if (addr == NullAddress)
		return false;

	// Cap on the number of chained branches followed. Without it, a cycle of
	// two or more bra+nop pairs (A -> B -> A) would never terminate and would
	// keep adding cycles to the block. Stopping part-way through a chain is
	// still correct: the remaining branches simply execute normally.
	const int maxHops = 5;
	const u32 firstPage = block->vaddr >> 12;
	const u32 lastPage = (block->vaddr + (block->guest_opcodes - 1) * 2) >> 12;

	bool success = false;
	for (int hop = 0; hop < maxHops; hop++)
	{
		// Both the branch (addr) and its delay slot (addr + 2) are read below,
		// so both must lie in one of the block's pages.
		const u32 opPage = addr >> 12;
		const u32 delayPage = (addr + 2) >> 12;
		if ((opPage != firstPage && opPage != lastPage)
				|| (delayPage != firstPage && delayPage != lastPage))
			break;

		u32 op = IReadMem16(addr);
		// Axxx: bra <bdisp12>
		if ((op & 0xF000) != 0xA000)
			break;

		u16 delayOp = IReadMem16(addr + 2);
		if (delayOp != 0x0000 && delayOp != 0x0009)	// nop
			break;

		// Branch target relative to the bra itself: disp12 * 2 + 4
		int disp = GetSImm12(op) * 2 + 4;
		if (disp == 0)
			// infiniloop: the branch targets itself
			break;
		addr += disp;
		if (updateCycles)
		{
			dec_updateBlockCycles(block, op);
			dec_updateBlockCycles(block, delayOp);
		}
		success = true;
	}
	return success;
}
// Optimization pass: for read-only blocks, redirects the block's branch and
// fall-through targets past any chain of trivial branches they point at.
// Cycles of skipped instructions are always counted for the branch target;
// for the next-block target they are counted only if nothing was skipped on
// the branch target.
void SingleBranchTargetPass()
{
	if (!block->read_only)
		return;

	const bool branchSkipped = skipSingleBranchTarget(block->BranchBlock, true);
	skipSingleBranchTarget(block->NextBlock, !branchSkipped);
}
RuntimeBlockInfo* block;
std::set<RegValue> writeback_values;