diff --git a/core/hw/sh4/dyna/blockmanager.cpp b/core/hw/sh4/dyna/blockmanager.cpp index dd0a3b818..2392a8a99 100644 --- a/core/hw/sh4/dyna/blockmanager.cpp +++ b/core/hw/sh4/dyna/blockmanager.cpp @@ -18,6 +18,7 @@ //#include "../intc.h" //#include "../tmu.h" #include "hw/sh4/sh4_mem.h" +#include "hw/sh4/sh4_sched.h" #if HOST_OS==OS_LINUX && defined(DYNA_OPROF) @@ -86,6 +87,7 @@ u32 bm_gc_luc,bm_gcf_luc; #define FPCA(x) ((DynarecCodeEntryPtr&)sh4rcb.fpcb[(x>>1)&FPCB_MASK]) +// addr must be a physical address DynarecCodeEntryPtr DYNACALL bm_GetCode(u32 addr) { //rdv_FailedToFindBlock_pc=addr; @@ -94,11 +96,51 @@ DynarecCodeEntryPtr DYNACALL bm_GetCode(u32 addr) return (DynarecCodeEntryPtr)rv; } +// addr must be a virtual address DynarecCodeEntryPtr DYNACALL bm_GetCode2(u32 addr) { - return (DynarecCodeEntryPtr)bm_GetCode(addr); +#ifndef NO_MMU + if (!mmu_enabled()) +#endif + return (DynarecCodeEntryPtr)bm_GetCode(addr); +#ifndef NO_MMU + else + { + if (addr & 1) + { + switch (addr) + { + case 0xfffffde7: // GetTickCount + // This should make this syscall faster + r[0] = sh4_sched_now64() * 1000 / SH4_MAIN_CLOCK; + next_pc = pr; + addr = next_pc; + break; + default: + Do_Exception(addr, 0xE0, 0x100); + addr = next_pc; + break; + } + } + + try { + u32 paddr; + bool shared; + mmu_instruction_translation(addr, paddr, shared); + + return (DynarecCodeEntryPtr)bm_GetCode(paddr); + } catch (SH4ThrownException& ex) { + Do_Exception(addr, ex.expEvn, ex.callVect); + u32 paddr; + bool shared; + mmu_instruction_translation(next_pc, paddr, shared); + return (DynarecCodeEntryPtr)bm_GetCode(paddr); + } + } +#endif } +// addr must be a physical address RuntimeBlockInfo* DYNACALL bm_GetBlock(u32 addr) { DynarecCodeEntryPtr cde=bm_GetCode(addr); @@ -165,6 +207,22 @@ void bm_AddBlock(RuntimeBlockInfo* blk) } +void bm_RemoveBlock(RuntimeBlockInfo* block) +{ + verify((void*)bm_GetCode(block->addr) != (void*)ngen_FailedToFindBlock); + FPCA(block->addr) = ngen_FailedToFindBlock; + auto it 
= blkmap.find(block); + if (it != blkmap.end()) + blkmap.erase(it); + for (auto it = all_blocks.begin(); it != all_blocks.end(); it++) + if (*it == block) + { + all_blocks.erase(it); + break; + } + delete block; +} + bool UDgreaterX ( RuntimeBlockInfo* elem1, RuntimeBlockInfo* elem2 ) { return elem1->runs > elem2->runs; @@ -594,7 +652,8 @@ void print_blocks() if (f) { fprintf(f,"block: %p\n",blk); - fprintf(f,"addr: %08X\n",blk->addr); + fprintf(f,"vaddr: %08X\n",blk->vaddr); + fprintf(f,"paddr: %08X\n",blk->addr); fprintf(f,"hash: %s\n",blk->hash()); fprintf(f,"hash_rloc: %s\n",blk->hash(false,true)); fprintf(f,"code: %p\n",blk->code); @@ -624,17 +683,21 @@ void print_blocks() if (gcode!=op->guest_offs) { gcode=op->guest_offs; - u32 rpc=blk->addr+gcode; - u16 op=ReadMem16(rpc); + u32 rpc=blk->vaddr+gcode; + try { + u16 op=IReadMem16(rpc); - char temp[128]; - OpDesc[op]->Disassemble(temp,rpc,op); + char temp[128]; + OpDesc[op]->Disassemble(temp,rpc,op); - fprintf(f,"//g:%s\n",temp); + fprintf(f,"//g: %04X %s\n", op, temp); + } catch (SH4ThrownException& ex) { + fprintf(f,"//g: ???? 
(page fault)\n"); + } } string s=op->dissasm(); - fprintf(f,"//il:%d:%d:%s\n",op->guest_offs,op->host_offs,s.c_str()); + fprintf(f,"//il:%d:%d: %s\n",op->guest_offs,op->host_offs,s.c_str()); } fprint_hex(f,"//h:",pucode,hcode,blk->host_code_size); diff --git a/core/hw/sh4/dyna/blockmanager.h b/core/hw/sh4/dyna/blockmanager.h index f676500c3..b750449e4 100644 --- a/core/hw/sh4/dyna/blockmanager.h +++ b/core/hw/sh4/dyna/blockmanager.h @@ -20,6 +20,8 @@ struct RuntimeBlockInfo: RuntimeBlockInfo_Core void Setup(u32 pc,fpscr_t fpu_cfg); const char* hash(bool full=true, bool reloc=false); + u32 vaddr; + u32 host_code_size; //in bytes u32 sh4_code_size; //in bytes @@ -33,7 +35,8 @@ struct RuntimeBlockInfo: RuntimeBlockInfo_Core u32 guest_cycles; u32 guest_opcodes; u32 host_opcodes; - + bool has_fpu_op; + u32 asid; // if not 0xFFFFFFFF then private page belonging to this id u32 BranchBlock; //if not 0xFFFFFFFF then jump target u32 NextBlock; //if not 0xFFFFFFFF then next block (by position) @@ -94,6 +97,7 @@ RuntimeBlockInfo* bm_GetStaleBlock(void* dynarec_code); RuntimeBlockInfo* DYNACALL bm_GetBlock(u32 addr); void bm_AddBlock(RuntimeBlockInfo* blk); +void bm_RemoveBlock(RuntimeBlockInfo* block); void bm_Reset(); void bm_Periodical_1s(); void bm_Periodical_14k(); diff --git a/core/hw/sh4/dyna/decoder.cpp b/core/hw/sh4/dyna/decoder.cpp index 3b6705ef7..cda15f6c4 100644 --- a/core/hw/sh4/dyna/decoder.cpp +++ b/core/hw/sh4/dyna/decoder.cpp @@ -82,7 +82,8 @@ void Emit(shilop op,shil_param rd=shil_param(),shil_param rs1=shil_param(),shil_ sp.rs1=(rs1); sp.rs2=(rs2); sp.rs3=(rs3); - sp.guest_offs=state.cpu.rpc-blk->addr; + sp.guest_offs = state.cpu.rpc - blk->vaddr; + sp.delay_slot = state.cpu.is_delayslot; blk->oplist.push_back(sp); } @@ -96,22 +97,14 @@ void dec_fallback(u32 op) opcd.rs2=shil_param(FMT_IMM,state.cpu.rpc+2); opcd.rs3=shil_param(FMT_IMM,op); + + opcd.guest_offs = state.cpu.rpc - blk->vaddr; + opcd.delay_slot = state.cpu.is_delayslot; 
blk->oplist.push_back(opcd); } #if 1 -#define FMT_I32 ERROR!WRONG++!! -#define FMT_F32 ERROR!WRONG++!! -#define FMT_F32 ERROR!WRONG++!! -#define FMT_TYPE ERROR!WRONG++!! - -#define FMT_REG ERROR!WRONG++!! -#define FMT_IMM ERROR!WRONG++!! - -#define FMT_PARAM ERROR!WRONG++!! -#define FMT_MASK ERROR!WRONG++!! - void dec_DynamicSet(u32 regbase,u32 offs=0) { if (offs==0) @@ -277,6 +270,7 @@ sh4dec(i0000_0000_0001_1011) } //ldc.l @+,SR +/* sh4dec(i0100_nnnn_0000_0111) { /* @@ -290,9 +284,10 @@ sh4dec(i0100_nnnn_0000_0111) { //FIXME only if interrupts got on .. :P UpdateINTC(); - }*/ + }* / dec_End(0xFFFFFFFF,BET_StaticIntr,false); } +*/ //ldc ,SR sh4dec(i0100_nnnn_0000_1110) @@ -309,6 +304,7 @@ sh4dec(i0000_0000_0000_1001) { } +//fschg sh4dec(i1111_0011_1111_1101) { //fpscr.SZ is bit 20 @@ -594,7 +590,7 @@ u32 MatchDiv32(u32 pc , Sh4RegType ®1,Sh4RegType ®2 , Sh4RegType ®3) u32 match=1; for (int i=0;i<32;i++) { - u16 opcode=ReadMem16(v_pc); + u16 opcode=IReadMem16(v_pc); v_pc+=2; if ((opcode&MASK_N)==ROTCL_KEY) { @@ -610,7 +606,7 @@ u32 MatchDiv32(u32 pc , Sh4RegType ®1,Sh4RegType ®2 , Sh4RegType ®3) break; } - opcode=ReadMem16(v_pc); + opcode=IReadMem16(v_pc); v_pc+=2; if ((opcode&MASK_N_M)==DIV1_KEY) { @@ -684,11 +680,11 @@ bool MatchDiv32s(u32 op,u32 pc) else //no match ... 
{ /* - printf("%04X\n",ReadMem16(pc-2)); - printf("%04X\n",ReadMem16(pc-0)); - printf("%04X\n",ReadMem16(pc+2)); - printf("%04X\n",ReadMem16(pc+4)); - printf("%04X\n",ReadMem16(pc+6));*/ + printf("%04X\n",IReadMem16(pc-2)); + printf("%04X\n",IReadMem16(pc-0)); + printf("%04X\n",IReadMem16(pc+2)); + printf("%04X\n",IReadMem16(pc+4)); + printf("%04X\n",IReadMem16(pc+6));*/ return false; } } @@ -697,11 +693,11 @@ bool MatchDiv32s(u32 op,u32 pc) //This ended up too rare (and too hard to match) bool MatchDiv0S_0(u32 pc) { - if (ReadMem16(pc+0)==0x233A && //XOR r3,r3 - ReadMem16(pc+2)==0x2137 && //DIV0S r3,r1 - ReadMem16(pc+4)==0x322A && //SUBC r2,r2 - ReadMem16(pc+6)==0x313A && //SUBC r3,r1 - (ReadMem16(pc+8)&0xF00F)==0x2007) //DIV0S x,x + if (IReadMem16(pc+0)==0x233A && //XOR r3,r3 + IReadMem16(pc+2)==0x2137 && //DIV0S r3,r1 + IReadMem16(pc+4)==0x322A && //SUBC r2,r2 + IReadMem16(pc+6)==0x313A && //SUBC r3,r1 + (IReadMem16(pc+8)&0xF00F)==0x2007) //DIV0S x,x return true; else return false; @@ -829,7 +825,7 @@ bool dec_generic(u32 op) bool update_after=false; if ((s32)e<0) { - if (rs1._reg!=rs2._reg) //reg shouldn't be updated if its written + if (rs1._reg!=rs2._reg && !mmu_enabled()) //reg shouldn't be updated if its written { Emit(shop_sub,rs1,rs1,mk_imm(-e)); } @@ -998,7 +994,7 @@ void state_Setup(u32 rpc,fpscr_t fpu_cfg) state.cpu.FPR64=fpu_cfg.PR; state.cpu.FSZ64=fpu_cfg.SZ; state.cpu.RoundToZero=fpu_cfg.RM==1; - verify(fpu_cfg.RM<2); + //verify(fpu_cfg.RM<2); // Happens with many wince games (set to 3) //what about fp/fs ? 
state.NextOp=NDO_NextOp; @@ -1014,7 +1010,7 @@ void state_Setup(u32 rpc,fpscr_t fpu_cfg) void dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles) { blk=rbi; - state_Setup(blk->addr,blk->fpu_cfg); + state_Setup(blk->vaddr, blk->fpu_cfg); ngen_GetFeatures(&state.ngen); blk->guest_opcodes=0; @@ -1057,7 +1053,7 @@ void dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles) } */ - u32 op=ReadMem16(state.cpu.rpc); + u32 op=IReadMem16(state.cpu.rpc); if (op==0 && state.cpu.is_delayslot) { printf("Delayslot 0 hack!\n"); @@ -1069,6 +1065,8 @@ void dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles) blk->guest_cycles+=0; else blk->guest_cycles+=CPU_RATIO; + if (OpDesc[op]->IsFloatingPoint()) + blk->has_fpu_op = true; verify(!(state.cpu.is_delayslot && OpDesc[op]->SetPC())); if (state.ngen.OnlyDynamicEnds || !OpDesc[op]->rec_oph) @@ -1116,7 +1114,7 @@ void dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles) } _end: - blk->sh4_code_size=state.cpu.rpc-blk->addr; + blk->sh4_code_size=state.cpu.rpc-blk->vaddr; blk->NextBlock=state.NextAddr; blk->BranchBlock=state.JumpAddr; blk->BlockType=state.BlockType; @@ -1150,12 +1148,12 @@ _end: //Small-n-simple idle loop detector :p if (state.info.has_readm && !state.info.has_writem && !state.info.has_fpu && blk->guest_opcodes<6) { - if (blk->BlockType==BET_Cond_0 || (blk->BlockType==BET_Cond_1 && blk->BranchBlock<=blk->addr)) + if (blk->BlockType==BET_Cond_0 || (blk->BlockType==BET_Cond_1 && blk->BranchBlock<=blk->vaddr)) { blk->guest_cycles*=3; } - if (blk->BranchBlock==blk->addr) + if (blk->BranchBlock==blk->vaddr) { blk->guest_cycles*=10; } diff --git a/core/hw/sh4/dyna/driver.cpp b/core/hw/sh4/dyna/driver.cpp index d398a6f70..d410b8546 100644 --- a/core/hw/sh4/dyna/driver.cpp +++ b/core/hw/sh4/dyna/driver.cpp @@ -14,6 +14,7 @@ #include "hw/sh4/sh4_interrupts.h" #include "hw/sh4/sh4_mem.h" +#include "hw/sh4/modules/mmu.h" #include "hw/pvr/pvr_mem.h" #include "hw/aica/aica_if.h" #include "hw/gdrom/gdrom_if.h" @@ -118,7 +119,7 @@ u32 
emit_FreeSpace() return CODE_SIZE-LastAddr; } - +// pc must be a physical address bool DoCheck(u32 pc) { if (IsOnRam(pc)) @@ -202,8 +203,19 @@ void RuntimeBlockInfo::Setup(u32 rpc,fpscr_t rfpu_cfg) has_jcond=false; BranchBlock=NextBlock=csc_RetCache=0xFFFFFFFF; BlockType=BET_SCL_Intr; + has_fpu_op = false; + asid = 0xFFFFFFFF; - addr=rpc; + vaddr = rpc; + if (mmu_enabled()) + { + bool shared; + mmu_instruction_translation(vaddr, addr, shared); + if (addr != vaddr && !shared) + asid = CCN_PTEH.ASID; + } + else + addr = vaddr; fpu_cfg=rfpu_cfg; oplist.clear(); @@ -219,37 +231,44 @@ DynarecCodeEntryPtr rdv_CompilePC() if (emit_FreeSpace()<16*1024 || pc==0x8c0000e0 || pc==0xac010000 || pc==0xac008300) recSh4_ClearCache(); - RuntimeBlockInfo* rv=0; - do - { - RuntimeBlockInfo* rbi = ngen_AllocateBlock(); - if (rv==0) rv=rbi; - + RuntimeBlockInfo* rbi = ngen_AllocateBlock(); +#ifndef NO_MMU + try { +#endif rbi->Setup(pc,fpscr); - + bool do_opts=((rbi->addr&0x3FFFFFFF)>0x0C010100); rbi->staging_runs=do_opts?100:-100; ngen_Compile(rbi,DoCheck(rbi->addr),(pc&0xFFFFFF)==0x08300 || (pc&0xFFFFFF)==0x10000,false,do_opts); verify(rbi->code!=0); bm_AddBlock(rbi); +#ifndef NO_MMU + } catch (SH4ThrownException& ex) { + delete rbi; + throw ex; + } +#endif - if (rbi->BlockType==BET_Cond_0 || rbi->BlockType==BET_Cond_1) - pc=rbi->NextBlock; - else - pc=0; - } while(false && pc); - - return rv->code; + return rbi->code; } DynarecCodeEntryPtr DYNACALL rdv_FailedToFindBlock(u32 pc) { //printf("rdv_FailedToFindBlock ~ %08X\n",pc); - next_pc=pc; +#ifndef NO_MMU + try { +#endif + next_pc=pc; - return rdv_CompilePC(); + return rdv_CompilePC(); +#ifndef NO_MMU + } catch (SH4ThrownException& ex) { + Do_Exception(pc, ex.expEvn, ex.callVect); + return bm_GetCode2(next_pc); + } +#endif } static void ngen_FailedToFindBlock_internal() { @@ -258,9 +277,6 @@ static void ngen_FailedToFindBlock_internal() { void (*ngen_FailedToFindBlock)() = &ngen_FailedToFindBlock_internal; -extern u32 
rebuild_counter; - - u32 DYNACALL rdv_DoInterrupts_pc(u32 pc) { next_pc = pc; UpdateINTC(); @@ -268,37 +284,39 @@ u32 DYNACALL rdv_DoInterrupts_pc(u32 pc) { //We can only safely relocate/etc stuff here, as in other generic update cases //There's a RET, meaning the code can't move around //Interrupts happen at least 50 times/second, so its not a problem .. + /* if (rebuild_counter == 0) { // TODO: Why is this commented, etc. //bm_Rebuild(); } + */ return next_pc; } -void bm_Rebuild(); u32 DYNACALL rdv_DoInterrupts(void* block_cpde) { RuntimeBlockInfo* rbi = bm_GetBlock(block_cpde); - return rdv_DoInterrupts_pc(rbi->addr); + return rdv_DoInterrupts_pc(rbi->vaddr); } -DynarecCodeEntryPtr DYNACALL rdv_BlockCheckFail(u32 pc) +// addr must be the physical address of the start of the block +DynarecCodeEntryPtr DYNACALL rdv_BlockCheckFail(u32 addr) { - next_pc=pc; - recSh4_ClearCache(); + RuntimeBlockInfo *block = bm_GetBlock(addr); + bm_RemoveBlock(block); return rdv_CompilePC(); } -DynarecCodeEntryPtr rdv_FindCode() -{ - DynarecCodeEntryPtr rv=bm_GetCode(next_pc); - if (rv==ngen_FailedToFindBlock) - return 0; - - return rv; -} +//DynarecCodeEntryPtr rdv_FindCode() +//{ +// DynarecCodeEntryPtr rv=bm_GetCode(next_pc); +// if (rv==ngen_FailedToFindBlock) +// return 0; +// +// return rv; +//} DynarecCodeEntryPtr rdv_FindOrCompile() { diff --git a/core/hw/sh4/dyna/ngen.h b/core/hw/sh4/dyna/ngen.h index 0e7d9cd3b..f2b07aef2 100644 --- a/core/hw/sh4/dyna/ngen.h +++ b/core/hw/sh4/dyna/ngen.h @@ -70,7 +70,7 @@ DynarecCodeEntryPtr DYNACALL rdv_BlockCheckFail(u32 pc); //Called to compile code @pc DynarecCodeEntryPtr rdv_CompilePC(); //Returns 0 if there is no code @pc, code ptr otherwise -DynarecCodeEntryPtr rdv_FindCode(); +//DynarecCodeEntryPtr rdv_FindCode(); //Finds or compiles code @pc DynarecCodeEntryPtr rdv_FindOrCompile(); diff --git a/core/hw/sh4/dyna/regalloc.h b/core/hw/sh4/dyna/regalloc.h index eee756571..774d3d4a6 100644 --- a/core/hw/sh4/dyna/regalloc.h +++ 
b/core/hw/sh4/dyna/regalloc.h @@ -368,7 +368,8 @@ struct RegAlloc verify(opid>=0 && opidoplist.size()); shil_opcode* op=&block->oplist[opid]; - return op->op == shop_sync_fpscr || op->op == shop_sync_sr || op->op == shop_ifb; + return op->op == shop_sync_fpscr || op->op == shop_sync_sr || op->op == shop_ifb + || (mmu_enabled() && (op->op == shop_readm || op->op == shop_writem || op->op == shop_pref)); } bool IsRegWallOp(RuntimeBlockInfo* block, int opid, bool is_fpr) @@ -496,6 +497,12 @@ struct RegAlloc { fp=true; } + else + { + all = true; + fp = true; + gpr_b = true; + } if (all) { @@ -1119,6 +1126,30 @@ struct RegAlloc } } + void BailOut(u32 opid) + { + for (u32 sid = 0; sid < all_spans.size(); sid++) + { + RegSpan* spn = all_spans[sid]; + + if (spn->end >= opid && spn->start < opid && spn->writeback) + { + if (spn->fpr) + { + //printf("Op %d: Writing back f%d from %d\n",current_opid,spn->regstart,spn->nregf); + writeback_fpu++; + Writeback_FPU(spn->regstart,spn->nregf); + } + else + { + //printf("Op %d: Writing back r%d from %d\n",current_opid,spn->regstart,spn->nreg); + writeback_gpr++; + Writeback(spn->regstart,spn->nreg); + } + } + } + } + void Cleanup() { writeback_gpr=writeback_fpu=0; diff --git a/core/hw/sh4/dyna/shil.h b/core/hw/sh4/dyna/shil.h index c85b6ce87..315ab806a 100644 --- a/core/hw/sh4/dyna/shil.h +++ b/core/hw/sh4/dyna/shil.h @@ -151,6 +151,7 @@ struct shil_opcode u16 host_offs; u16 guest_offs; + bool delay_slot; string dissasm(); }; diff --git a/core/hw/sh4/interpr/sh4_interpreter.cpp b/core/hw/sh4/interpr/sh4_interpreter.cpp index ddc7d509c..fc358ee05 100644 --- a/core/hw/sh4/interpr/sh4_interpreter.cpp +++ b/core/hw/sh4/interpr/sh4_interpreter.cpp @@ -173,11 +173,7 @@ void ExecuteDelayslot() #if !defined(NO_MMU) } catch (SH4ThrownException& ex) { - ex.epc -= 2; - if (ex.expEvn == 0x800) // FPU disable exception - ex.expEvn = 0x820; // Slot FPU disable exception - else if (ex.expEvn == 0x180) // Illegal instruction exception - ex.expEvn = 
0x1A0; // Slot illegal instruction exception + AdjustDelaySlotException(ex); //printf("Delay slot exception\n"); throw ex; } diff --git a/core/hw/sh4/modules/ccn.cpp b/core/hw/sh4/modules/ccn.cpp index 49b7c574f..d084f58fc 100644 --- a/core/hw/sh4/modules/ccn.cpp +++ b/core/hw/sh4/modules/ccn.cpp @@ -50,11 +50,8 @@ void CCN_MMUCR_write(u32 addr, u32 value) if (temp.TI != 0) { - for (u32 i = 0; i < 4; i++) - ITLB[i].Data.V = 0; - - for (u32 i = 0; i < 64; i++) - UTLB[i].Data.V = 0; + //sh4_cpu.ResetCache(); + mmu_flush_table(); temp.TI = 0; } @@ -63,6 +60,7 @@ void CCN_MMUCR_write(u32 addr, u32 value) if (mmu_changed_state) { //printf("<*******>MMU Enabled , ONLY SQ remaps work<*******>\n"); + sh4_cpu.ResetCache(); mmu_set_state(); } } diff --git a/core/hw/sh4/modules/mmu.cpp b/core/hw/sh4/modules/mmu.cpp index 1b0189cbc..07b20f923 100644 --- a/core/hw/sh4/modules/mmu.cpp +++ b/core/hw/sh4/modules/mmu.cpp @@ -201,7 +201,7 @@ void mmu_raise_exception(u32 mmu_error, u32 address, u32 am) return; break; - //TLB Multyhit + //TLB Multihit case MMU_ERROR_TLB_MHIT: printf("MMU_ERROR_TLB_MHIT @ 0x%X\n", address); break; @@ -239,19 +239,10 @@ void mmu_raise_exception(u32 mmu_error, u32 address, u32 am) else //IADDERR - Instruction Address Error { #ifdef TRACE_WINCE_SYSCALLS - bool skip_exception = false; - if (!print_wince_syscall(address, skip_exception)) - printf_mmu("MMU_ERROR_BADADDR(i) 0x%X\n", address); - //if (!skip_exception) - RaiseException(0xE0, 0x100); - //else { - // SH4ThrownException ex = { 0, 0, 0 }; - // throw ex; - //} -#else - printf_mmu("MMU_ERROR_BADADDR(i) 0x%X\n", address); - RaiseException(0xE0, 0x100); + if (!print_wince_syscall(address)) #endif + printf_mmu("MMU_ERROR_BADADDR(i) 0x%X\n", address); + RaiseException(0xE0, 0x100); return; } printf_mmu("MMU_ERROR_BADADDR(d) 0x%X, handled\n", address); @@ -291,9 +282,10 @@ bool mmu_match(u32 va, CCN_PTEH_type Address, CCN_PTEL_type Data) return false; } + //Do a full lookup on the UTLB entry's template 
-u32 mmu_full_lookup(u32 va, u32& idx, u32& rv) +u32 mmu_full_lookup(u32 va, const TLB_Entry** tlb_entry_ret, u32& rv) { if (!internal) { @@ -302,21 +294,21 @@ u32 mmu_full_lookup(u32 va, u32& idx, u32& rv) CCN_MMUCR.URC = 0; } - u32 entry = 0; + u32 entry = -1; u32 nom = 0; - for (u32 i = 0; i<64; i++) { //verify(sz!=0); - if (mmu_match(va, UTLB[i].Address, UTLB[i].Data)) + TLB_Entry *tlb_entry = &UTLB[i]; + if (mmu_match(va, tlb_entry->Address, tlb_entry->Data)) { entry = i; nom++; - u32 sz = UTLB[i].Data.SZ1 * 2 + UTLB[i].Data.SZ0; + u32 sz = tlb_entry->Data.SZ1 * 2 + tlb_entry->Data.SZ0; u32 mask = mmu_mask[sz]; //VPN->PPN | low bits - rv = ((UTLB[i].Data.PPN << 10)&mask) | (va&(~mask)); + rv = ((tlb_entry->Data.PPN << 10) & mask) | (va & (~mask)); } } @@ -332,7 +324,7 @@ u32 mmu_full_lookup(u32 va, u32& idx, u32& rv) } } - idx = entry; + *tlb_entry_ret = &UTLB[entry]; return MMU_ERROR_NONE; } @@ -364,15 +356,15 @@ u32 mmu_full_SQ(u32 va, u32& rv) { //Address=Dest&0xFFFFFFE0; - u32 entry; - u32 lookup = mmu_full_lookup(va, entry, rv); + const TLB_Entry *entry; + u32 lookup = mmu_full_lookup(va, &entry, rv); rv &= ~31;//lower 5 bits are forced to 0 if (lookup != MMU_ERROR_NONE) return lookup; - u32 md = UTLB[entry].Data.PR >> 1; + u32 md = entry->Data.PR >> 1; //Priv mode protection if ((md == 0) && sr.MD == 0) @@ -383,9 +375,9 @@ u32 mmu_full_SQ(u32 va, u32& rv) //Write Protection (Lock or FW) if (translation_type == MMU_TT_DWRITE) { - if ((UTLB[entry].Data.PR & 1) == 0) + if ((entry->Data.PR & 1) == 0) return MMU_ERROR_PROTECTED; - else if (UTLB[entry].Data.D == 0) + else if (entry->Data.D == 0) return MMU_ERROR_FIRSTWRITE; } } @@ -395,48 +387,50 @@ u32 mmu_full_SQ(u32 va, u32& rv) } return MMU_ERROR_NONE; } -template -u32 mmu_data_translation(u32 va, u32& rv) +template +void mmu_data_translation(u32 va, u32& rv) { - //*opt notice* this could be only checked for writes, as reads are invalid - if ((va & 0xFC000000) == 0xE0000000) + if (va & (sizeof(T) - 1)) + 
mmu_raise_exception(MMU_ERROR_BADADDR, va, translation_type); + + if (translation_type == MMU_TT_DWRITE) { - u32 lookup = mmu_full_SQ(va, rv); - if (lookup != MMU_ERROR_NONE) - return lookup; - rv = va; //SQ writes are not translated, only write backs are. - return MMU_ERROR_NONE; + if ((va & 0xFC000000) == 0xE0000000) + { + u32 lookup = mmu_full_SQ(va, rv); + if (lookup != MMU_ERROR_NONE) + mmu_raise_exception(lookup, va, translation_type); + + rv = va; //SQ writes are not translated, only write backs are. + return; + } } if ((sr.MD == 0) && (va & 0x80000000) != 0) { //if on kernel, and not SQ addr -> error - return MMU_ERROR_BADADDR; + mmu_raise_exception(MMU_ERROR_BADADDR, va, translation_type); } if (sr.MD == 1 && ((va & 0xFC000000) == 0x7C000000)) { rv = va; - return MMU_ERROR_NONE; + return; } - if ((CCN_MMUCR.AT == 0) || (fast_reg_lut[va >> 29] != 0)) + // Not called if CCN_MMUCR.AT == 0 + //if ((CCN_MMUCR.AT == 0) || (fast_reg_lut[va >> 29] != 0)) + if (fast_reg_lut[va >> 29] != 0) { rv = va; - return MMU_ERROR_NONE; + return; } - /* - if ( CCN_CCR.ORA && ((va&0xFC000000)==0x7C000000)) - { - verify(false); - return va; - } - */ - u32 entry; - u32 lookup = mmu_full_lookup(va, entry, rv); + + const TLB_Entry *entry; + u32 lookup = mmu_full_lookup(va, &entry, rv); if (lookup != MMU_ERROR_NONE) - return lookup; + mmu_raise_exception(lookup, va, translation_type); #ifdef TRACE_WINCE_SYSCALLS if (unresolved_unicode_string != 0) @@ -449,13 +443,13 @@ u32 mmu_data_translation(u32 va, u32& rv) } #endif - u32 md = UTLB[entry].Data.PR >> 1; + u32 md = entry->Data.PR >> 1; //0X & User mode-> protection violation //Priv mode protection if ((md == 0) && sr.MD == 0) { - return MMU_ERROR_PROTECTED; + mmu_raise_exception(MMU_ERROR_PROTECTED, va, translation_type); } //X0 -> read olny @@ -464,27 +458,33 @@ u32 mmu_data_translation(u32 va, u32& rv) //Write Protection (Lock or FW) if (translation_type == MMU_TT_DWRITE) { - if ((UTLB[entry].Data.PR & 1) == 0) - return 
MMU_ERROR_PROTECTED; - else if (UTLB[entry].Data.D == 0) - return MMU_ERROR_FIRSTWRITE; + if ((entry->Data.PR & 1) == 0) + mmu_raise_exception(MMU_ERROR_PROTECTED, va, translation_type); + else if (entry->Data.D == 0) + mmu_raise_exception(MMU_ERROR_FIRSTWRITE, va, translation_type); } - return MMU_ERROR_NONE; } -u32 mmu_instruction_translation(u32 va, u32& rv) +template void mmu_data_translation(u32 va, u32& rv); +template void mmu_data_translation(u32 va, u32& rv); + +void mmu_instruction_translation(u32 va, u32& rv, bool& shared) { + if (va & 1) + { + mmu_raise_exception(MMU_ERROR_BADADDR, va, MMU_TT_IREAD); + } if ((sr.MD == 0) && (va & 0x80000000) != 0) { //if SQ disabled , or if if SQ on but out of SQ mem then BAD ADDR ;) if (va >= 0xE0000000) - return MMU_ERROR_BADADDR; + mmu_raise_exception(MMU_ERROR_BADADDR, va, MMU_TT_IREAD); } if ((CCN_MMUCR.AT == 0) || (fast_reg_lut[va >> 29] != 0)) { rv = va; - return MMU_ERROR_NONE; + return; } bool mmach = false; @@ -509,6 +509,7 @@ retry_ITLB_Match: nom++; //VPN->PPN | low bits rv = ((ITLB[i].Data.PPN << 10)&mask) | (va&(~mask)); + shared = ITLB[i].Data.SH == 1; } } } @@ -516,13 +517,15 @@ retry_ITLB_Match: if (entry == 4) { verify(mmach == false); - u32 lookup = mmu_full_lookup(va, entry, rv); + const TLB_Entry *tlb_entry; + u32 lookup = mmu_full_lookup(va, &tlb_entry, rv); if (lookup != MMU_ERROR_NONE) - return lookup; + mmu_raise_exception(lookup, va, MMU_TT_IREAD); + u32 replace_index = ITLB_LRU_USE[CCN_MMUCR.LRUI]; verify(replace_index != 0xFFFFFFFF); - ITLB[replace_index] = UTLB[entry]; + ITLB[replace_index] = *tlb_entry; entry = replace_index; ITLB_Sync(entry); mmach = true; @@ -532,11 +535,11 @@ retry_ITLB_Match: { if (nom) { - return MMU_ERROR_TLB_MHIT; + mmu_raise_exception(MMU_ERROR_TLB_MHIT, va, MMU_TT_IREAD); } else { - return MMU_ERROR_TLB_MISS; + mmu_raise_exception(MMU_ERROR_TLB_MISS, va, MMU_TT_IREAD); } } @@ -549,10 +552,8 @@ retry_ITLB_Match: //Priv mode protection if ((md == 0) && sr.MD == 0) { - 
return MMU_ERROR_PROTECTED; + mmu_raise_exception(MMU_ERROR_PROTECTED, va, MMU_TT_IREAD); } - - return MMU_ERROR_NONE; } void mmu_set_state() @@ -570,6 +571,7 @@ void mmu_set_state() WriteMem16 = &mmu_WriteMem16; WriteMem32 = &mmu_WriteMem32; WriteMem64 = &mmu_WriteMem64; + mmu_flush_table(); } else { @@ -617,149 +619,77 @@ void MMU_term() { } +void mmu_flush_table() +{ + //printf("MMU tables flushed\n"); + + ITLB[0].Data.V = 0; + ITLB[1].Data.V = 0; + ITLB[2].Data.V = 0; + ITLB[3].Data.V = 0; + + for (u32 i = 0; i < 64; i++) + UTLB[i].Data.V = 0; +} + u8 DYNACALL mmu_ReadMem8(u32 adr) { u32 addr; - u32 tv = mmu_data_translation(adr, addr); - if (tv == 0) - return _vmem_ReadMem8(addr); - else - mmu_raise_exception(tv, adr, MMU_TT_DREAD); - - return 0; + mmu_data_translation(adr, addr); + return _vmem_ReadMem8(addr); } u16 DYNACALL mmu_ReadMem16(u32 adr) { - if (adr & 1) - { - mmu_raise_exception(MMU_ERROR_BADADDR, adr, MMU_TT_DREAD); - return 0; - } u32 addr; - u32 tv = mmu_data_translation(adr, addr); - if (tv == 0) - return _vmem_ReadMem16(addr); - else - mmu_raise_exception(tv, adr, MMU_TT_DREAD); - - return 0; + mmu_data_translation(adr, addr); + return _vmem_ReadMem16(addr); } -u16 DYNACALL mmu_IReadMem16(u32 adr) +u16 DYNACALL mmu_IReadMem16(u32 vaddr) { - if (adr & 1) - { - mmu_raise_exception(MMU_ERROR_BADADDR, adr, MMU_TT_IREAD); - return 0; - } u32 addr; - u32 tv = mmu_instruction_translation(adr, addr); - if (tv == 0) - return _vmem_ReadMem16(addr); - else - mmu_raise_exception(tv, adr, MMU_TT_IREAD); - - return 0; + bool shared; + mmu_instruction_translation(vaddr, addr, shared); + return _vmem_ReadMem16(addr); } u32 DYNACALL mmu_ReadMem32(u32 adr) { - if (adr & 3) - { - mmu_raise_exception(MMU_ERROR_BADADDR, adr, MMU_TT_DREAD); - return 0; - } u32 addr; - u32 tv = mmu_data_translation(adr, addr); - if (tv == 0) - return _vmem_ReadMem32(addr); - else - mmu_raise_exception(tv, adr, MMU_TT_DREAD); - - return 0; + mmu_data_translation(adr, addr); + return 
_vmem_ReadMem32(addr); } u64 DYNACALL mmu_ReadMem64(u32 adr) { - if (adr & 7) - { - mmu_raise_exception(MMU_ERROR_BADADDR, adr, MMU_TT_DREAD); - return 0; - } u32 addr; - u32 tv = mmu_data_translation(adr, addr); - if (tv == 0) - { - return _vmem_ReadMem64(addr); - } - else - mmu_raise_exception(tv, adr, MMU_TT_DREAD); - - return 0; + mmu_data_translation(adr, addr); + return _vmem_ReadMem64(addr); } void DYNACALL mmu_WriteMem8(u32 adr, u8 data) { u32 addr; - u32 tv = mmu_data_translation(adr, addr); - if (tv == 0) - { - _vmem_WriteMem8(addr, data); - return; - } - else - mmu_raise_exception(tv, adr, MMU_TT_DWRITE); + mmu_data_translation(adr, addr); + _vmem_WriteMem8(addr, data); } void DYNACALL mmu_WriteMem16(u32 adr, u16 data) { - if (adr & 1) - { - mmu_raise_exception(MMU_ERROR_BADADDR, adr, MMU_TT_DWRITE); - return; - } u32 addr; - u32 tv = mmu_data_translation(adr, addr); - if (tv == 0) - { - _vmem_WriteMem16(addr, data); - return; - } - else - mmu_raise_exception(tv, adr, MMU_TT_DWRITE); + mmu_data_translation(adr, addr); + _vmem_WriteMem16(addr, data); } void DYNACALL mmu_WriteMem32(u32 adr, u32 data) { - if (adr & 3) - { - mmu_raise_exception(MMU_ERROR_BADADDR, adr, MMU_TT_DWRITE); - return; - } u32 addr; - u32 tv = mmu_data_translation(adr, addr); - if (tv == 0) - { - _vmem_WriteMem32(addr, data); - return; - } - else - mmu_raise_exception(tv, adr, MMU_TT_DWRITE); + mmu_data_translation(adr, addr); + _vmem_WriteMem32(addr, data); } void DYNACALL mmu_WriteMem64(u32 adr, u64 data) { - if (adr & 7) - { - mmu_raise_exception(MMU_ERROR_BADADDR, adr, MMU_TT_DWRITE); - return; - } u32 addr; - u32 tv = mmu_data_translation(adr, addr); - if (tv == 0) - { - _vmem_WriteMem64(addr, data); - return; - } - else - mmu_raise_exception(tv, adr, MMU_TT_DWRITE); + mmu_data_translation(adr, addr); + _vmem_WriteMem64(addr, data); } bool mmu_TranslateSQW(u32 adr, u32* out) diff --git a/core/hw/sh4/modules/mmu_impl.h b/core/hw/sh4/modules/mmu_impl.h index 4564a8e4e..c0eaba84d 
100644 --- a/core/hw/sh4/modules/mmu_impl.h +++ b/core/hw/sh4/modules/mmu_impl.h @@ -10,7 +10,7 @@ #define MMU_ERROR_NONE 0 //TLB miss #define MMU_ERROR_TLB_MISS 1 -//TLB Multyhit +//TLB Multihit #define MMU_ERROR_TLB_MHIT 2 //Mem is read/write protected (depends on translation type) #define MMU_ERROR_PROTECTED 3 @@ -21,15 +21,6 @@ //Can't Execute #define MMU_ERROR_EXECPROT 6 -//Translation Types -//Opcode read -#define MMU_TT_IREAD 0 -//Data write -#define MMU_TT_DWRITE 1 -//Data write -#define MMU_TT_DREAD 2 -//Do an mmu lookup for va , returns translation status , if MMU_ERROR_NONE , rv is set to translated index - extern u32 mmu_error_TT; void MMU_Init(); diff --git a/core/hw/sh4/modules/wince.h b/core/hw/sh4/modules/wince.h index 570b7d701..b4fab717a 100644 --- a/core/hw/sh4/modules/wince.h +++ b/core/hw/sh4/modules/wince.h @@ -7,11 +7,13 @@ #define SH_CURTHREAD 1 #define SH_CURPROC 2 +extern const u32 mmu_mask[4]; + static bool read_mem32(u32 addr, u32& data) { u32 pa; - u32 idx; - if (mmu_full_lookup(addr, idx, pa) != MMU_ERROR_NONE) + const TLB_Entry *entry; + if (mmu_full_lookup(addr, &entry, pa) != MMU_ERROR_NONE) return false; data = ReadMem32_nommu(pa); return true; @@ -20,8 +22,8 @@ static bool read_mem32(u32 addr, u32& data) static bool read_mem16(u32 addr, u16& data) { u32 pa; - u32 idx; - if (mmu_full_lookup(addr, idx, pa) != MMU_ERROR_NONE) + const TLB_Entry *entry; + if (mmu_full_lookup(addr, &entry, pa) != MMU_ERROR_NONE) return false; data = ReadMem16_nommu(pa); return true; @@ -30,8 +32,8 @@ static bool read_mem16(u32 addr, u16& data) static bool read_mem8(u32 addr, u8& data) { u32 pa; - u32 idx; - if (mmu_full_lookup(addr, idx, pa) != MMU_ERROR_NONE) + const TLB_Entry *entry; + if (mmu_full_lookup(addr, &entry, pa) != MMU_ERROR_NONE) return false; data = ReadMem8_nommu(pa); return true; @@ -246,9 +248,8 @@ std::string get_ascii_string(u32 addr) return str; } -static bool print_wince_syscall(u32 address, bool &skip_exception) +static bool 
print_wince_syscall(u32 address) { - skip_exception = false; if (address & 1) { if (address == 0xfffffd5d || address == 0xfffffd05) // Sleep, QueryPerformanceCounter @@ -329,3 +330,59 @@ return false; } + +static bool wince_resolve_address(u32 va, TLB_Entry &entry) +{ + // WinCE hack + if ((va & 0x80000000) == 0) + { + u32 page_group = ReadMem32_nommu(CCN_TTB + ((va >> 25) << 2)); + u32 page = ((va >> 16) & 0x1ff) << 2; + u32 paddr = ReadMem32_nommu(page_group + page); + if (paddr & 0x80000000) + { + u32 whatever = ReadMem32_nommu(r_bank[4] + 0x14); + if (whatever != ReadMem32_nommu(paddr)) + { + paddr += 12; + u32 ptel = ReadMem32_nommu(paddr + ((va >> 10) & 0x3c)); + //FIXME CCN_PTEA = paddr >> 29; + if (ptel != 0) + { + entry.Data.reg_data = ptel - 1; + entry.Address.ASID = CCN_PTEH.ASID; + entry.Assistance.reg_data = 0; + u32 sz = entry.Data.SZ1 * 2 + entry.Data.SZ0; + entry.Address.VPN = (va & mmu_mask[sz]) >> 10; + + return true; + } + } + } + } + else + { + // SQ + if (((va >> 26) & 0x3F) == 0x38) + { + u32 r1 = (va - 0xe0000000) & 0xfff00000; + //r1 &= 0xfff00000; + //u32 r0 = ReadMem32_nommu(0x8C01258C); // FIXME + //u32 r0 = 0x8c138b14; + //r0 = ReadMem32_nommu(r0); // 0xE0001F5 + u32 r0 = 0xe0001f5; + r0 += r1; + entry.Data.reg_data = r0 - 1; + entry.Assistance.reg_data = r0 >> 29; + entry.Address.ASID = CCN_PTEH.ASID; + u32 sz = entry.Data.SZ1 * 2 + entry.Data.SZ0; + entry.Address.VPN = (va & mmu_mask[sz]) >> 10; + + return true; + } + } + + return false; +} + + diff --git a/core/hw/sh4/sh4_core.h b/core/hw/sh4/sh4_core.h index a975b0cd6..15ec9fb02 100644 --- a/core/hw/sh4/sh4_core.h +++ b/core/hw/sh4/sh4_core.h @@ -122,6 +122,15 @@ static INLINE void RaiseFPUDisableException() #endif } +static INLINE void AdjustDelaySlotException(SH4ThrownException& ex) +{ + ex.epc -= 2; + if (ex.expEvn == 0x800) // FPU disable exception + ex.expEvn = 0x820; // Slot FPU disable exception + else if (ex.expEvn 
== 0x180) // Illegal instruction exception + ex.expEvn = 0x1A0; // Slot illegal instruction exception +} + // The SH4 sets the signaling bit to 0 for qNaN (unlike all recent CPUs). Some games relies on this. static INLINE f32 fixNaN(f32 f) { diff --git a/core/rec-x64/rec_x64.cpp b/core/rec-x64/rec_x64.cpp index 19e9ba835..e70a9b60e 100644 --- a/core/rec-x64/rec_x64.cpp +++ b/core/rec-x64/rec_x64.cpp @@ -11,6 +11,7 @@ #include "hw/sh4/sh4_opcode_list.h" #include "hw/sh4/dyna/ngen.h" #include "hw/sh4/modules/ccn.h" +#include "hw/sh4/modules/mmu.h" #include "hw/sh4/sh4_interrupts.h" #include "hw/sh4/sh4_core.h" @@ -206,6 +207,99 @@ static void ngen_blockcheckfail(u32 pc) { rdv_BlockCheckFail(pc); } +static u32 exception_raised; + +template +static T ReadMemNoEx(u32 addr, u32 pc) +{ + try { + exception_raised = 0; + if (sizeof(T) == 1) + return ReadMem8(addr); + else if (sizeof(T) == 2) + return ReadMem16(addr); + else if (sizeof(T) == 4) + return ReadMem32(addr); + else if (sizeof(T) == 8) + return ReadMem64(addr); + } catch (SH4ThrownException& ex) { + if (pc & 1) + { + // Delay slot + AdjustDelaySlotException(ex); + pc--; + } + Do_Exception(pc, ex.expEvn, ex.callVect); + exception_raised = 1; + return 0; + } +} + +template +static void WriteMemNoEx(u32 addr, T data, u32 pc) +{ + try { + if (sizeof(T) == 1) + WriteMem8(addr, data); + else if (sizeof(T) == 2) + WriteMem16(addr, data); + else if (sizeof(T) == 4) + WriteMem32(addr, data); + else if (sizeof(T) == 8) + WriteMem64(addr, data); + exception_raised = 0; + } catch (SH4ThrownException& ex) { + if (pc & 1) + { + // Delay slot + AdjustDelaySlotException(ex); + pc--; + } + Do_Exception(pc, ex.expEvn, ex.callVect); + exception_raised = 1; + } +} + +static void interpreter_fallback(u16 op, u32 pc) +{ + try { + OpDesc[op]->oph(op); + exception_raised = 0; + } catch (SH4ThrownException& ex) { + printf("HOLY SHIT! 
interpreter_fallback exception pc %08x evn %x vect %x\n", pc, ex.expEvn, ex.callVect); + if (pc & 1) + { + // Delay slot + AdjustDelaySlotException(ex); + pc--; + } + Do_Exception(pc, ex.expEvn, ex.callVect); + exception_raised = 1; + } +} + +static void do_sqw_mmu_no_ex(u32 addr, u32 pc) +{ + try { + do_sqw_mmu(addr); + exception_raised = 0; + } catch (SH4ThrownException& ex) { + if (pc & 1) + { + // Delay slot + AdjustDelaySlotException(ex); + pc--; + } + Do_Exception(pc, ex.expEvn, ex.callVect); + exception_raised = 1; + } +} + +static void do_sqw_nommu_local(u32 addr, u8* sqb) +{ + do_sqw_nommu(addr, sqb); +} + class BlockCompiler : public Xbyak::CodeGenerator { public: @@ -258,25 +352,55 @@ public: #else sub(rsp, 0x8); // align stack #endif + Xbyak::Label exit_block; - for (size_t i = 0; i < block->oplist.size(); i++) + if (mmu_enabled() && block->has_fpu_op) { - shil_opcode& op = block->oplist[i]; + Xbyak::Label fpu_enabled; + mov(rax, (uintptr_t)&sr); + mov(eax, dword[rax]); + and_(eax, 0x8000); // test SR.FD bit + jz(fpu_enabled); + mov(call_regs[0], block->vaddr); // pc + mov(call_regs[1], 0x800); // event + mov(call_regs[2], 0x100); // vector + GenCall(Do_Exception); + jmp(exit_block, T_NEAR); + L(fpu_enabled); + } - regalloc.OpBegin(&op, i); + for (current_opid = 0; current_opid < block->oplist.size(); current_opid++) + { + shil_opcode& op = block->oplist[current_opid]; + + regalloc.OpBegin(&op, current_opid); switch (op.op) { case shop_ifb: - if (op.rs1._imm) { - mov(rax, (size_t)&next_pc); - mov(dword[rax], op.rs2._imm); + if (op.rs1._imm) + { + mov(rax, (size_t)&next_pc); + mov(dword[rax], op.rs2._imm); + } + + mov(call_regs[0], op.rs3._imm); + + if (!mmu_enabled()) + { + GenCall(OpDesc[op.rs3._imm]->oph); + } + else + { + mov(call_regs[1], block->vaddr + op.guest_offs - (op.delay_slot ? 
1 : 0)); // pc + + GenCall(interpreter_fallback); + + test(dword[(void *)&exception_raised], 1); + jnz(exit_block, T_NEAR); + } } - - mov(call_regs[0], op.rs3._imm); - - GenCall(OpDesc[op.rs3._imm]->oph); break; case shop_jcond: @@ -325,11 +449,28 @@ public: case shop_readm: { u32 size = op.flags & 0x7f; - - if (op.rs1.is_imm()) + bool immediate_address = op.rs1.is_imm(); + if (immediate_address && mmu_enabled() && (op.rs1._imm >> 12) != (block->vaddr >> 12)) { + // When full mmu is on, only consider addresses in the same 4k page + immediate_address = false; + } + if (immediate_address) + { + u32 addr = op.rs1._imm; + if (mmu_enabled()) + { + u32 paddr; + if (size == 2) + mmu_data_translation(addr, paddr); + else if (size == 4) + mmu_data_translation(addr, paddr); + else + die("Invalid immediate size"); + addr = paddr; + } bool isram = false; - void* ptr = _vmem_read_const(op.rs1._imm, isram, size); + void* ptr = _vmem_read_const(addr, isram, size); if (isram) { @@ -338,14 +479,27 @@ public: switch (size) { case 2: - movsx(regalloc.MapRegister(op.rd), word[rax]); + if (regalloc.IsAllocg(op.rd)) + movsx(regalloc.MapRegister(op.rd), word[rax]); + else + { + movsx(eax, word[rax]); + mov(rcx, (uintptr_t)op.rd.reg_ptr()); + mov(dword[rcx], eax); + } break; case 4: if (regalloc.IsAllocg(op.rd)) mov(regalloc.MapRegister(op.rd), dword[rax]); - else + else if (regalloc.IsAllocf(op.rd)) movd(regalloc.MapXRegister(op.rd), dword[rax]); + else + { + mov(eax, dword[rax]); + mov(rcx, (uintptr_t)op.rd.reg_ptr()); + mov(dword[rcx], eax); + } break; default: @@ -356,7 +510,7 @@ public: else { // Not RAM: the returned pointer is a memory handler - mov(call_regs[0], op.rs1._imm); + mov(call_regs[0], addr); switch(size) { @@ -385,42 +539,71 @@ public: { if (op.rs3.is_imm()) add(call_regs[0], op.rs3._imm); - else + else if (regalloc.IsAllocg(op.rs3)) add(call_regs[0], regalloc.MapRegister(op.rs3)); + else + { + mov(rax, (uintptr_t)op.rs3.reg_ptr()); + add(call_regs[0], dword[rax]); + } 
} + if (mmu_enabled()) + mov(call_regs[1], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0)); // pc if (size == 1) { - GenCall(ReadMem8); + if (!mmu_enabled()) + GenCall(ReadMem8); + else + GenCall(ReadMemNoEx); movsx(ecx, al); } else if (size == 2) { - GenCall(ReadMem16); + if (!mmu_enabled()) + GenCall(ReadMem16); + else + GenCall(ReadMemNoEx); movsx(ecx, ax); } else if (size == 4) { - GenCall(ReadMem32); + if (!mmu_enabled()) + GenCall(ReadMem32); + else + GenCall(ReadMemNoEx); mov(ecx, eax); } else if (size == 8) { - GenCall(ReadMem64); + if (!mmu_enabled()) + GenCall(ReadMem64); + else + GenCall(ReadMemNoEx); mov(rcx, rax); } else { die("1..8 bytes"); } + if (mmu_enabled()) + { + test(dword[(void *)&exception_raised], 1); + jnz(exit_block, T_NEAR); + } + if (size != 8) host_reg_to_shil_param(op.rd, ecx); else { #ifdef EXPLODE_SPANS - verify(op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1)); - movd(regalloc.MapXRegister(op.rd, 0), ecx); - shr(rcx, 32); - movd(regalloc.MapXRegister(op.rd, 1), ecx); -#else - mov(rax, (uintptr_t)op.rd.reg_ptr()); - mov(qword[rax], rcx); + if (op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1)) + { + movd(regalloc.MapXRegister(op.rd, 0), ecx); + shr(rcx, 32); + movd(regalloc.MapXRegister(op.rd, 1), ecx); + } + else #endif + { + mov(rax, (uintptr_t)op.rd.reg_ptr()); + mov(qword[rax], rcx); + } } } } @@ -434,36 +617,69 @@ public: { if (op.rs3.is_imm()) add(call_regs[0], op.rs3._imm); - else + else if (regalloc.IsAllocg(op.rs3)) add(call_regs[0], regalloc.MapRegister(op.rs3)); + else + { + mov(rax, (uintptr_t)op.rs3.reg_ptr()); + add(call_regs[0], dword[rax]); + } } if (size != 8) shil_param_to_host_reg(op.rs2, call_regs[1]); else { #ifdef EXPLODE_SPANS - verify(op.rs2.count() == 2 && regalloc.IsAllocf(op.rs2, 0) && regalloc.IsAllocf(op.rs2, 1)); - movd(call_regs[1], regalloc.MapXRegister(op.rs2, 1)); - shl(call_regs64[1], 32); - movd(eax, 
regalloc.MapXRegister(op.rs2, 0)); - or_(call_regs64[1], rax); -#else - mov(rax, (uintptr_t)op.rs2.reg_ptr()); - mov(call_regs64[1], qword[rax]); + if (op.rs2.count() == 2 && regalloc.IsAllocf(op.rs2, 0) && regalloc.IsAllocf(op.rs2, 1)) + { + movd(call_regs[1], regalloc.MapXRegister(op.rs2, 1)); + shl(call_regs64[1], 32); + movd(eax, regalloc.MapXRegister(op.rs2, 0)); + or_(call_regs64[1], rax); + } + else #endif + { + mov(rax, (uintptr_t)op.rs2.reg_ptr()); + mov(call_regs64[1], qword[rax]); + } } + if (mmu_enabled()) + mov(call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0)); // pc - if (size == 1) - GenCall(WriteMem8); - else if (size == 2) - GenCall(WriteMem16); - else if (size == 4) - GenCall(WriteMem32); - else if (size == 8) - GenCall(WriteMem64); + if (size == 1) { + if (!mmu_enabled()) + GenCall(WriteMem8); + else + GenCall(WriteMemNoEx); + } + else if (size == 2) { + if (!mmu_enabled()) + GenCall(WriteMem16); + else + GenCall(WriteMemNoEx); + } + else if (size == 4) { + if (!mmu_enabled()) + GenCall(WriteMem32); + else + GenCall(WriteMemNoEx); + } + else if (size == 8) { + if (!mmu_enabled()) + GenCall(WriteMem64); + else + GenCall(WriteMemNoEx); + } else { die("1..8 bytes"); } + + if (mmu_enabled()) + { + test(dword[(void *)&exception_raised], 1); + jnz(exit_block, T_NEAR); + } } break; @@ -683,11 +899,52 @@ public: shr(rax, 32); mov(regalloc.MapRegister(op.rd2), eax); break; -/* + case shop_pref: - // TODO + { + Xbyak::Reg32 rn; + if (regalloc.IsAllocg(op.rs1)) + { + rn = regalloc.MapRegister(op.rs1); + } + else + { + mov(rax, (uintptr_t)op.rs1.reg_ptr()); + mov(eax, dword[rax]); + rn = eax; + } + mov(ecx, rn); + shr(ecx, 26); + cmp(ecx, 0x38); + Xbyak::Label no_sqw; + jne(no_sqw); + + mov(call_regs[0], rn); + if (mmu_enabled()) + { + mov(call_regs[1], block->vaddr + op.guest_offs - (op.delay_slot ? 
1 : 0)); // pc + + GenCall(do_sqw_mmu_no_ex); + + test(dword[(void *)&exception_raised], 1); + jnz(exit_block, T_NEAR); + } + else + { + if (CCN_MMUCR.AT == 1) + { + GenCall(do_sqw_mmu); + } + else + { + mov(call_regs64[1], (uintptr_t)sq_both); + GenCall(&do_sqw_nommu_local); + } + } + L(no_sqw); + } break; -*/ + case shop_ext_s8: mov(eax, regalloc.MapRegister(op.rs1)); movsx(regalloc.MapRegister(op.rd), al); @@ -968,6 +1225,7 @@ public: die("Invalid block end type"); } + L(exit_block); #ifdef _WIN32 add(rsp, 0x28); #else @@ -978,6 +1236,7 @@ public: ready(); block->code = (DynarecCodeEntryPtr)getCode(); + block->host_code_size = getSize(); emit_Skip(getSize()); } @@ -1089,6 +1348,19 @@ private: void CheckBlock(RuntimeBlockInfo* block) { mov(call_regs[0], block->addr); +// if (mmu_enabled() && block->asid != 0xFFFFFFFF) +// { +// mov(rax, (uintptr_t)&CCN_PTEH.reg_data); +// cmp(byte[rax], block->asid); +// jne(reinterpret_cast(&ngen_blockcheckfail)); +// } + if (mmu_enabled()) + { + mov(rax, (uintptr_t)&next_pc); + cmp(dword[rax], block->vaddr); + jne(reinterpret_cast(&ngen_blockcheckfail)); + } + s32 sz=block->sh4_code_size; u32 sa=block->addr; @@ -1147,22 +1419,66 @@ private: void GenCall(Ret(*function)(Params...)) { #ifndef _WIN32 + bool xmm8_mapped = regalloc.IsMapped(xmm8, current_opid); + bool xmm9_mapped = regalloc.IsMapped(xmm9, current_opid); + bool xmm10_mapped = regalloc.IsMapped(xmm10, current_opid); + bool xmm11_mapped = regalloc.IsMapped(xmm11, current_opid); + // Need to save xmm registers as they are not preserved in linux/mach - sub(rsp, 16); - movd(ptr[rsp + 0], xmm8); - movd(ptr[rsp + 4], xmm9); - movd(ptr[rsp + 8], xmm10); - movd(ptr[rsp + 12], xmm11); + int offset = 0; + if (xmm8_mapped || xmm9_mapped || xmm10_mapped || xmm11_mapped) + { + sub(rsp, 4 * (xmm8_mapped + xmm9_mapped + xmm10_mapped + xmm11_mapped)); + if (xmm8_mapped) + { + movd(ptr[rsp + offset], xmm8); + offset += 4; + } + if (xmm9_mapped) + { + movd(ptr[rsp + offset], xmm9); + 
offset += 4; + } + if (xmm10_mapped) + { + movd(ptr[rsp + offset], xmm10); + offset += 4; + } + if (xmm11_mapped) + { + movd(ptr[rsp + offset], xmm11); + offset += 4; + } + } #endif call(function); #ifndef _WIN32 - movd(xmm8, ptr[rsp + 0]); - movd(xmm9, ptr[rsp + 4]); - movd(xmm10, ptr[rsp + 8]); - movd(xmm11, ptr[rsp + 12]); - add(rsp, 16); + if (xmm8_mapped || xmm9_mapped || xmm10_mapped || xmm11_mapped) + { + if (xmm11_mapped) + { + offset -= 4; + movd(xmm11, ptr[rsp + offset]); + } + if (xmm10_mapped) + { + offset -= 4; + movd(xmm10, ptr[rsp + offset]); + } + if (xmm9_mapped) + { + offset -= 4; + movd(xmm9, ptr[rsp + offset]); + } + if (xmm8_mapped) + { + offset -= 4; + movd(xmm8, ptr[rsp + offset]); + } + add(rsp, 4 * (xmm8_mapped + xmm9_mapped + xmm10_mapped + xmm11_mapped)); + } #endif } @@ -1183,17 +1499,36 @@ private: { if (param.is_r32f()) { - if (!reg.isXMM()) - movd((const Xbyak::Reg32 &)reg, regalloc.MapXRegister(param)); + if (regalloc.IsAllocf(param)) + { + if (!reg.isXMM()) + movd((const Xbyak::Reg32 &)reg, regalloc.MapXRegister(param)); + else + movss((const Xbyak::Xmm &)reg, regalloc.MapXRegister(param)); + } else - movss((const Xbyak::Xmm &)reg, regalloc.MapXRegister(param)); + { + mov(rax, (size_t)param.reg_ptr()); + mov((const Xbyak::Reg32 &)reg, dword[rax]); + } } else { - if (!reg.isXMM()) - mov((const Xbyak::Reg32 &)reg, regalloc.MapRegister(param)); + if (regalloc.IsAllocg(param)) + { + if (!reg.isXMM()) + mov((const Xbyak::Reg32 &)reg, regalloc.MapRegister(param)); + else + movd((const Xbyak::Xmm &)reg, regalloc.MapRegister(param)); + } else - movd((const Xbyak::Xmm &)reg, regalloc.MapRegister(param)); + { + mov(rax, (size_t)param.reg_ptr()); + if (!reg.isXMM()) + mov((const Xbyak::Reg32 &)reg, dword[rax]); + else + movss((const Xbyak::Xmm &)reg, dword[rax]); + } } } else @@ -1212,13 +1547,21 @@ private: else movd(regalloc.MapRegister(param), (const Xbyak::Xmm &)reg); } - else + else if (regalloc.IsAllocf(param)) { if (!reg.isXMM()) 
movd(regalloc.MapXRegister(param), (const Xbyak::Reg32 &)reg); else movss(regalloc.MapXRegister(param), (const Xbyak::Xmm &)reg); } + else + { + mov(rax, (size_t)param.reg_ptr()); + if (!reg.isXMM()) + mov(dword[rax], (const Xbyak::Reg32 &)reg); + else + movss(dword[rax], (const Xbyak::Xmm &)reg); + } } vector call_regs; @@ -1234,6 +1577,7 @@ private: X64RegAlloc regalloc; Xbyak::util::Cpu cpu; + size_t current_opid; static const u32 float_sign_mask; static const u32 float_abs_mask; static const f32 cvtf2i_pos_saturation; diff --git a/core/rec-x64/x64_regalloc.h b/core/rec-x64/x64_regalloc.h index 4e87c3489..7614ba1f6 100644 --- a/core/rec-x64/x64_regalloc.h +++ b/core/rec-x64/x64_regalloc.h @@ -71,6 +71,16 @@ struct X64RegAlloc : RegAllocnregf == xmm.getIdx() && all_spans[sid]->contains(opid)) + return true; + } + return false; + } + BlockCompiler *compiler; };