dynarec: WinCE support WIP

Only for the x64 dynarec atm
Bugs remaining
This commit is contained in:
Flyinghead 2019-03-25 11:53:13 +01:00
parent cae22b9fbb
commit ef43883fb5
15 changed files with 791 additions and 341 deletions

View File

@ -18,6 +18,7 @@
//#include "../intc.h"
//#include "../tmu.h"
#include "hw/sh4/sh4_mem.h"
#include "hw/sh4/sh4_sched.h"
#if HOST_OS==OS_LINUX && defined(DYNA_OPROF)
@ -86,6 +87,7 @@ u32 bm_gc_luc,bm_gcf_luc;
#define FPCA(x) ((DynarecCodeEntryPtr&)sh4rcb.fpcb[(x>>1)&FPCB_MASK])
// addr must be a physical address
DynarecCodeEntryPtr DYNACALL bm_GetCode(u32 addr)
{
//rdv_FailedToFindBlock_pc=addr;
@ -94,11 +96,51 @@ DynarecCodeEntryPtr DYNACALL bm_GetCode(u32 addr)
return (DynarecCodeEntryPtr)rv;
}
// addr must be a virtual address
DynarecCodeEntryPtr DYNACALL bm_GetCode2(u32 addr)
{
return (DynarecCodeEntryPtr)bm_GetCode(addr);
#ifndef NO_MMU
if (!mmu_enabled())
#endif
return (DynarecCodeEntryPtr)bm_GetCode(addr);
#ifndef NO_MMU
else
{
if (addr & 1)
{
switch (addr)
{
case 0xfffffde7: // GetTickCount
// This should make this syscall faster
r[0] = sh4_sched_now64() * 1000 / SH4_MAIN_CLOCK;
next_pc = pr;
addr = next_pc;
break;
default:
Do_Exception(addr, 0xE0, 0x100);
addr = next_pc;
break;
}
}
try {
u32 paddr;
bool shared;
mmu_instruction_translation(addr, paddr, shared);
return (DynarecCodeEntryPtr)bm_GetCode(paddr);
} catch (SH4ThrownException& ex) {
Do_Exception(addr, ex.expEvn, ex.callVect);
u32 paddr;
bool shared;
mmu_instruction_translation(next_pc, paddr, shared);
return (DynarecCodeEntryPtr)bm_GetCode(paddr);
}
}
#endif
}
// addr must be a physical address
RuntimeBlockInfo* DYNACALL bm_GetBlock(u32 addr)
{
DynarecCodeEntryPtr cde=bm_GetCode(addr);
@ -165,6 +207,22 @@ void bm_AddBlock(RuntimeBlockInfo* blk)
}
// Unregisters a compiled block and frees it.
// The dispatch entry for the block's address is reset to ngen_FailedToFindBlock
// so the next lookup falls through to the compiler, then the block is removed
// from both lookup containers (blkmap and all_blocks) and deleted.
void bm_RemoveBlock(RuntimeBlockInfo* block)
{
// The block must still be registered: its dispatch pointer must not already
// be the failure handler, otherwise this is a double removal.
verify((void*)bm_GetCode(block->addr) != (void*)ngen_FailedToFindBlock);
FPCA(block->addr) = ngen_FailedToFindBlock;
auto it = blkmap.find(block);
if (it != blkmap.end())
blkmap.erase(it);
// all_blocks has no fast lookup by pointer: linear scan, erase first match.
for (auto it = all_blocks.begin(); it != all_blocks.end(); it++)
if (*it == block)
{
all_blocks.erase(it);
break;
}
delete block;
}
bool UDgreaterX ( RuntimeBlockInfo* elem1, RuntimeBlockInfo* elem2 )
{
return elem1->runs > elem2->runs;
@ -594,7 +652,8 @@ void print_blocks()
if (f)
{
fprintf(f,"block: %p\n",blk);
fprintf(f,"addr: %08X\n",blk->addr);
fprintf(f,"vaddr: %08X\n",blk->vaddr);
fprintf(f,"paddr: %08X\n",blk->addr);
fprintf(f,"hash: %s\n",blk->hash());
fprintf(f,"hash_rloc: %s\n",blk->hash(false,true));
fprintf(f,"code: %p\n",blk->code);
@ -624,17 +683,21 @@ void print_blocks()
if (gcode!=op->guest_offs)
{
gcode=op->guest_offs;
u32 rpc=blk->addr+gcode;
u16 op=ReadMem16(rpc);
u32 rpc=blk->vaddr+gcode;
try {
u16 op=IReadMem16(rpc);
char temp[128];
OpDesc[op]->Disassemble(temp,rpc,op);
char temp[128];
OpDesc[op]->Disassemble(temp,rpc,op);
fprintf(f,"//g:%s\n",temp);
fprintf(f,"//g: %04X %s\n", op, temp);
} catch (SH4ThrownException& ex) {
fprintf(f,"//g: ???? (page fault)\n");
}
}
string s=op->dissasm();
fprintf(f,"//il:%d:%d:%s\n",op->guest_offs,op->host_offs,s.c_str());
fprintf(f,"//il:%d:%d: %s\n",op->guest_offs,op->host_offs,s.c_str());
}
fprint_hex(f,"//h:",pucode,hcode,blk->host_code_size);

View File

@ -20,6 +20,8 @@ struct RuntimeBlockInfo: RuntimeBlockInfo_Core
void Setup(u32 pc,fpscr_t fpu_cfg);
const char* hash(bool full=true, bool reloc=false);
u32 vaddr;
u32 host_code_size; //in bytes
u32 sh4_code_size; //in bytes
@ -33,7 +35,8 @@ struct RuntimeBlockInfo: RuntimeBlockInfo_Core
u32 guest_cycles;
u32 guest_opcodes;
u32 host_opcodes;
bool has_fpu_op;
u32 asid; // if not 0xFFFFFFFF then private page belonging to this id
u32 BranchBlock; //if not 0xFFFFFFFF then jump target
u32 NextBlock; //if not 0xFFFFFFFF then next block (by position)
@ -94,6 +97,7 @@ RuntimeBlockInfo* bm_GetStaleBlock(void* dynarec_code);
RuntimeBlockInfo* DYNACALL bm_GetBlock(u32 addr);
void bm_AddBlock(RuntimeBlockInfo* blk);
void bm_RemoveBlock(RuntimeBlockInfo* block);
void bm_Reset();
void bm_Periodical_1s();
void bm_Periodical_14k();

View File

@ -82,7 +82,8 @@ void Emit(shilop op,shil_param rd=shil_param(),shil_param rs1=shil_param(),shil_
sp.rs1=(rs1);
sp.rs2=(rs2);
sp.rs3=(rs3);
sp.guest_offs=state.cpu.rpc-blk->addr;
sp.guest_offs = state.cpu.rpc - blk->vaddr;
sp.delay_slot = state.cpu.is_delayslot;
blk->oplist.push_back(sp);
}
@ -96,22 +97,14 @@ void dec_fallback(u32 op)
opcd.rs2=shil_param(FMT_IMM,state.cpu.rpc+2);
opcd.rs3=shil_param(FMT_IMM,op);
opcd.guest_offs = state.cpu.rpc - blk->vaddr;
opcd.delay_slot = state.cpu.is_delayslot;
blk->oplist.push_back(opcd);
}
#if 1
#define FMT_I32 ERROR!WRONG++!!
#define FMT_F32 ERROR!WRONG++!!
#define FMT_F32 ERROR!WRONG++!!
#define FMT_TYPE ERROR!WRONG++!!
#define FMT_REG ERROR!WRONG++!!
#define FMT_IMM ERROR!WRONG++!!
#define FMT_PARAM ERROR!WRONG++!!
#define FMT_MASK ERROR!WRONG++!!
void dec_DynamicSet(u32 regbase,u32 offs=0)
{
if (offs==0)
@ -277,6 +270,7 @@ sh4dec(i0000_0000_0001_1011)
}
//ldc.l @<REG_N>+,SR
/*
sh4dec(i0100_nnnn_0000_0111)
{
/*
@ -290,9 +284,10 @@ sh4dec(i0100_nnnn_0000_0111)
{
//FIXME only if interrupts got on .. :P
UpdateINTC();
}*/
}* /
dec_End(0xFFFFFFFF,BET_StaticIntr,false);
}
*/
//ldc <REG_N>,SR
sh4dec(i0100_nnnn_0000_1110)
@ -309,6 +304,7 @@ sh4dec(i0000_0000_0000_1001)
{
}
//fschg
sh4dec(i1111_0011_1111_1101)
{
//fpscr.SZ is bit 20
@ -594,7 +590,7 @@ u32 MatchDiv32(u32 pc , Sh4RegType &reg1,Sh4RegType &reg2 , Sh4RegType &reg3)
u32 match=1;
for (int i=0;i<32;i++)
{
u16 opcode=ReadMem16(v_pc);
u16 opcode=IReadMem16(v_pc);
v_pc+=2;
if ((opcode&MASK_N)==ROTCL_KEY)
{
@ -610,7 +606,7 @@ u32 MatchDiv32(u32 pc , Sh4RegType &reg1,Sh4RegType &reg2 , Sh4RegType &reg3)
break;
}
opcode=ReadMem16(v_pc);
opcode=IReadMem16(v_pc);
v_pc+=2;
if ((opcode&MASK_N_M)==DIV1_KEY)
{
@ -684,11 +680,11 @@ bool MatchDiv32s(u32 op,u32 pc)
else //no match ...
{
/*
printf("%04X\n",ReadMem16(pc-2));
printf("%04X\n",ReadMem16(pc-0));
printf("%04X\n",ReadMem16(pc+2));
printf("%04X\n",ReadMem16(pc+4));
printf("%04X\n",ReadMem16(pc+6));*/
printf("%04X\n",IReadMem16(pc-2));
printf("%04X\n",IReadMem16(pc-0));
printf("%04X\n",IReadMem16(pc+2));
printf("%04X\n",IReadMem16(pc+4));
printf("%04X\n",IReadMem16(pc+6));*/
return false;
}
}
@ -697,11 +693,11 @@ bool MatchDiv32s(u32 op,u32 pc)
//This ended up too rare (and too hard to match)
bool MatchDiv0S_0(u32 pc)
{
if (ReadMem16(pc+0)==0x233A && //XOR r3,r3
ReadMem16(pc+2)==0x2137 && //DIV0S r3,r1
ReadMem16(pc+4)==0x322A && //SUBC r2,r2
ReadMem16(pc+6)==0x313A && //SUBC r3,r1
(ReadMem16(pc+8)&0xF00F)==0x2007) //DIV0S x,x
if (IReadMem16(pc+0)==0x233A && //XOR r3,r3
IReadMem16(pc+2)==0x2137 && //DIV0S r3,r1
IReadMem16(pc+4)==0x322A && //SUBC r2,r2
IReadMem16(pc+6)==0x313A && //SUBC r3,r1
(IReadMem16(pc+8)&0xF00F)==0x2007) //DIV0S x,x
return true;
else
return false;
@ -829,7 +825,7 @@ bool dec_generic(u32 op)
bool update_after=false;
if ((s32)e<0)
{
if (rs1._reg!=rs2._reg) //reg shouldn't be updated if its written
if (rs1._reg!=rs2._reg && !mmu_enabled()) //reg shouldn't be updated if its written
{
Emit(shop_sub,rs1,rs1,mk_imm(-e));
}
@ -998,7 +994,7 @@ void state_Setup(u32 rpc,fpscr_t fpu_cfg)
state.cpu.FPR64=fpu_cfg.PR;
state.cpu.FSZ64=fpu_cfg.SZ;
state.cpu.RoundToZero=fpu_cfg.RM==1;
verify(fpu_cfg.RM<2);
//verify(fpu_cfg.RM<2); // Happens with many wince games (set to 3)
//what about fp/fs ?
state.NextOp=NDO_NextOp;
@ -1014,7 +1010,7 @@ void state_Setup(u32 rpc,fpscr_t fpu_cfg)
void dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
{
blk=rbi;
state_Setup(blk->addr,blk->fpu_cfg);
state_Setup(blk->vaddr, blk->fpu_cfg);
ngen_GetFeatures(&state.ngen);
blk->guest_opcodes=0;
@ -1057,7 +1053,7 @@ void dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
}
*/
u32 op=ReadMem16(state.cpu.rpc);
u32 op=IReadMem16(state.cpu.rpc);
if (op==0 && state.cpu.is_delayslot)
{
printf("Delayslot 0 hack!\n");
@ -1069,6 +1065,8 @@ void dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
blk->guest_cycles+=0;
else
blk->guest_cycles+=CPU_RATIO;
if (OpDesc[op]->IsFloatingPoint())
blk->has_fpu_op = true;
verify(!(state.cpu.is_delayslot && OpDesc[op]->SetPC()));
if (state.ngen.OnlyDynamicEnds || !OpDesc[op]->rec_oph)
@ -1116,7 +1114,7 @@ void dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
}
_end:
blk->sh4_code_size=state.cpu.rpc-blk->addr;
blk->sh4_code_size=state.cpu.rpc-blk->vaddr;
blk->NextBlock=state.NextAddr;
blk->BranchBlock=state.JumpAddr;
blk->BlockType=state.BlockType;
@ -1150,12 +1148,12 @@ _end:
//Small-n-simple idle loop detector :p
if (state.info.has_readm && !state.info.has_writem && !state.info.has_fpu && blk->guest_opcodes<6)
{
if (blk->BlockType==BET_Cond_0 || (blk->BlockType==BET_Cond_1 && blk->BranchBlock<=blk->addr))
if (blk->BlockType==BET_Cond_0 || (blk->BlockType==BET_Cond_1 && blk->BranchBlock<=blk->vaddr))
{
blk->guest_cycles*=3;
}
if (blk->BranchBlock==blk->addr)
if (blk->BranchBlock==blk->vaddr)
{
blk->guest_cycles*=10;
}

View File

@ -14,6 +14,7 @@
#include "hw/sh4/sh4_interrupts.h"
#include "hw/sh4/sh4_mem.h"
#include "hw/sh4/modules/mmu.h"
#include "hw/pvr/pvr_mem.h"
#include "hw/aica/aica_if.h"
#include "hw/gdrom/gdrom_if.h"
@ -118,7 +119,7 @@ u32 emit_FreeSpace()
return CODE_SIZE-LastAddr;
}
// pc must be a physical address
bool DoCheck(u32 pc)
{
if (IsOnRam(pc))
@ -202,8 +203,19 @@ void RuntimeBlockInfo::Setup(u32 rpc,fpscr_t rfpu_cfg)
has_jcond=false;
BranchBlock=NextBlock=csc_RetCache=0xFFFFFFFF;
BlockType=BET_SCL_Intr;
has_fpu_op = false;
asid = 0xFFFFFFFF;
addr=rpc;
vaddr = rpc;
if (mmu_enabled())
{
bool shared;
mmu_instruction_translation(vaddr, addr, shared);
if (addr != vaddr && !shared)
asid = CCN_PTEH.ASID;
}
else
addr = vaddr;
fpu_cfg=rfpu_cfg;
oplist.clear();
@ -219,37 +231,44 @@ DynarecCodeEntryPtr rdv_CompilePC()
if (emit_FreeSpace()<16*1024 || pc==0x8c0000e0 || pc==0xac010000 || pc==0xac008300)
recSh4_ClearCache();
RuntimeBlockInfo* rv=0;
do
{
RuntimeBlockInfo* rbi = ngen_AllocateBlock();
if (rv==0) rv=rbi;
RuntimeBlockInfo* rbi = ngen_AllocateBlock();
#ifndef NO_MMU
try {
#endif
rbi->Setup(pc,fpscr);
bool do_opts=((rbi->addr&0x3FFFFFFF)>0x0C010100);
rbi->staging_runs=do_opts?100:-100;
ngen_Compile(rbi,DoCheck(rbi->addr),(pc&0xFFFFFF)==0x08300 || (pc&0xFFFFFF)==0x10000,false,do_opts);
verify(rbi->code!=0);
bm_AddBlock(rbi);
#ifndef NO_MMU
} catch (SH4ThrownException& ex) {
delete rbi;
throw ex;
}
#endif
if (rbi->BlockType==BET_Cond_0 || rbi->BlockType==BET_Cond_1)
pc=rbi->NextBlock;
else
pc=0;
} while(false && pc);
return rv->code;
return rbi->code;
}
DynarecCodeEntryPtr DYNACALL rdv_FailedToFindBlock(u32 pc)
{
//printf("rdv_FailedToFindBlock ~ %08X\n",pc);
next_pc=pc;
#ifndef NO_MMU
try {
#endif
next_pc=pc;
return rdv_CompilePC();
return rdv_CompilePC();
#ifndef NO_MMU
} catch (SH4ThrownException& ex) {
Do_Exception(pc, ex.expEvn, ex.callVect);
return bm_GetCode2(next_pc);
}
#endif
}
static void ngen_FailedToFindBlock_internal() {
@ -258,9 +277,6 @@ static void ngen_FailedToFindBlock_internal() {
void (*ngen_FailedToFindBlock)() = &ngen_FailedToFindBlock_internal;
extern u32 rebuild_counter;
u32 DYNACALL rdv_DoInterrupts_pc(u32 pc) {
next_pc = pc;
UpdateINTC();
@ -268,37 +284,39 @@ u32 DYNACALL rdv_DoInterrupts_pc(u32 pc) {
//We can only safely relocate/etc stuff here, as in other generic update cases
//There's a RET, meaning the code can't move around
//Interrupts happen at least 50 times/second, so its not a problem ..
/*
if (rebuild_counter == 0)
{
// TODO: Why is this commented, etc.
//bm_Rebuild();
}
*/
return next_pc;
}
void bm_Rebuild();
u32 DYNACALL rdv_DoInterrupts(void* block_cpde)
{
RuntimeBlockInfo* rbi = bm_GetBlock(block_cpde);
return rdv_DoInterrupts_pc(rbi->addr);
return rdv_DoInterrupts_pc(rbi->vaddr);
}
DynarecCodeEntryPtr DYNACALL rdv_BlockCheckFail(u32 pc)
// addr must be the physical address of the start of the block
DynarecCodeEntryPtr DYNACALL rdv_BlockCheckFail(u32 addr)
{
next_pc=pc;
recSh4_ClearCache();
RuntimeBlockInfo *block = bm_GetBlock(addr);
bm_RemoveBlock(block);
return rdv_CompilePC();
}
DynarecCodeEntryPtr rdv_FindCode()
{
DynarecCodeEntryPtr rv=bm_GetCode(next_pc);
if (rv==ngen_FailedToFindBlock)
return 0;
return rv;
}
//DynarecCodeEntryPtr rdv_FindCode()
//{
// DynarecCodeEntryPtr rv=bm_GetCode(next_pc);
// if (rv==ngen_FailedToFindBlock)
// return 0;
//
// return rv;
//}
DynarecCodeEntryPtr rdv_FindOrCompile()
{

View File

@ -70,7 +70,7 @@ DynarecCodeEntryPtr DYNACALL rdv_BlockCheckFail(u32 pc);
//Called to compile code @pc
DynarecCodeEntryPtr rdv_CompilePC();
//Returns 0 if there is no code @pc, code ptr otherwise
DynarecCodeEntryPtr rdv_FindCode();
//DynarecCodeEntryPtr rdv_FindCode();
//Finds or compiles code @pc
DynarecCodeEntryPtr rdv_FindOrCompile();

View File

@ -368,7 +368,8 @@ struct RegAlloc
verify(opid>=0 && opid<block->oplist.size());
shil_opcode* op=&block->oplist[opid];
return op->op == shop_sync_fpscr || op->op == shop_sync_sr || op->op == shop_ifb;
return op->op == shop_sync_fpscr || op->op == shop_sync_sr || op->op == shop_ifb
|| (mmu_enabled() && (op->op == shop_readm || op->op == shop_writem || op->op == shop_pref));
}
bool IsRegWallOp(RuntimeBlockInfo* block, int opid, bool is_fpr)
@ -496,6 +497,12 @@ struct RegAlloc
{
fp=true;
}
else
{
all = true;
fp = true;
gpr_b = true;
}
if (all)
{
@ -1119,6 +1126,30 @@ struct RegAlloc
}
}
void BailOut(u32 opid)
{
for (u32 sid = 0; sid < all_spans.size(); sid++)
{
RegSpan* spn = all_spans[sid];
if (spn->end >= opid && spn->start < opid && spn->writeback)
{
if (spn->fpr)
{
//printf("Op %d: Writing back f%d from %d\n",current_opid,spn->regstart,spn->nregf);
writeback_fpu++;
Writeback_FPU(spn->regstart,spn->nregf);
}
else
{
//printf("Op %d: Writing back r%d from %d\n",current_opid,spn->regstart,spn->nreg);
writeback_gpr++;
Writeback(spn->regstart,spn->nreg);
}
}
}
}
void Cleanup()
{
writeback_gpr=writeback_fpu=0;

View File

@ -151,6 +151,7 @@ struct shil_opcode
u16 host_offs;
u16 guest_offs;
bool delay_slot;
string dissasm();
};

View File

@ -173,11 +173,7 @@ void ExecuteDelayslot()
#if !defined(NO_MMU)
}
catch (SH4ThrownException& ex) {
ex.epc -= 2;
if (ex.expEvn == 0x800) // FPU disable exception
ex.expEvn = 0x820; // Slot FPU disable exception
else if (ex.expEvn == 0x180) // Illegal instruction exception
ex.expEvn = 0x1A0; // Slot illegal instruction exception
AdjustDelaySlotException(ex);
//printf("Delay slot exception\n");
throw ex;
}

View File

@ -50,11 +50,8 @@ void CCN_MMUCR_write(u32 addr, u32 value)
if (temp.TI != 0)
{
for (u32 i = 0; i < 4; i++)
ITLB[i].Data.V = 0;
for (u32 i = 0; i < 64; i++)
UTLB[i].Data.V = 0;
//sh4_cpu.ResetCache();
mmu_flush_table();
temp.TI = 0;
}
@ -63,6 +60,7 @@ void CCN_MMUCR_write(u32 addr, u32 value)
if (mmu_changed_state)
{
//printf("<*******>MMU Enabled , ONLY SQ remaps work<*******>\n");
sh4_cpu.ResetCache();
mmu_set_state();
}
}

View File

@ -201,7 +201,7 @@ void mmu_raise_exception(u32 mmu_error, u32 address, u32 am)
return;
break;
//TLB Multyhit
//TLB Multihit
case MMU_ERROR_TLB_MHIT:
printf("MMU_ERROR_TLB_MHIT @ 0x%X\n", address);
break;
@ -239,19 +239,10 @@ void mmu_raise_exception(u32 mmu_error, u32 address, u32 am)
else //IADDERR - Instruction Address Error
{
#ifdef TRACE_WINCE_SYSCALLS
bool skip_exception = false;
if (!print_wince_syscall(address, skip_exception))
printf_mmu("MMU_ERROR_BADADDR(i) 0x%X\n", address);
//if (!skip_exception)
RaiseException(0xE0, 0x100);
//else {
// SH4ThrownException ex = { 0, 0, 0 };
// throw ex;
//}
#else
printf_mmu("MMU_ERROR_BADADDR(i) 0x%X\n", address);
RaiseException(0xE0, 0x100);
if (!print_wince_syscall(address))
#endif
printf_mmu("MMU_ERROR_BADADDR(i) 0x%X\n", address);
RaiseException(0xE0, 0x100);
return;
}
printf_mmu("MMU_ERROR_BADADDR(d) 0x%X, handled\n", address);
@ -291,9 +282,10 @@ bool mmu_match(u32 va, CCN_PTEH_type Address, CCN_PTEL_type Data)
return false;
}
//Do a full lookup on the UTLB entry's
template<bool internal>
u32 mmu_full_lookup(u32 va, u32& idx, u32& rv)
u32 mmu_full_lookup(u32 va, const TLB_Entry** tlb_entry_ret, u32& rv)
{
if (!internal)
{
@ -302,21 +294,21 @@ u32 mmu_full_lookup(u32 va, u32& idx, u32& rv)
CCN_MMUCR.URC = 0;
}
u32 entry = 0;
u32 entry = -1;
u32 nom = 0;
for (u32 i = 0; i<64; i++)
{
//verify(sz!=0);
if (mmu_match(va, UTLB[i].Address, UTLB[i].Data))
TLB_Entry *tlb_entry = &UTLB[i];
if (mmu_match(va, tlb_entry->Address, tlb_entry->Data))
{
entry = i;
nom++;
u32 sz = UTLB[i].Data.SZ1 * 2 + UTLB[i].Data.SZ0;
u32 sz = tlb_entry->Data.SZ1 * 2 + tlb_entry->Data.SZ0;
u32 mask = mmu_mask[sz];
//VPN->PPN | low bits
rv = ((UTLB[i].Data.PPN << 10)&mask) | (va&(~mask));
rv = ((tlb_entry->Data.PPN << 10) & mask) | (va & (~mask));
}
}
@ -332,7 +324,7 @@ u32 mmu_full_lookup(u32 va, u32& idx, u32& rv)
}
}
idx = entry;
*tlb_entry_ret = &UTLB[entry];
return MMU_ERROR_NONE;
}
@ -364,15 +356,15 @@ u32 mmu_full_SQ(u32 va, u32& rv)
{
//Address=Dest&0xFFFFFFE0;
u32 entry;
u32 lookup = mmu_full_lookup(va, entry, rv);
const TLB_Entry *entry;
u32 lookup = mmu_full_lookup(va, &entry, rv);
rv &= ~31;//lower 5 bits are forced to 0
if (lookup != MMU_ERROR_NONE)
return lookup;
u32 md = UTLB[entry].Data.PR >> 1;
u32 md = entry->Data.PR >> 1;
//Priv mode protection
if ((md == 0) && sr.MD == 0)
@ -383,9 +375,9 @@ u32 mmu_full_SQ(u32 va, u32& rv)
//Write Protection (Lock or FW)
if (translation_type == MMU_TT_DWRITE)
{
if ((UTLB[entry].Data.PR & 1) == 0)
if ((entry->Data.PR & 1) == 0)
return MMU_ERROR_PROTECTED;
else if (UTLB[entry].Data.D == 0)
else if (entry->Data.D == 0)
return MMU_ERROR_FIRSTWRITE;
}
}
@ -395,48 +387,50 @@ u32 mmu_full_SQ(u32 va, u32& rv)
}
return MMU_ERROR_NONE;
}
template<u32 translation_type>
u32 mmu_data_translation(u32 va, u32& rv)
template<u32 translation_type, typename T>
void mmu_data_translation(u32 va, u32& rv)
{
//*opt notice* this could be only checked for writes, as reads are invalid
if ((va & 0xFC000000) == 0xE0000000)
if (va & (sizeof(T) - 1))
mmu_raise_exception(MMU_ERROR_BADADDR, va, translation_type);
if (translation_type == MMU_TT_DWRITE)
{
u32 lookup = mmu_full_SQ<translation_type>(va, rv);
if (lookup != MMU_ERROR_NONE)
return lookup;
rv = va; //SQ writes are not translated, only write backs are.
return MMU_ERROR_NONE;
if ((va & 0xFC000000) == 0xE0000000)
{
u32 lookup = mmu_full_SQ<translation_type>(va, rv);
if (lookup != MMU_ERROR_NONE)
mmu_raise_exception(lookup, va, translation_type);
rv = va; //SQ writes are not translated, only write backs are.
return;
}
}
if ((sr.MD == 0) && (va & 0x80000000) != 0)
{
//if on kernel, and not SQ addr -> error
return MMU_ERROR_BADADDR;
mmu_raise_exception(MMU_ERROR_BADADDR, va, translation_type);
}
if (sr.MD == 1 && ((va & 0xFC000000) == 0x7C000000))
{
rv = va;
return MMU_ERROR_NONE;
return;
}
if ((CCN_MMUCR.AT == 0) || (fast_reg_lut[va >> 29] != 0))
// Not called if CCN_MMUCR.AT == 0
//if ((CCN_MMUCR.AT == 0) || (fast_reg_lut[va >> 29] != 0))
if (fast_reg_lut[va >> 29] != 0)
{
rv = va;
return MMU_ERROR_NONE;
return;
}
/*
if ( CCN_CCR.ORA && ((va&0xFC000000)==0x7C000000))
{
verify(false);
return va;
}
*/
u32 entry;
u32 lookup = mmu_full_lookup(va, entry, rv);
const TLB_Entry *entry;
u32 lookup = mmu_full_lookup(va, &entry, rv);
if (lookup != MMU_ERROR_NONE)
return lookup;
mmu_raise_exception(lookup, va, translation_type);
#ifdef TRACE_WINCE_SYSCALLS
if (unresolved_unicode_string != 0)
@ -449,13 +443,13 @@ u32 mmu_data_translation(u32 va, u32& rv)
}
#endif
u32 md = UTLB[entry].Data.PR >> 1;
u32 md = entry->Data.PR >> 1;
//0X & User mode-> protection violation
//Priv mode protection
if ((md == 0) && sr.MD == 0)
{
return MMU_ERROR_PROTECTED;
mmu_raise_exception(MMU_ERROR_PROTECTED, va, translation_type);
}
//X0 -> read olny
@ -464,27 +458,33 @@ u32 mmu_data_translation(u32 va, u32& rv)
//Write Protection (Lock or FW)
if (translation_type == MMU_TT_DWRITE)
{
if ((UTLB[entry].Data.PR & 1) == 0)
return MMU_ERROR_PROTECTED;
else if (UTLB[entry].Data.D == 0)
return MMU_ERROR_FIRSTWRITE;
if ((entry->Data.PR & 1) == 0)
mmu_raise_exception(MMU_ERROR_PROTECTED, va, translation_type);
else if (entry->Data.D == 0)
mmu_raise_exception(MMU_ERROR_FIRSTWRITE, va, translation_type);
}
return MMU_ERROR_NONE;
}
u32 mmu_instruction_translation(u32 va, u32& rv)
template void mmu_data_translation<MMU_TT_DREAD, u16>(u32 va, u32& rv);
template void mmu_data_translation<MMU_TT_DREAD, u32>(u32 va, u32& rv);
void mmu_instruction_translation(u32 va, u32& rv, bool& shared)
{
if (va & 1)
{
mmu_raise_exception(MMU_ERROR_BADADDR, va, MMU_TT_IREAD);
}
if ((sr.MD == 0) && (va & 0x80000000) != 0)
{
//if SQ disabled , or if if SQ on but out of SQ mem then BAD ADDR ;)
if (va >= 0xE0000000)
return MMU_ERROR_BADADDR;
mmu_raise_exception(MMU_ERROR_BADADDR, va, MMU_TT_IREAD);
}
if ((CCN_MMUCR.AT == 0) || (fast_reg_lut[va >> 29] != 0))
{
rv = va;
return MMU_ERROR_NONE;
return;
}
bool mmach = false;
@ -509,6 +509,7 @@ retry_ITLB_Match:
nom++;
//VPN->PPN | low bits
rv = ((ITLB[i].Data.PPN << 10)&mask) | (va&(~mask));
shared = ITLB[i].Data.SH == 1;
}
}
}
@ -516,13 +517,15 @@ retry_ITLB_Match:
if (entry == 4)
{
verify(mmach == false);
u32 lookup = mmu_full_lookup(va, entry, rv);
const TLB_Entry *tlb_entry;
u32 lookup = mmu_full_lookup(va, &tlb_entry, rv);
if (lookup != MMU_ERROR_NONE)
return lookup;
mmu_raise_exception(lookup, va, MMU_TT_IREAD);
u32 replace_index = ITLB_LRU_USE[CCN_MMUCR.LRUI];
verify(replace_index != 0xFFFFFFFF);
ITLB[replace_index] = UTLB[entry];
ITLB[replace_index] = *tlb_entry;
entry = replace_index;
ITLB_Sync(entry);
mmach = true;
@ -532,11 +535,11 @@ retry_ITLB_Match:
{
if (nom)
{
return MMU_ERROR_TLB_MHIT;
mmu_raise_exception(MMU_ERROR_TLB_MHIT, va, MMU_TT_IREAD);
}
else
{
return MMU_ERROR_TLB_MISS;
mmu_raise_exception(MMU_ERROR_TLB_MISS, va, MMU_TT_IREAD);
}
}
@ -549,10 +552,8 @@ retry_ITLB_Match:
//Priv mode protection
if ((md == 0) && sr.MD == 0)
{
return MMU_ERROR_PROTECTED;
mmu_raise_exception(MMU_ERROR_PROTECTED, va, MMU_TT_IREAD);
}
return MMU_ERROR_NONE;
}
void mmu_set_state()
@ -570,6 +571,7 @@ void mmu_set_state()
WriteMem16 = &mmu_WriteMem16;
WriteMem32 = &mmu_WriteMem32;
WriteMem64 = &mmu_WriteMem64;
mmu_flush_table();
}
else
{
@ -617,149 +619,77 @@ void MMU_term()
{
}
// Invalidates every TLB entry (instruction and unified) by clearing the
// valid (V) bit, forcing subsequent lookups to miss and be re-resolved.
void mmu_flush_table()
{
	//printf("MMU tables flushed\n");
	for (u32 i = 0; i < 4; i++)
		ITLB[i].Data.V = 0;
	for (u32 i = 0; i < 64; i++)
		UTLB[i].Data.V = 0;
}
u8 DYNACALL mmu_ReadMem8(u32 adr)
{
u32 addr;
u32 tv = mmu_data_translation<MMU_TT_DREAD>(adr, addr);
if (tv == 0)
return _vmem_ReadMem8(addr);
else
mmu_raise_exception(tv, adr, MMU_TT_DREAD);
return 0;
mmu_data_translation<MMU_TT_DREAD, u8>(adr, addr);
return _vmem_ReadMem8(addr);
}
u16 DYNACALL mmu_ReadMem16(u32 adr)
{
if (adr & 1)
{
mmu_raise_exception(MMU_ERROR_BADADDR, adr, MMU_TT_DREAD);
return 0;
}
u32 addr;
u32 tv = mmu_data_translation<MMU_TT_DREAD>(adr, addr);
if (tv == 0)
return _vmem_ReadMem16(addr);
else
mmu_raise_exception(tv, adr, MMU_TT_DREAD);
return 0;
mmu_data_translation<MMU_TT_DREAD, u16>(adr, addr);
return _vmem_ReadMem16(addr);
}
u16 DYNACALL mmu_IReadMem16(u32 adr)
u16 DYNACALL mmu_IReadMem16(u32 vaddr)
{
if (adr & 1)
{
mmu_raise_exception(MMU_ERROR_BADADDR, adr, MMU_TT_IREAD);
return 0;
}
u32 addr;
u32 tv = mmu_instruction_translation(adr, addr);
if (tv == 0)
return _vmem_ReadMem16(addr);
else
mmu_raise_exception(tv, adr, MMU_TT_IREAD);
return 0;
bool shared;
mmu_instruction_translation(vaddr, addr, shared);
return _vmem_ReadMem16(addr);
}
u32 DYNACALL mmu_ReadMem32(u32 adr)
{
if (adr & 3)
{
mmu_raise_exception(MMU_ERROR_BADADDR, adr, MMU_TT_DREAD);
return 0;
}
u32 addr;
u32 tv = mmu_data_translation<MMU_TT_DREAD>(adr, addr);
if (tv == 0)
return _vmem_ReadMem32(addr);
else
mmu_raise_exception(tv, adr, MMU_TT_DREAD);
return 0;
mmu_data_translation<MMU_TT_DREAD, u32>(adr, addr);
return _vmem_ReadMem32(addr);
}
u64 DYNACALL mmu_ReadMem64(u32 adr)
{
if (adr & 7)
{
mmu_raise_exception(MMU_ERROR_BADADDR, adr, MMU_TT_DREAD);
return 0;
}
u32 addr;
u32 tv = mmu_data_translation<MMU_TT_DREAD>(adr, addr);
if (tv == 0)
{
return _vmem_ReadMem64(addr);
}
else
mmu_raise_exception(tv, adr, MMU_TT_DREAD);
return 0;
mmu_data_translation<MMU_TT_DREAD, u64>(adr, addr);
return _vmem_ReadMem64(addr);
}
void DYNACALL mmu_WriteMem8(u32 adr, u8 data)
{
u32 addr;
u32 tv = mmu_data_translation<MMU_TT_DWRITE>(adr, addr);
if (tv == 0)
{
_vmem_WriteMem8(addr, data);
return;
}
else
mmu_raise_exception(tv, adr, MMU_TT_DWRITE);
mmu_data_translation<MMU_TT_DWRITE, u8>(adr, addr);
_vmem_WriteMem8(addr, data);
}
void DYNACALL mmu_WriteMem16(u32 adr, u16 data)
{
if (adr & 1)
{
mmu_raise_exception(MMU_ERROR_BADADDR, adr, MMU_TT_DWRITE);
return;
}
u32 addr;
u32 tv = mmu_data_translation<MMU_TT_DWRITE>(adr, addr);
if (tv == 0)
{
_vmem_WriteMem16(addr, data);
return;
}
else
mmu_raise_exception(tv, adr, MMU_TT_DWRITE);
mmu_data_translation<MMU_TT_DWRITE, u16>(adr, addr);
_vmem_WriteMem16(addr, data);
}
void DYNACALL mmu_WriteMem32(u32 adr, u32 data)
{
if (adr & 3)
{
mmu_raise_exception(MMU_ERROR_BADADDR, adr, MMU_TT_DWRITE);
return;
}
u32 addr;
u32 tv = mmu_data_translation<MMU_TT_DWRITE>(adr, addr);
if (tv == 0)
{
_vmem_WriteMem32(addr, data);
return;
}
else
mmu_raise_exception(tv, adr, MMU_TT_DWRITE);
mmu_data_translation<MMU_TT_DWRITE, u32>(adr, addr);
_vmem_WriteMem32(addr, data);
}
void DYNACALL mmu_WriteMem64(u32 adr, u64 data)
{
if (adr & 7)
{
mmu_raise_exception(MMU_ERROR_BADADDR, adr, MMU_TT_DWRITE);
return;
}
u32 addr;
u32 tv = mmu_data_translation<MMU_TT_DWRITE>(adr, addr);
if (tv == 0)
{
_vmem_WriteMem64(addr, data);
return;
}
else
mmu_raise_exception(tv, adr, MMU_TT_DWRITE);
mmu_data_translation<MMU_TT_DWRITE, u64>(adr, addr);
_vmem_WriteMem64(addr, data);
}
bool mmu_TranslateSQW(u32 adr, u32* out)

View File

@ -10,7 +10,7 @@
#define MMU_ERROR_NONE 0
//TLB miss
#define MMU_ERROR_TLB_MISS 1
//TLB Multyhit
//TLB Multihit
#define MMU_ERROR_TLB_MHIT 2
//Mem is read/write protected (depends on translation type)
#define MMU_ERROR_PROTECTED 3
@ -21,15 +21,6 @@
//Can't Execute
#define MMU_ERROR_EXECPROT 6
//Translation Types
//Opcode read
#define MMU_TT_IREAD 0
//Data write
#define MMU_TT_DWRITE 1
//Data write
#define MMU_TT_DREAD 2
//Do an mmu lookup for va , returns translation status , if MMU_ERROR_NONE , rv is set to translated index
extern u32 mmu_error_TT;
void MMU_Init();

View File

@ -7,11 +7,13 @@
#define SH_CURTHREAD 1
#define SH_CURPROC 2
extern const u32 mmu_mask[4];
static bool read_mem32(u32 addr, u32& data)
{
u32 pa;
u32 idx;
if (mmu_full_lookup<true>(addr, idx, pa) != MMU_ERROR_NONE)
const TLB_Entry *entry;
if (mmu_full_lookup<true>(addr, &entry, pa) != MMU_ERROR_NONE)
return false;
data = ReadMem32_nommu(pa);
return true;
@ -20,8 +22,8 @@ static bool read_mem32(u32 addr, u32& data)
static bool read_mem16(u32 addr, u16& data)
{
u32 pa;
u32 idx;
if (mmu_full_lookup<true>(addr, idx, pa) != MMU_ERROR_NONE)
const TLB_Entry *entry;
if (mmu_full_lookup<true>(addr, &entry, pa) != MMU_ERROR_NONE)
return false;
data = ReadMem16_nommu(pa);
return true;
@ -30,8 +32,8 @@ static bool read_mem16(u32 addr, u16& data)
static bool read_mem8(u32 addr, u8& data)
{
u32 pa;
u32 idx;
if (mmu_full_lookup<true>(addr, idx, pa) != MMU_ERROR_NONE)
const TLB_Entry *entry;
if (mmu_full_lookup<true>(addr, &entry, pa) != MMU_ERROR_NONE)
return false;
data = ReadMem8_nommu(pa);
return true;
@ -246,9 +248,8 @@ std::string get_ascii_string(u32 addr)
return str;
}
static bool print_wince_syscall(u32 address, bool &skip_exception)
static bool print_wince_syscall(u32 address)
{
skip_exception = false;
if (address & 1)
{
if (address == 0xfffffd5d || address == 0xfffffd05) // Sleep, QueryPerformanceCounter
@ -329,3 +330,59 @@ static bool print_wince_syscall(u32 address, bool &skip_exception)
return false;
}
// WinCE-specific address resolution: instead of taking a TLB miss exception,
// walk the OS page tables directly and synthesize a TLB entry for va.
// Returns true and fills 'entry' on success, false when va cannot be resolved.
// NOTE(review): the table layout (CCN_TTB indexing, the +12 offset, the
// r_bank[4]+0x14 comparison, the hard-coded SQ PTEL value) is reverse
// engineered from WinCE — confirm against a kernel trace before trusting it.
static bool wince_resolve_address(u32 va, TLB_Entry &entry)
{
	// WinCE hack
	if ((va & 0x80000000) == 0)
	{
		// User-space address: index the page-group table rooted at CCN_TTB.
		u32 page_group = ReadMem32_nommu(CCN_TTB + ((va >> 25) << 2));
		u32 page = ((va >> 16) & 0x1ff) << 2;
		u32 paddr = ReadMem32_nommu(page_group + page);
		if (paddr & 0x80000000)
		{
			u32 whatever = ReadMem32_nommu(r_bank[4] + 0x14);
			if (whatever != ReadMem32_nommu(paddr))
			{
				paddr += 12;
				u32 ptel = ReadMem32_nommu(paddr + ((va >> 10) & 0x3c));
				//FIXME CCN_PTEA = paddr >> 29;
				if (ptel != 0)
				{
					entry.Data.reg_data = ptel - 1;
					entry.Address.ASID = CCN_PTEH.ASID;
					entry.Assistance.reg_data = 0;
					u32 sz = entry.Data.SZ1 * 2 + entry.Data.SZ0;
					entry.Address.VPN = (va & mmu_mask[sz]) >> 10;
					// Bug fix: the original had a no-op statement 'true;'
					// here, so a successfully resolved entry was discarded
					// and false returned. Return true like the SQ branch.
					return true;
				}
			}
		}
	}
	else
	{
		// SQ
		if (((va >> 26) & 0x3F) == 0x38)
		{
			u32 r1 = (va - 0xe0000000) & 0xfff00000;
			//r1 &= 0xfff00000;
			//u32 r0 = ReadMem32_nommu(0x8C01258C); // FIXME
			//u32 r0 = 0x8c138b14;
			//r0 = ReadMem32_nommu(r0); // 0xE0001F5
			u32 r0 = 0xe0001f5;
			r0 += r1;
			entry.Data.reg_data = r0 - 1;
			entry.Assistance.reg_data = r0 >> 29;
			entry.Address.ASID = CCN_PTEH.ASID;
			u32 sz = entry.Data.SZ1 * 2 + entry.Data.SZ0;
			entry.Address.VPN = (va & mmu_mask[sz]) >> 10;
			return true;
		}
	}
	return false;
}

View File

@ -122,6 +122,15 @@ static INLINE void RaiseFPUDisableException()
#endif
}
// Rewrites an exception that was raised inside a branch delay slot:
// the exception PC is moved back to the branch instruction itself, and
// the event code is replaced by its "slot" variant where one exists.
static INLINE void AdjustDelaySlotException(SH4ThrownException& ex)
{
	ex.epc -= 2;	// report the branch, not the slot instruction
	switch (ex.expEvn)
	{
	case 0x800:		// FPU disable exception
		ex.expEvn = 0x820;	// Slot FPU disable exception
		break;
	case 0x180:		// Illegal instruction exception
		ex.expEvn = 0x1A0;	// Slot illegal instruction exception
		break;
	default:
		break;
	}
}
// The SH4 sets the signaling bit to 0 for qNaN (unlike all recent CPUs). Some games relies on this.
static INLINE f32 fixNaN(f32 f)
{

View File

@ -11,6 +11,7 @@
#include "hw/sh4/sh4_opcode_list.h"
#include "hw/sh4/dyna/ngen.h"
#include "hw/sh4/modules/ccn.h"
#include "hw/sh4/modules/mmu.h"
#include "hw/sh4/sh4_interrupts.h"
#include "hw/sh4/sh4_core.h"
@ -206,6 +207,99 @@ static void ngen_blockcheckfail(u32 pc) {
rdv_BlockCheckFail(pc);
}
static u32 exception_raised;
// Load helper used by generated code when the MMU is enabled: performs the
// read and converts any SH4ThrownException into a call to Do_Exception,
// reporting the fault through the exception_raised flag, which the generated
// code tests after the call instead of handling C++ exceptions itself.
// addr: guest address to read.
// pc:   guest pc of the access; bit 0 set means the access is in a delay slot.
// Returns the value read, or 0 when an exception was raised.
template<typename T>
static T ReadMemNoEx(u32 addr, u32 pc)
{
	static_assert(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8,
			"unsupported access size");
	try {
		exception_raised = 0;
		// sizeof(T) is a compile-time constant: only one branch survives.
		if (sizeof(T) == 1)
			return ReadMem8(addr);
		else if (sizeof(T) == 2)
			return ReadMem16(addr);
		else if (sizeof(T) == 4)
			return ReadMem32(addr);
		else
			// sizeof(T) == 8, guaranteed by the static_assert above. Using a
			// plain 'else' (the original tested 'sizeof(T) == 8') ensures the
			// function cannot fall off the end without returning a value.
			return ReadMem64(addr);
	} catch (SH4ThrownException& ex) {
		if (pc & 1)
		{
			// Delay slot: report the branch pc and use the "slot" variant
			// of the exception event where applicable.
			AdjustDelaySlotException(ex);
			pc--;
		}
		Do_Exception(pc, ex.expEvn, ex.callVect);
		exception_raised = 1;
		return 0;
	}
}
// Store helper used by generated code when the MMU is enabled: performs the
// write and converts any SH4ThrownException into a call to Do_Exception,
// reporting the fault through the exception_raised flag, which the generated
// code tests after the call instead of handling C++ exceptions itself.
// addr: guest address to write.
// data: value to store.
// pc:   guest pc of the access; bit 0 set means the access is in a delay slot.
template<typename T>
static void WriteMemNoEx(u32 addr, T data, u32 pc)
{
try {
// sizeof(T) is a compile-time constant: only one branch survives.
if (sizeof(T) == 1)
WriteMem8(addr, data);
else if (sizeof(T) == 2)
WriteMem16(addr, data);
else if (sizeof(T) == 4)
WriteMem32(addr, data);
else if (sizeof(T) == 8)
WriteMem64(addr, data);
// Cleared only after the write succeeded; skipped if the write throws.
exception_raised = 0;
} catch (SH4ThrownException& ex) {
if (pc & 1)
{
// Delay slot
AdjustDelaySlotException(ex);
pc--;
}
Do_Exception(pc, ex.expEvn, ex.callVect);
exception_raised = 1;
}
}
// Runs one opcode through the interpreter (used by the shop_ifb path when the
// MMU is enabled). Any SH4 exception is routed to Do_Exception and signalled
// via exception_raised, which the generated code tests after the call.
// pc has bit 0 set when the opcode sits in a delay slot.
static void interpreter_fallback(u16 op, u32 pc)
{
try {
OpDesc[op]->oph(op);
exception_raised = 0;
} catch (SH4ThrownException& ex) {
// Loud trace: an exception from an interpreted fallback op is unexpected.
printf("HOLY SHIT! interpreter_fallback exception pc %08x evn %x vect %x\n", pc, ex.expEvn, ex.callVect);
if (pc & 1)
{
// Delay slot
AdjustDelaySlotException(ex);
pc--;
}
Do_Exception(pc, ex.expEvn, ex.callVect);
exception_raised = 1;
}
}
// MMU-aware store-queue write-back that never lets a C++ exception escape
// into generated code: faults are routed to Do_Exception and reported
// through the exception_raised flag.
// pc has bit 0 set when the triggering instruction is in a delay slot.
static void do_sqw_mmu_no_ex(u32 addr, u32 pc)
{
try {
do_sqw_mmu(addr);
exception_raised = 0;
} catch (SH4ThrownException& ex) {
if (pc & 1)
{
// Delay slot
AdjustDelaySlotException(ex);
pc--;
}
Do_Exception(pc, ex.expEvn, ex.callVect);
exception_raised = 1;
}
}
// Thin trampoline around do_sqw_nommu.
// NOTE(review): presumably exists to give the emitter a single fixed,
// directly-callable address regardless of what do_sqw_nommu resolves to —
// confirm at the call site.
static void do_sqw_nommu_local(u32 addr, u8* sqb)
{
do_sqw_nommu(addr, sqb);
}
class BlockCompiler : public Xbyak::CodeGenerator
{
public:
@ -258,25 +352,55 @@ public:
#else
sub(rsp, 0x8); // align stack
#endif
Xbyak::Label exit_block;
for (size_t i = 0; i < block->oplist.size(); i++)
if (mmu_enabled() && block->has_fpu_op)
{
shil_opcode& op = block->oplist[i];
Xbyak::Label fpu_enabled;
mov(rax, (uintptr_t)&sr);
mov(eax, dword[rax]);
and_(eax, 0x8000); // test SR.FD bit
jz(fpu_enabled);
mov(call_regs[0], block->vaddr); // pc
mov(call_regs[1], 0x800); // event
mov(call_regs[2], 0x100); // vector
GenCall(Do_Exception);
jmp(exit_block, T_NEAR);
L(fpu_enabled);
}
regalloc.OpBegin(&op, i);
for (current_opid = 0; current_opid < block->oplist.size(); current_opid++)
{
shil_opcode& op = block->oplist[current_opid];
regalloc.OpBegin(&op, current_opid);
switch (op.op) {
case shop_ifb:
if (op.rs1._imm)
{
mov(rax, (size_t)&next_pc);
mov(dword[rax], op.rs2._imm);
if (op.rs1._imm)
{
mov(rax, (size_t)&next_pc);
mov(dword[rax], op.rs2._imm);
}
mov(call_regs[0], op.rs3._imm);
if (!mmu_enabled())
{
GenCall(OpDesc[op.rs3._imm]->oph);
}
else
{
mov(call_regs[1], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0)); // pc
GenCall(interpreter_fallback);
test(dword[(void *)&exception_raised], 1);
jnz(exit_block, T_NEAR);
}
}
mov(call_regs[0], op.rs3._imm);
GenCall(OpDesc[op.rs3._imm]->oph);
break;
case shop_jcond:
@ -325,11 +449,28 @@ public:
case shop_readm:
{
u32 size = op.flags & 0x7f;
if (op.rs1.is_imm())
bool immediate_address = op.rs1.is_imm();
if (immediate_address && mmu_enabled() && (op.rs1._imm >> 12) != (block->vaddr >> 12))
{
// When full mmu is on, only consider addresses in the same 4k page
immediate_address = false;
}
if (immediate_address)
{
u32 addr = op.rs1._imm;
if (mmu_enabled())
{
u32 paddr;
if (size == 2)
mmu_data_translation<MMU_TT_DREAD, u16>(addr, paddr);
else if (size == 4)
mmu_data_translation<MMU_TT_DREAD, u32>(addr, paddr);
else
die("Invalid immediate size");
addr = paddr;
}
bool isram = false;
void* ptr = _vmem_read_const(op.rs1._imm, isram, size);
void* ptr = _vmem_read_const(addr, isram, size);
if (isram)
{
@ -338,14 +479,27 @@ public:
switch (size)
{
case 2:
movsx(regalloc.MapRegister(op.rd), word[rax]);
if (regalloc.IsAllocg(op.rd))
movsx(regalloc.MapRegister(op.rd), word[rax]);
else
{
movsx(eax, word[rax]);
mov(rcx, (uintptr_t)op.rd.reg_ptr());
mov(dword[rcx], eax);
}
break;
case 4:
if (regalloc.IsAllocg(op.rd))
mov(regalloc.MapRegister(op.rd), dword[rax]);
else
else if (regalloc.IsAllocf(op.rd))
movd(regalloc.MapXRegister(op.rd), dword[rax]);
else
{
mov(eax, dword[rax]);
mov(rcx, (uintptr_t)op.rd.reg_ptr());
mov(dword[rcx], eax);
}
break;
default:
@ -356,7 +510,7 @@ public:
else
{
// Not RAM: the returned pointer is a memory handler
mov(call_regs[0], op.rs1._imm);
mov(call_regs[0], addr);
switch(size)
{
@ -385,42 +539,71 @@ public:
{
if (op.rs3.is_imm())
add(call_regs[0], op.rs3._imm);
else
else if (regalloc.IsAllocg(op.rs3))
add(call_regs[0], regalloc.MapRegister(op.rs3));
else
{
mov(rax, (uintptr_t)op.rs3.reg_ptr());
add(call_regs[0], dword[rax]);
}
}
if (mmu_enabled())
mov(call_regs[1], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0)); // pc
if (size == 1) {
GenCall(ReadMem8);
if (!mmu_enabled())
GenCall(ReadMem8);
else
GenCall(ReadMemNoEx<u8>);
movsx(ecx, al);
}
else if (size == 2) {
GenCall(ReadMem16);
if (!mmu_enabled())
GenCall(ReadMem16);
else
GenCall(ReadMemNoEx<u16>);
movsx(ecx, ax);
}
else if (size == 4) {
GenCall(ReadMem32);
if (!mmu_enabled())
GenCall(ReadMem32);
else
GenCall(ReadMemNoEx<u32>);
mov(ecx, eax);
}
else if (size == 8) {
GenCall(ReadMem64);
if (!mmu_enabled())
GenCall(ReadMem64);
else
GenCall(ReadMemNoEx<u64>);
mov(rcx, rax);
}
else {
die("1..8 bytes");
}
if (mmu_enabled())
{
test(dword[(void *)&exception_raised], 1);
jnz(exit_block, T_NEAR);
}
if (size != 8)
host_reg_to_shil_param(op.rd, ecx);
else {
#ifdef EXPLODE_SPANS
verify(op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1));
movd(regalloc.MapXRegister(op.rd, 0), ecx);
shr(rcx, 32);
movd(regalloc.MapXRegister(op.rd, 1), ecx);
#else
mov(rax, (uintptr_t)op.rd.reg_ptr());
mov(qword[rax], rcx);
if (op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1))
{
movd(regalloc.MapXRegister(op.rd, 0), ecx);
shr(rcx, 32);
movd(regalloc.MapXRegister(op.rd, 1), ecx);
}
else
#endif
{
mov(rax, (uintptr_t)op.rd.reg_ptr());
mov(qword[rax], rcx);
}
}
}
}
@ -434,36 +617,69 @@ public:
{
if (op.rs3.is_imm())
add(call_regs[0], op.rs3._imm);
else
else if (regalloc.IsAllocg(op.rs3))
add(call_regs[0], regalloc.MapRegister(op.rs3));
else
{
mov(rax, (uintptr_t)op.rs3.reg_ptr());
add(call_regs[0], dword[rax]);
}
}
if (size != 8)
shil_param_to_host_reg(op.rs2, call_regs[1]);
else {
#ifdef EXPLODE_SPANS
verify(op.rs2.count() == 2 && regalloc.IsAllocf(op.rs2, 0) && regalloc.IsAllocf(op.rs2, 1));
movd(call_regs[1], regalloc.MapXRegister(op.rs2, 1));
shl(call_regs64[1], 32);
movd(eax, regalloc.MapXRegister(op.rs2, 0));
or_(call_regs64[1], rax);
#else
mov(rax, (uintptr_t)op.rs2.reg_ptr());
mov(call_regs64[1], qword[rax]);
if (op.rs2.count() == 2 && regalloc.IsAllocf(op.rs2, 0) && regalloc.IsAllocf(op.rs2, 1))
{
movd(call_regs[1], regalloc.MapXRegister(op.rs2, 1));
shl(call_regs64[1], 32);
movd(eax, regalloc.MapXRegister(op.rs2, 0));
or_(call_regs64[1], rax);
}
else
#endif
{
mov(rax, (uintptr_t)op.rs2.reg_ptr());
mov(call_regs64[1], qword[rax]);
}
}
if (mmu_enabled())
mov(call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0)); // pc
if (size == 1)
GenCall(WriteMem8);
else if (size == 2)
GenCall(WriteMem16);
else if (size == 4)
GenCall(WriteMem32);
else if (size == 8)
GenCall(WriteMem64);
if (size == 1) {
if (!mmu_enabled())
GenCall(WriteMem8);
else
GenCall(WriteMemNoEx<u8>);
}
else if (size == 2) {
if (!mmu_enabled())
GenCall(WriteMem16);
else
GenCall(WriteMemNoEx<u16>);
}
else if (size == 4) {
if (!mmu_enabled())
GenCall(WriteMem32);
else
GenCall(WriteMemNoEx<u32>);
}
else if (size == 8) {
if (!mmu_enabled())
GenCall(WriteMem64);
else
GenCall(WriteMemNoEx<u64>);
}
else {
die("1..8 bytes");
}
if (mmu_enabled())
{
test(dword[(void *)&exception_raised], 1);
jnz(exit_block, T_NEAR);
}
}
break;
@ -683,11 +899,52 @@ public:
shr(rax, 32);
mov(regalloc.MapRegister(op.rd2), eax);
break;
/*
case shop_pref:
// TODO
{
Xbyak::Reg32 rn;
if (regalloc.IsAllocg(op.rs1))
{
rn = regalloc.MapRegister(op.rs1);
}
else
{
mov(rax, (uintptr_t)op.rs1.reg_ptr());
mov(eax, dword[rax]);
rn = eax;
}
mov(ecx, rn);
shr(ecx, 26);
cmp(ecx, 0x38);
Xbyak::Label no_sqw;
jne(no_sqw);
mov(call_regs[0], rn);
if (mmu_enabled())
{
mov(call_regs[1], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0)); // pc
GenCall(do_sqw_mmu_no_ex);
test(dword[(void *)&exception_raised], 1);
jnz(exit_block, T_NEAR);
}
else
{
if (CCN_MMUCR.AT == 1)
{
GenCall(do_sqw_mmu);
}
else
{
mov(call_regs64[1], (uintptr_t)sq_both);
GenCall(&do_sqw_nommu_local);
}
}
L(no_sqw);
}
break;
*/
case shop_ext_s8:
mov(eax, regalloc.MapRegister(op.rs1));
movsx(regalloc.MapRegister(op.rd), al);
@ -968,6 +1225,7 @@ public:
die("Invalid block end type");
}
L(exit_block);
#ifdef _WIN32
add(rsp, 0x28);
#else
@ -978,6 +1236,7 @@ public:
ready();
block->code = (DynarecCodeEntryPtr)getCode();
block->host_code_size = getSize();
emit_Skip(getSize());
}
@ -1089,6 +1348,19 @@ private:
void CheckBlock(RuntimeBlockInfo* block) {
mov(call_regs[0], block->addr);
// if (mmu_enabled() && block->asid != 0xFFFFFFFF)
// {
// mov(rax, (uintptr_t)&CCN_PTEH.reg_data);
// cmp(byte[rax], block->asid);
// jne(reinterpret_cast<const void*>(&ngen_blockcheckfail));
// }
if (mmu_enabled())
{
mov(rax, (uintptr_t)&next_pc);
cmp(dword[rax], block->vaddr);
jne(reinterpret_cast<const void*>(&ngen_blockcheckfail));
}
s32 sz=block->sh4_code_size;
u32 sa=block->addr;
@ -1147,22 +1419,66 @@ private:
void GenCall(Ret(*function)(Params...))
{
#ifndef _WIN32
bool xmm8_mapped = regalloc.IsMapped(xmm8, current_opid);
bool xmm9_mapped = regalloc.IsMapped(xmm9, current_opid);
bool xmm10_mapped = regalloc.IsMapped(xmm10, current_opid);
bool xmm11_mapped = regalloc.IsMapped(xmm11, current_opid);
// Need to save xmm registers as they are not preserved in linux/mach
sub(rsp, 16);
movd(ptr[rsp + 0], xmm8);
movd(ptr[rsp + 4], xmm9);
movd(ptr[rsp + 8], xmm10);
movd(ptr[rsp + 12], xmm11);
int offset = 0;
if (xmm8_mapped || xmm9_mapped || xmm10_mapped || xmm11_mapped)
{
sub(rsp, 4 * (xmm8_mapped + xmm9_mapped + xmm10_mapped + xmm11_mapped));
if (xmm8_mapped)
{
movd(ptr[rsp + offset], xmm8);
offset += 4;
}
if (xmm9_mapped)
{
movd(ptr[rsp + offset], xmm9);
offset += 4;
}
if (xmm10_mapped)
{
movd(ptr[rsp + offset], xmm10);
offset += 4;
}
if (xmm11_mapped)
{
movd(ptr[rsp + offset], xmm11);
offset += 4;
}
}
#endif
call(function);
#ifndef _WIN32
movd(xmm8, ptr[rsp + 0]);
movd(xmm9, ptr[rsp + 4]);
movd(xmm10, ptr[rsp + 8]);
movd(xmm11, ptr[rsp + 12]);
add(rsp, 16);
if (xmm8_mapped || xmm9_mapped || xmm10_mapped || xmm11_mapped)
{
if (xmm11_mapped)
{
offset -= 4;
movd(xmm11, ptr[rsp + offset]);
}
if (xmm10_mapped)
{
offset -= 4;
movd(xmm10, ptr[rsp + offset]);
}
if (xmm9_mapped)
{
offset -= 4;
movd(xmm9, ptr[rsp + offset]);
}
if (xmm8_mapped)
{
offset -= 4;
movd(xmm8, ptr[rsp + offset]);
}
add(rsp, 4 * (xmm8_mapped + xmm9_mapped + xmm10_mapped + xmm11_mapped));
}
#endif
}
@ -1183,17 +1499,36 @@ private:
{
if (param.is_r32f())
{
if (!reg.isXMM())
movd((const Xbyak::Reg32 &)reg, regalloc.MapXRegister(param));
if (regalloc.IsAllocf(param))
{
if (!reg.isXMM())
movd((const Xbyak::Reg32 &)reg, regalloc.MapXRegister(param));
else
movss((const Xbyak::Xmm &)reg, regalloc.MapXRegister(param));
}
else
movss((const Xbyak::Xmm &)reg, regalloc.MapXRegister(param));
{
mov(rax, (size_t)param.reg_ptr());
mov((const Xbyak::Reg32 &)reg, dword[rax]);
}
}
else
{
if (!reg.isXMM())
mov((const Xbyak::Reg32 &)reg, regalloc.MapRegister(param));
if (regalloc.IsAllocg(param))
{
if (!reg.isXMM())
mov((const Xbyak::Reg32 &)reg, regalloc.MapRegister(param));
else
movd((const Xbyak::Xmm &)reg, regalloc.MapRegister(param));
}
else
movd((const Xbyak::Xmm &)reg, regalloc.MapRegister(param));
{
mov(rax, (size_t)param.reg_ptr());
if (!reg.isXMM())
mov((const Xbyak::Reg32 &)reg, dword[rax]);
else
movss((const Xbyak::Xmm &)reg, dword[rax]);
}
}
}
else
@ -1212,13 +1547,21 @@ private:
else
movd(regalloc.MapRegister(param), (const Xbyak::Xmm &)reg);
}
else
else if (regalloc.IsAllocf(param))
{
if (!reg.isXMM())
movd(regalloc.MapXRegister(param), (const Xbyak::Reg32 &)reg);
else
movss(regalloc.MapXRegister(param), (const Xbyak::Xmm &)reg);
}
else
{
mov(rax, (size_t)param.reg_ptr());
if (!reg.isXMM())
mov(dword[rax], (const Xbyak::Reg32 &)reg);
else
movss(dword[rax], (const Xbyak::Xmm &)reg);
}
}
vector<Xbyak::Reg32> call_regs;
@ -1234,6 +1577,7 @@ private:
X64RegAlloc regalloc;
Xbyak::util::Cpu cpu;
size_t current_opid;
static const u32 float_sign_mask;
static const u32 float_abs_mask;
static const f32 cvtf2i_pos_saturation;

View File

@ -71,6 +71,16 @@ struct X64RegAlloc : RegAlloc<Xbyak::Operand::Code, s8,
return Xbyak::Xmm(ereg);
}
bool IsMapped(const Xbyak::Xmm &xmm, size_t opid)
{
for (size_t sid = 0; sid < all_spans.size(); sid++)
{
if (all_spans[sid]->nregf == xmm.getIdx() && all_spans[sid]->contains(opid))
return true;
}
return false;
}
BlockCompiler *compiler;
};