diff --git a/core/rec-ARM/arm_dyna.cpp b/core/rec-ARM/rec_arm.cpp old mode 100755 new mode 100644 similarity index 100% rename from core/rec-ARM/arm_dyna.cpp rename to core/rec-ARM/rec_arm.cpp diff --git a/core/rec-x86/lin86_asm.S b/core/rec-x86/rec_lin86_asm.S similarity index 100% rename from core/rec-x86/lin86_asm.S rename to core/rec-x86/rec_lin86_asm.S diff --git a/core/windows/win86_driver.cpp b/core/rec-x86/rec_x86_driver.cpp similarity index 96% rename from core/windows/win86_driver.cpp rename to core/rec-x86/rec_x86_driver.cpp index b59db8961..be6428592 100644 --- a/core/windows/win86_driver.cpp +++ b/core/rec-x86/rec_x86_driver.cpp @@ -1,7 +1,7 @@ #include "types.h" #ifndef HOST_NO_REC -#include "win86_ngen.h" +#include "rec_x86_ngen.h" @@ -37,6 +37,7 @@ void DetectCpuFeatures() if (detected) return; detected=true; +#if HOST_OS==OS_WINDOWS __try { __asm addps xmm0,xmm0 @@ -83,6 +84,7 @@ void DetectCpuFeatures() { mmx=false; } + #endif } @@ -638,6 +640,10 @@ void gen_hande(u32 w, u32 sz, u32 mode) { //General + #if HOST_OS != OS_WINDOWS + //maintain 16 byte alignment + x86e->Emit(op_sub32, ESP, 12); + #endif if ((sz==SZ_32F || sz==SZ_64F) && w==1) { if (sz==SZ_32F) @@ -646,7 +652,10 @@ void gen_hande(u32 w, u32 sz, u32 mode) } else { - x86e->Emit(op_sub32,ESP,8); + #if HOST_OS == OS_WINDOWS + //on linux, we have scratch space on esp + x86e->Emit(op_sub32,ESP,8); + #endif x86e->Emit(op_movss,x86_mrm(ESP,x86_ptr::create(+4)),XMM1); x86e->Emit(op_movss,x86_mrm(ESP,x86_ptr::create(-0)),XMM0); } @@ -662,6 +671,15 @@ void gen_hande(u32 w, u32 sz, u32 mode) x86e->Emit(op_movd_xmm_from_r32,XMM1,EDX); } } + #if HOST_OS != OS_WINDOWS + //maintain 16 byte alignment + if ((sz == SZ_64F) && w == 1) { + x86e->Emit(op_add32, ESP, 4); + } + else { + x86e->Emit(op_add32, ESP, 12); + } + #endif } x86e->Emit(op_ret); @@ -750,7 +768,7 @@ bool ngen_Rewrite(unat& addr,unat retadr,unat acc) printf("WARNING: SQ AREA READ, %08X from sh4:%08X. THIS IS UNDEFINED ON A REAL DREACMAST.\n", acc, bm_GetBlock(x86e->x86_buff)->addr); } - if ((acc >> 26) == 0x38 && w) //sq ? + if ((acc >> 26) == 0x38) //sq ? { verify(w == 1); x86e->Emit(op_call, x86_ptr_imm(mem_code[1][w][i])); diff --git a/core/windows/win86_il.cpp b/core/rec-x86/rec_x86_il.cpp similarity index 99% rename from core/windows/win86_il.cpp rename to core/rec-x86/rec_x86_il.cpp index 13917124b..b170b6e14 100644 --- a/core/windows/win86_il.cpp +++ b/core/rec-x86/rec_x86_il.cpp @@ -1,7 +1,7 @@ #include "types.h" #ifndef HOST_NO_REC -#include "win86_ngen.h" +#include "rec_x86_ngen.h" #include "hw/sh4/sh4_mmr.h" #include "hw/sh4/sh4_rom.h" diff --git a/core/rec-x86/win86_ngen.cpp b/core/rec-x86/rec_x86_ngen.cpp similarity index 95% rename from core/rec-x86/win86_ngen.cpp rename to core/rec-x86/rec_x86_ngen.cpp index 64986ec51..68d4dedd2 100644 --- a/core/rec-x86/win86_ngen.cpp +++ b/core/rec-x86/rec_x86_ngen.cpp @@ -1,4 +1,8 @@ -#include "win86_ngen.h" +#include "types.h" + +#ifndef HOST_NO_REC + +#include "rec_x86_ngen.h" #if HOST_OS == OS_WINDOWS @@ -125,4 +129,5 @@ naked void DYNACALL ngen_blockcheckfail2(u32 addr) #else u32 gas_offs=offsetof(Sh4RCB,cntx.jdyn); void (*ngen_FailedToFindBlock)()=&ngen_FailedToFindBlock_; +#endif #endif \ No newline at end of file diff --git a/core/rec-x86/win86_ngen.h b/core/rec-x86/rec_x86_ngen.h similarity index 95% rename from core/rec-x86/win86_ngen.h rename to core/rec-x86/rec_x86_ngen.h index 680e9cc5c..7ae2ac1a1 100644 --- a/core/rec-x86/win86_ngen.h +++ b/core/rec-x86/rec_x86_ngen.h @@ -23,7 +23,7 @@ void ngen_LinkBlock_Generic_stub(); void ngen_LinkBlock_cond_Next_stub(); void ngen_LinkBlock_cond_Branch_stub(); void ngen_FailedToFindBlock_(); -void ngen_mainloop(void* p); +void ngen_mainloop(void* cntx); void DYNACALL ngen_blockcheckfail(u32 addr); @@ -56,4 +56,4 @@ struct x86_reg_alloc: RegAlloc void ThawXMM(); }; -extern x86_reg_alloc reg; +extern x86_reg_alloc reg; \ No newline at end of file diff --git a/core/rec-x86/win86_driver.cpp b/core/rec-x86/win86_driver.cpp deleted file mode 100644 index 0e419aba9..000000000 --- a/core/rec-x86/win86_driver.cpp +++ /dev/null @@ -1,790 +0,0 @@ -#include "types.h" - -#ifndef HOST_NO_REC -#include "win86_ngen.h" - - - -struct DynaRBI: RuntimeBlockInfo -{ - x86_block_externs* reloc_info; - - virtual ~DynaRBI() { if (reloc_info) reloc_info->Free(); } - - virtual u32 Relink(); - virtual void Relocate(void* dst) - { - reloc_info->Apply(dst); - } -}; - -x86_block* x86e; - -u32 cycle_counter; - -void* loop_no_update; -void* intc_sched; - -bool sse_1=true; -bool sse_2=true; -bool sse_3=true; -bool ssse_3=true; -bool mmx=true; - -void DetectCpuFeatures() -{ - static bool detected=false; - if (detected) return; - detected=true; - -#if HOST_OS==OS_WINDOWS - __try - { - __asm addps xmm0,xmm0 - } - __except(1) - { - sse_1=false; - } - - __try - { - __asm addpd xmm0,xmm0 - } - __except(1) - { - sse_2=false; - } - - __try - { - __asm addsubpd xmm0,xmm0 - } - __except(1) - { - sse_3=false; - } - - __try - { - __asm phaddw xmm0,xmm0 - } - __except(1) - { - ssse_3=false; - } - - - __try - { - __asm paddd mm0,mm1 - __asm emms; - } - __except(1) - { - mmx=false; - } - #endif -} - - -#define CSC_SIZE 64 -struct csc_et -{ - u32 pc; - void* code; -}; -csc_et csc[CSC_SIZE<32?32:CSC_SIZE]; - - -#define CSC_SHIFT 1 -u32 csc_hash(u32 addr) -{ - return (addr>>CSC_SHIFT)&(CSC_SIZE-1); -} - -u32 csc_mode=0; - -u32 csc_sidx=1; - -x86_reg alloc_regs[]={EBX,EBP,ESI,EDI,NO_REG}; -x86_reg xmm_alloc_regs[]={XMM7,XMM6,XMM5,XMM4,NO_REG}; -f32 DECL_ALIGN(16) thaw_regs[4]; - - -void x86_reg_alloc::Preload(u32 reg,x86_reg nreg) -{ - x86e->Emit(op_mov32,nreg,GetRegPtr(reg)); -} -void x86_reg_alloc::Writeback(u32 reg,x86_reg nreg) -{ - x86e->Emit(op_mov32,GetRegPtr(reg),nreg); -} - -void x86_reg_alloc::Preload_FPU(u32 reg,x86_reg nreg) -{ - x86e->Emit(op_movss,nreg,GetRegPtr(reg)); -} -void x86_reg_alloc::Writeback_FPU(u32 reg,x86_reg nreg) -{ - x86e->Emit(op_movss,GetRegPtr(reg),nreg); -} -#ifdef PROF2 -extern u32 flsh; -#endif - -void x86_reg_alloc::FreezeXMM() -{ - x86_reg* fpreg=xmm_alloc_regs; - f32* slpc=thaw_regs; - while(*fpreg!=-1) - { - if (SpanNRegfIntr(current_opid,*fpreg)) - x86e->Emit(op_movss,slpc++,*fpreg); - fpreg++; - } -#ifdef PROF2 - x86e->Emit(op_add32,&flsh,1); -#endif -} - -void x86_reg_alloc::ThawXMM() -{ - x86_reg* fpreg=xmm_alloc_regs; - f32* slpc=thaw_regs; - while(*fpreg!=-1) - { - if (SpanNRegfIntr(current_opid,*fpreg)) - x86e->Emit(op_movss,*fpreg,slpc++); - fpreg++; - } -} - - -x86_reg_alloc reg; - -u32 ret_hit,ret_all,ret_stc; - -void csc_push(RuntimeBlockInfo* block) -{ - if (csc_mode==0) - { - x86e->Emit(op_mov32,&csc[csc_hash(block->NextBlock)].pc,block->NextBlock); - } - else if (csc_mode==1) - { - //x86e->Emit(op_int3); - x86e->Emit(op_ror32,&csc_sidx,1); - x86e->Emit(op_bsr32,EAX,&csc_sidx); - x86e->Emit(op_mov32,x86_mrm(EAX,sib_scale_8,x86_ptr(csc)),block->NextBlock); - } -} - -void DYNACALL csc_fail(u32 addr,u32 addy) -{ - if (csc_mode==0) - { - //too bad ? - } - else if (csc_mode==1) - { - u32 fail_idx=(csc_sidx>>1)|(csc_sidx<<31); - - printf("Ret Mismatch: %08X instead of %08X!\n",addr,addy); - } -} -void csc_pop(RuntimeBlockInfo* block) -{ - x86_Label* end=x86e->CreateLabel(false,8); - x86_Label* try_dyn=x86e->CreateLabel(false,8); - - //static guess - x86_Label* stc_hit=x86e->CreateLabel(false,8); - x86e->Emit(op_cmp32,ECX,&block->csc_RetCache); - x86e->Emit(op_je,stc_hit); - //if !eq - { - //if (cached) goto dyn - x86e->Emit(op_cmp32,&block->csc_RetCache,-1); - x86e->Emit(op_jne,try_dyn); - //else, do cache - x86e->Emit(op_mov32,&block->csc_RetCache,ECX); - } - - x86e->MarkLabel(stc_hit); - x86e->Emit(op_add32,&ret_stc,1); - if (csc_mode==1) - x86e->Emit(op_rol32,&csc_sidx,1); - x86e->Emit(op_jmp,end); - - x86e->MarkLabel(try_dyn); - - if (csc_mode==0) - { - //csc ! - //x86e->Emit(op_int3); - x86e->Emit(op_mov32,ECX,GetRegPtr(reg_pc_dyn)); - x86e->Emit(op_mov32,EAX,ECX); - x86e->Emit(op_shr32,EAX,CSC_SHIFT); - x86e->Emit(op_and32,EAX,CSC_SIZE-1); - x86e->Emit(op_cmp32,x86_mrm(EAX,sib_scale_8,x86_ptr(csc)),ECX); - } - else if (csc_mode==1) - { - //x86e->Emit(op_int3); - x86e->Emit(op_mov32,ECX,GetRegPtr(reg_pc_dyn)); - x86e->Emit(op_bsr32,EAX,&csc_sidx); - x86e->Emit(op_rol32,&csc_sidx,1); - x86e->Emit(op_mov32,EDX,x86_mrm(EAX,sib_scale_8,x86_ptr(csc))); - x86e->Emit(op_cmp32,EDX,ECX); - } - - - x86e->Emit(op_jne,end); - x86e->Emit(op_add32,&ret_hit,1); - //x86e->Emit(op_jmp,end); - - x86e->MarkLabel(end); - x86e->Emit(op_add32,&ret_all,1); - -} - -void DYNACALL PrintBlock(u32 pc) -{ - printf("block: 0x%08X\n",pc); - for (int i=0;i<16;i++) - printf("%08X ",r[i]); - printf("\n"); -} - -u32* GetRegPtr(u32 reg) -{ - return Sh4_int_GetRegisterPtr((Sh4RegType)reg); -} - -u32 cvld; -u32 rdmt[6]; -extern u32 memops_t,memops_l; - -void CheckBlock(RuntimeBlockInfo* block,x86_ptr_imm place) -{ - s32 sz=block->sh4_code_size; - u32 sa=block->addr; - while(sz>0) - { - void* ptr=(void*)GetMemPtr(sa,4); - if (ptr) - { - if (sz==2) - x86e->Emit(op_cmp16,ptr,*(u16*)ptr); - else - x86e->Emit(op_cmp32,ptr,*(u32*)ptr); - x86e->Emit(op_jne,place); - } - sz-=4; - sa+=4; - } - -} -void ngen_Compile(RuntimeBlockInfo* block,bool force_checks, bool reset, bool staging,bool optimise) -{ - //initialise stuff - DetectCpuFeatures(); - - ((DynaRBI*)block)->reloc_info=0; - - - //Setup emitter - x86e = new x86_block(); - x86e->Init(0,0); - x86e->x86_buff=(u8*)emit_GetCCPtr(); - x86e->x86_size=emit_FreeSpace(); - x86e->do_realloc=false; - - block->code=(DynarecCodeEntryPtr)emit_GetCCPtr(); - - x86e->Emit(op_add32,&memops_t,block->memops); - x86e->Emit(op_add32,&memops_l,block->linkedmemops); - - //run register allocator - reg.DoAlloc(block,alloc_regs,xmm_alloc_regs); - - //block header// - - //block invl. checks - x86e->Emit(op_mov32,ECX,block->addr); - - CheckBlock(block,force_checks?x86_ptr_imm(ngen_blockcheckfail):x86_ptr_imm(ngen_blockcheckfail2)); - - //Scheduler - x86_Label* no_up=x86e->CreateLabel(false,8); - - x86e->Emit(op_sub32,&cycle_counter,block->guest_cycles); - - x86e->Emit(op_jns,no_up); - { - x86e->Emit(op_call,x86_ptr_imm(intc_sched)); - } - - x86e->MarkLabel(no_up); - - //stating counter - if (staging) x86e->Emit(op_sub32,&block->staging_runs,1); - - //profiler - if (prof.enable || 1) - x86e->Emit(op_add32,&block->runs,1); - - if (prof.enable) - { - if (force_checks) - x86e->Emit(op_add32,&prof.counters.blkrun.force_check,1); - - x86e->Emit(op_add32,&prof.counters.blkrun.cycles[block->guest_cycles],1); - } - - for (size_t i=0;ioplist.size();i++) - { - shil_opcode* op=&block->oplist[i]; - - u32 opcd_start=x86e->opcode_count; - if (prof.enable) - { - x86e->Emit(op_add32,&prof.counters.shil.executed[op->op],1); - } - - op->host_offs=x86e->x86_indx; - - if (prof.enable) - { - set reg_wt; - set reg_rd; - - for (int z=0;op->rd.is_reg() && zrd.count();z++) - reg_wt.insert(op->rd._reg+z); - - for (int z=0;op->rd2.is_reg() && zrd2.count();z++) - reg_wt.insert(op->rd2._reg+z); - - for (int z=0;op->rs1.is_reg() && zrs1.count();z++) - reg_rd.insert(op->rs1._reg+z); - - for (int z=0;op->rs2.is_reg() && zrs2.count();z++) - reg_rd.insert(op->rs2._reg+z); - - for (int z=0;op->rs3.is_reg() && zrs3.count();z++) - reg_rd.insert(op->rs3._reg+z); - - set::iterator iter=reg_wt.begin(); - while( iter != reg_wt.end() ) - { - if (reg_rd.count(*iter)) - { - reg_rd.erase(*iter); - x86e->Emit(op_add32, &prof.counters.ralloc.reg_rw[*iter], 1); - } - else - { - x86e->Emit(op_add32, &prof.counters.ralloc.reg_w[*iter], 1); - } - - ++iter; - } - - iter=reg_rd.begin(); - while( iter != reg_rd.end() ) - { - x86e->Emit(op_add32,&prof.counters.ralloc.reg_r[*iter],1); - ++iter; - } - } - - reg.OpBegin(op,i); - - ngen_opcode(block,op,x86e,staging,optimise); - - if (prof.enable) x86e->Emit(op_add32,&prof.counters.shil.host_ops[op->op],x86e->opcode_count-opcd_start); - - reg.OpEnd(op); - } - - block->relink_offset=x86e->x86_indx; - block->relink_data=0; - - x86e->x86_indx+=block->Relink(); - - x86e->Generate(); - block->host_code_size=x86e->x86_indx; - block->host_opcodes=x86e->opcode_count; - - emit_Skip(block->host_code_size); - - delete x86e; - x86e=0; -} - -u32 DynaRBI::Relink() -{ - x86_block* x86e=new x86_block(); - x86e->Init(0,0); - x86e->x86_buff=(u8*)code + relink_offset; - x86e->x86_size=512; - x86e->do_realloc=false; - - if (BlockType==BET_StaticCall || BlockType==BET_DynamicCall) - { - //csc_push(this); - } - - switch(BlockType) - { - case BET_Cond_0: - case BET_Cond_1: - { - x86e->Emit(op_cmp32,GetRegPtr(has_jcond?reg_pc_dyn:reg_sr_T),BlockType&1); - - x86_Label* noBranch=x86e->CreateLabel(0,8); - - x86e->Emit(op_jne,noBranch); - { - //branch block - if (pBranchBlock) - x86e->Emit(op_jmp,x86_ptr_imm(pBranchBlock->code)); - else - x86e->Emit(op_call,x86_ptr_imm(ngen_LinkBlock_cond_Branch_stub)); - } - x86e->MarkLabel(noBranch); - { - //no branch block - if (pNextBlock) - x86e->Emit(op_jmp,x86_ptr_imm(pNextBlock->code)); - else - x86e->Emit(op_call,x86_ptr_imm(ngen_LinkBlock_cond_Next_stub)); - } - } - break; - - - case BET_DynamicRet: - { - //csc_pop(this); - } - case BET_DynamicCall: - case BET_DynamicJump: - { - if (relink_data==0) - { - if (pBranchBlock) - { - x86e->Emit(op_cmp32,GetRegPtr(reg_pc_dyn),pBranchBlock->addr); - x86e->Emit(op_je,x86_ptr_imm(pBranchBlock->code)); - x86e->Emit(op_call,x86_ptr_imm(ngen_LinkBlock_Generic_stub)); - } - else - { - x86e->Emit(op_cmp32,GetRegPtr(reg_pc_dyn),0xFABCDECF); - x86e->Emit(op_call,x86_ptr_imm(ngen_LinkBlock_Generic_stub)); - x86e->Emit(op_je,x86_ptr_imm(ngen_LinkBlock_Generic_stub)); - } - } - else - { - verify(pBranchBlock==0); - x86e->Emit(op_mov32,ECX,GetRegPtr(reg_pc_dyn)); - x86e->Emit(op_jmp,x86_ptr_imm(loop_no_update)); - } - } - break; - - case BET_StaticCall: - case BET_StaticJump: - { - if (pBranchBlock) - x86e->Emit(op_jmp,x86_ptr_imm(pBranchBlock->code)); - else - x86e->Emit(op_call,x86_ptr_imm(ngen_LinkBlock_Generic_stub)); - break; - } - - case BET_StaticIntr: - case BET_DynamicIntr: - if (BlockType==BET_StaticIntr) - { - x86e->Emit(op_mov32,&next_pc,NextBlock); - } - else - { - x86e->Emit(op_mov32,EAX,GetRegPtr(reg_pc_dyn)); - x86e->Emit(op_mov32,&next_pc,EAX); - } - x86e->Emit(op_call,x86_ptr_imm(UpdateINTC)); - - x86e->Emit(op_mov32,ECX,&next_pc); - - x86e->Emit(op_jmp,x86_ptr_imm(loop_no_update)); - - break; - } - - - - x86e->Generate(); - return x86e->x86_indx; -} - - -/* - //10 - R S8 B,M - R S16 B,M - R I32 B,M - R F32 B,M - R F32v2 B{,M} - - //13 - W I8 B,M - W I16 B,M - W I32 B,S,M - W F32 B,S,M - W F32v2 B,S{,M} -*/ - -extern u8* virt_ram_base; -#include "hw/sh4/sh4_mmr.h" - -enum mem_op_type -{ - SZ_8, - SZ_16, - SZ_32I, - SZ_32F, - SZ_64F, -}; - -void gen_hande(u32 w, u32 sz, u32 mode) -{ - static const x86_ptr_imm rwm[2][5]= - { - {x86_ptr_imm(&_vmem_ReadMem8SX32),x86_ptr_imm(&_vmem_ReadMem16SX32),x86_ptr_imm(&ReadMem32),x86_ptr_imm(&ReadMem32),x86_ptr_imm(&ReadMem64),}, - {x86_ptr_imm(&WriteMem8),x86_ptr_imm(&WriteMem16),x86_ptr_imm(&WriteMem32),x86_ptr_imm(&WriteMem32),x86_ptr_imm(&WriteMem64),} - }; - - static const x86_opcode_class opcl_i[2][3]= - { - {op_movsx8to32,op_movsx16to32,op_mov32}, - {op_mov8,op_mov16,op_mov32} - }; - - u32 si=x86e->x86_indx; - - if (mode==0) - { - //Buffer - x86e->Emit(op_mov32,EAX,ECX); - x86e->Emit(op_and32,ECX,0x1FFFFFFF); - - x86_mrm_t buff=x86_mrm(ECX,virt_ram_base); - x86_mrm_t buff4=x86_mrm(ECX,virt_ram_base+4); - - if (sz==SZ_8 || sz==SZ_16 || sz==SZ_32I) - { - if (w==0) - x86e->Emit(opcl_i[w][sz],sz==SZ_8?AL:sz==SZ_16?AX:EAX,buff); - else - x86e->Emit(opcl_i[w][sz],buff,sz==SZ_8?DL:sz==SZ_16?DX:EDX); - } - else - { - if (w==0) - { - x86e->Emit(op_movss,XMM0,buff); - if (sz==SZ_64F) - x86e->Emit(op_movss,XMM1,buff4); - } - else - { - x86e->Emit(op_movss,buff,XMM0); - if (sz==SZ_64F) - x86e->Emit(op_movss,buff4,XMM1); - } - } - } - else if (mode==1) - { - //SQ - verify(w==1); - x86e->Emit(op_mov32,EAX,ECX); - x86e->Emit(op_and32,ECX,0x3f); - - x86e->Emit(op_shr32,EAX,26); - x86e->Emit(op_cmp32,EAX,0x38); - x86_Label* l=x86e->CreateLabel(false,8); - x86e->Emit(op_je,l); - x86e->Emit(op_int3); - x86e->MarkLabel(l); - - if (sz==SZ_32I) - x86e->Emit(op_mov32,x86_mrm(ECX,sq_both),EDX); - else if (sz==SZ_32F || sz==SZ_64F) - { - x86e->Emit(op_movss,x86_mrm(ECX,sq_both),XMM0); - if (sz==SZ_64F) - x86e->Emit(op_movss,x86_mrm(ECX,sq_both+4),XMM1); - } - else - { - die("Can't happen\n"); - } - } - else - { - //General - - //maintain 16 byte alignment - x86e->Emit(op_sub32, ESP, 12); - - if ((sz==SZ_32F || sz==SZ_64F) && w==1) - { - if (sz==SZ_32F) - { - x86e->Emit(op_movd_xmm_to_r32,EDX,XMM0); - } - else - { - x86e->Emit(op_movss,x86_mrm(ESP,x86_ptr::create(+4)),XMM1); - x86e->Emit(op_movss,x86_mrm(ESP,x86_ptr::create(-0)),XMM0); - } - } - - x86e->Emit(op_call,rwm[w][sz]); - - if ((sz==SZ_32F || sz==SZ_64F) && w==0) - { - x86e->Emit(op_movd_xmm_from_r32,XMM0,EAX); - if (sz==SZ_64F) - { - x86e->Emit(op_movd_xmm_from_r32,XMM1,EDX); - } - } - - if ((sz == SZ_64F) && w == 1) { - x86e->Emit(op_add32, ESP, 4); - } - else { - x86e->Emit(op_add32, ESP, 12); - } - } - - x86e->Emit(op_ret); - - emit_Skip(x86e->x86_indx-si); -} - -unat mem_code_base=0; -unat mem_code_end=0; -void* mem_code[3][2][5]; - -void ngen_init() -{ - //Setup emitter - x86e = new x86_block(); - x86e->Init(0,0); - x86e->x86_buff=(u8*)emit_GetCCPtr(); - x86e->x86_size=emit_FreeSpace(); - x86e->do_realloc=false; - - - mem_code_base=(unat)emit_GetCCPtr(); - - for (int sz=0;sz<5;sz++) - { - for (int w=0;w<2;w++) - { - for (int m=0;m<3;m++) - { - if (m==1 && (sz<=SZ_16 || w==0)) - continue; - - mem_code[m][w][sz]=emit_GetCCPtr(); - gen_hande(w,sz,m); - } - } - } - - mem_code_end=(unat)emit_GetCCPtr(); - - x86e->Generate(); - - delete x86e; - - emit_SetBaseAddr(); -} - -void ngen_ResetBlocks() -{ -} - -void ngen_GetFeatures(ngen_features* dst) -{ - dst->InterpreterFallback=false; - dst->OnlyDynamicEnds=false; -} - - -RuntimeBlockInfo* ngen_AllocateBlock() -{ - return new DynaRBI(); -} - - -bool ngen_Rewrite(unat& addr,unat retadr,unat acc) -{ - if (addr>=mem_code_base && addrInit(0,0); - x86e->x86_buff=(u8*)retadr-5; - x86e->x86_size=emit_FreeSpace(); - x86e->do_realloc=false; - - for (int i=0;i<5;i++) - { - for (int w=0;w<2;w++) - { - if ((u32)mem_code[0][w][i]==ca) - { - //found ! - - if ((acc >> 26) == 0x38) //sq ? - { - verify(w == 1); - x86e->Emit(op_call, x86_ptr_imm(mem_code[1][w][i])); - } - else - { - x86e->Emit(op_call, x86_ptr_imm(mem_code[2][w][i])); - } - - x86e->Generate(); - delete x86e; - - addr=retadr-5; - - //printf("Patched: %08X for access @ %08X\n",addr,acc); - return true; - } - } - } - - die("Failed to match the code :(\n"); - - return false; - } - else - { - return false; - } -} -#endif diff --git a/core/rec-x86/win86_il.cpp b/core/rec-x86/win86_il.cpp deleted file mode 100644 index 99fe9f912..000000000 --- a/core/rec-x86/win86_il.cpp +++ /dev/null @@ -1,1529 +0,0 @@ -#include "win86_ngen.h" -#include "hw/sh4/sh4_mmr.h" -#include "hw/sh4/sh4_rom.h" - -void ngen_Bin(shil_opcode* op,x86_opcode_class natop,bool has_imm=true,bool has_wb=true) -{ - //x86e->Emit(op_mov32,EAX,op->rs1.reg_ptr()); - - verify(reg.IsAllocg(op->rs1._reg)); - verify(reg.IsAllocg(op->rd._reg)); - - if (has_wb && reg.mapg(op->rs1)!=reg.mapg(op->rd)) - { - x86e->Emit(op_mov32,reg.mapg(op->rd),reg.mapg(op->rs1)); - } - - if (has_imm && op->rs2.is_imm()) - { - x86e->Emit(natop,has_wb?reg.mapg(op->rd):reg.mapg(op->rs1),op->rs2._imm); - } - else if (op->rs2.is_r32i()) - { - verify(reg.IsAllocg(op->rs2._reg)); - - x86e->Emit(natop,has_wb?reg.mapg(op->rd):reg.mapg(op->rs1),reg.mapg(op->rs2)); - } - else - { - printf("%d \n",op->rs1.type); - verify(false); - } -} - -void ngen_fp_bin(shil_opcode* op,x86_opcode_class natop) -{ - verify(reg.IsAllocf(op->rs1)); - verify(reg.IsAllocf(op->rs2)); - verify(reg.IsAllocf(op->rd)); - - if (op->rd._reg!=op->rs1._reg) - x86e->Emit(op_movss,reg.mapf(op->rd),reg.mapf(op->rs1)); - - if (op->rs2.is_r32f()) - { - x86e->Emit(natop,reg.mapf(op->rd),reg.mapf(op->rs2)); - } - else - { - printf("%d \n",op->rs2.type); - verify(false); - } -// verify(has_wb); - //x86e->Emit(op_movss,op->rd.reg_ptr(),XMM0); -} -void ngen_Unary(shil_opcode* op,x86_opcode_class natop) -{ - verify(reg.IsAllocg(op->rs1)); - verify(reg.IsAllocg(op->rd)); - - if (reg.mapg(op->rs1)!=reg.mapg(op->rd)) - x86e->Emit(op_mov32,reg.mapg(op->rd),reg.mapg(op->rs1)); - - x86e->Emit(natop,reg.mapg(op->rd)); -} - -void* _vmem_read_const(u32 addr,bool& ismem,u32 sz); - -u32 ngen_CC_BytesPushed; -void ngen_CC_Start(shil_opcode* op) -{ - ngen_CC_BytesPushed=0; -} -void ngen_CC_Param(shil_opcode* op,shil_param* par,CanonicalParamType tp) -{ - switch(tp) - { - //push the contents - case CPT_u32: - case CPT_f32: - if (par->is_reg()) - { - if (reg.IsAllocg(*par)) - x86e->Emit(op_push32,reg.mapg(*par)); - else if (reg.IsAllocf(*par)) - { - x86e->Emit(op_sub32,ESP,4); - x86e->Emit(op_movss,x86_mrm(ESP), reg.mapf(*par)); - } - else - { - die("Must not happen !\n"); - x86e->Emit(op_push32,x86_ptr(par->reg_ptr())); - } - } - else if (par->is_imm()) - x86e->Emit(op_push,par->_imm); - else - die("invalid combination"); - ngen_CC_BytesPushed+=4; - break; - //push the ptr itself - case CPT_ptr: - verify(par->is_reg()); - - die("FAIL"); - x86e->Emit(op_push,(unat)par->reg_ptr()); - - for (u32 ri=0; ri<(*par).count(); ri++) - { - if (reg.IsAllocf(*par,ri)) - { - x86e->Emit(op_sub32,ESP,4); - x86e->Emit(op_movss,x86_mrm(ESP),reg.mapfv(*par,ri)); - } - else - { - verify(!reg.IsAllocAny((Sh4RegType)(par->_reg+ri))); - } - } - - - ngen_CC_BytesPushed+=4; - break; - - //store from EAX - case CPT_u64rvL: - case CPT_u32rv: - if (reg.IsAllocg(*par)) - x86e->Emit(op_mov32,reg.mapg(*par),EAX); - /*else if (reg.IsAllocf(*par)) - x86e->Emit(op_movd_xmm_from_r32,reg.mapf(*par),EAX);*/ - else - die("Must not happen!\n"); - break; - - case CPT_u64rvH: - if (reg.IsAllocg(*par)) - x86e->Emit(op_mov32,reg.mapg(*par),EDX); - else - die("Must not happen!\n"); - break; - - //Store from ST(0) - case CPT_f32rv: - verify(reg.IsAllocf(*par)); - x86e->Emit(op_fstp32f,x86_ptr(par->reg_ptr())); - x86e->Emit(op_movss,reg.mapf(*par),x86_ptr(par->reg_ptr())); - break; - - } -} - -void ngen_CC_Call(shil_opcode*op,void* function) -{ - reg.FreezeXMM(); - x86e->Emit(op_call,x86_ptr_imm(function)); - reg.ThawXMM(); -} -void ngen_CC_Finish(shil_opcode* op) -{ - x86e->Emit(op_add32,ESP,ngen_CC_BytesPushed); -} - -extern u32 vrml_431; -#ifdef PROF2 - -extern u32 srmls,srmlu,srmlc; -extern u32 rmls,rmlu; -extern u32 wmls,wmlu; -extern u32 vrd; -#endif - - -void DYNACALL VERIFYME(u32 addr) -{ - verify((addr>>26)==0x38); -} - -extern u8* virt_ram_base; - -/* - - ReadM - I8 GAI1 [m] - I16 GAI2 [m] - I32 GAI4 [m] - F32 GA4 [m] - F32v2 RA4 [m,m] - F32v4 RA4 [m,m,m,m] - F32v4r3i1 RA4 [m,m,m,1.0] - F32v4r3i0 RA4 [m,m,m,0.0] - - WriteM - I8 GA1 - I16 GA2 - I32 GA4 - F32 GA4 - F32v2 SA - F32v4 - F32v4s3 - F32v4s4 - - - //10 - R S8 B,M - R S16 B,M - R I32 B,M - R F32 B,M - R F32v2 B{,M} - - //13 - W I8 B,M - W I16 B,M - W I32 B,S,M - W F32 B,S,M - W F32v2 B,S{,M} -*/ - -extern void* mem_code[3][2][5]; - -void ngen_opcode(RuntimeBlockInfo* block, shil_opcode* op,x86_block* x86e, bool staging, bool optimise) -{ - switch(op->op) - { - case shop_readm: - { - void* fuct=0; - bool isram=false; - verify(op->rs1.is_imm() || op->rs1.is_r32i()); - - verify(op->rs1.is_imm() || reg.IsAllocg(op->rs1)); - verify(op->rs3.is_null() || op->rs3.is_imm() || reg.IsAllocg(op->rs3)); - - for (u32 i=0;ird.count();i++) - { - verify(reg.IsAllocAny((Sh4RegType)(op->rd._reg+i))); - } - - u32 size=op->flags&0x7f; - - if (op->rs1.is_imm()) - { - if (prof.enable) x86e->Emit(op_add32,&prof.counters.shil.readm_const,1); - void* ptr=_vmem_read_const(op->rs1._imm,isram,size); - if (isram) - { -#ifdef PROF2 - x86e->Emit(op_add32,&srmlu,1); -#endif - if (size==1) - x86e->Emit(op_movsx8to32,EAX,ptr); - else if (size==2) - x86e->Emit(op_movsx16to32,EAX,ptr); - else if (size==4) - { - x86e->Emit(op_mov32,EAX,ptr); - //this is a pretty good sieve, but its not perfect. - //whitelisting is much better, but requires side channel data - //Page locking w/ invalidation is another strategy we can try (leads to 'excessive' - //compiling. Maybe a mix of both ?), its what the mainline nulldc uses - if (optimise) - { - if (staging && !is_s8(*(u32*)ptr) && abs((int)op->rs1._imm-(int)block->addr)<=1024) - { - x86_Label* _same=x86e->CreateLabel(false,8); - x86e->Emit(op_cmp32,EAX,*(u32*)ptr); - x86e->Emit(op_je,_same); - x86e->Emit(op_and32,&op->flags,~0x40000000); - x86e->MarkLabel(_same); - - op->flags|=0x40000000; - } - else if (!staging && op->flags & 0x40000000) - { - x86_Label* _same=x86e->CreateLabel(false,8); - x86e->Emit(op_cmp32,EAX,*(u32*)ptr); - x86e->Emit(op_je,_same); - x86e->Emit(op_int3); - x86e->MarkLabel(_same); -#ifdef PROF2 - x86e->Emit(op_add32,&srmlc,1); -#endif - } - } - } - else if (size==8) - { - x86e->Emit(op_mov32,EAX,ptr); - x86e->Emit(op_mov32,EDX,(u8*)ptr+4); - } - else - { - die("Invalid mem read size"); - } - } - else - { -#ifdef PROF2 - x86e->Emit(op_add32,&srmls,1); -#endif - x86e->Emit(op_mov32,ECX,op->rs1._imm); - fuct=ptr; - } - } - else - { - x86e->Emit(op_mov32,ECX,reg.mapg(op->rs1)); - if (op->rs3.is_imm()) - { - x86e->Emit(op_add32,ECX,op->rs3._imm); - if (prof.enable) x86e->Emit(op_add32,&prof.counters.shil.readm_reg_imm,1); - } - else if (op->rs3.is_r32i()) - { - x86e->Emit(op_add32,ECX,reg.mapg(op->rs3)); - if (prof.enable) x86e->Emit(op_add32,&prof.counters.shil.readm_reg_reg,1); - } - else if (!op->rs3.is_null()) - { - die("invalid rs3"); - } - else - if (prof.enable) x86e->Emit(op_add32,&prof.counters.shil.readm_reg,1); -#if 0 - if (op->flags==0x431 || op->flags==0x440) - { - verify(!reg.IsAllocAny(op->rd)); - verify(!reg.IsAllocAny((Sh4RegType)(op->rd._reg+1))); - verify(!reg.IsAllocAny((Sh4RegType)(op->rd._reg+2))); - verify(!reg.IsAllocAny((Sh4RegType)(op->rd._reg+3))); - - x86e->Emit(op_add32,&vrml_431,1); - x86e->Emit(op_mov32,EDX,ECX); - x86e->Emit(op_and32,EDX,0x1FFFFFFF); - x86e->Emit(op_movups,XMM0,x86_mrm(EDX,x86_ptr(virt_ram_base))); - x86e->Emit(op_movaps,op->rd.reg_ptr(),XMM0); - - if (op->flags==0x431) - x86e->Emit(op_mov32,op->rd.reg_ptr()+3,0x3f800000); - else if (op->flags==0x430) - x86e->Emit(op_mov32,op->rd.reg_ptr()+3,0); - - break; - } - - bool vect=op->flags&0x80; - - if (vect) - { - u32 sz=size; - //x86e->Emit(op_add32,&cvld,sz/(op->flags&0x100?8:4)); - x86e->Emit(op_add32,&vrml_431,sz/(op->flags&0x100?8:4)*2); - verify(sz==8 || sz==12 || sz==16 || sz==32 || sz==64); - - void** vmap,** funct; - _vmem_get_ptrs(4,false,&vmap,&funct); - x86e->Emit(op_mov32,EAX,ECX); - x86e->Emit(op_shr32,EAX,24); - x86e->Emit(op_mov32,EAX,x86_mrm(EAX,sib_scale_4,vmap)); - - x86e->Emit(op_test32,EAX,~0x7F); - x86e->Emit(op_jz,x86_ptr_imm::create(op->flags)); - x86e->Emit(op_xchg32,ECX,EAX); - x86e->Emit(op_shl32,EAX,ECX); - x86e->Emit(op_shr32,EAX,ECX); - x86e->Emit(op_and32,ECX,~0x7F); - - int i=0; - for (i=0;(i+16)<=sz;i+=16) - { - x86e->Emit(op_movups,XMM0,x86_mrm(EAX,ECX,sib_scale_1,x86_ptr::create(i))); - if (op->rd._reg&3) - x86e->Emit(op_movups,op->rd.reg_ptr()+i/4,XMM0); - else - x86e->Emit(op_movaps,op->rd.reg_ptr()+i/4,XMM0); - } - for (;(i+8)<=sz;i+=8) - { - x86e->Emit(op_movlps,XMM0,x86_mrm(EAX,ECX,sib_scale_1,x86_ptr::create(i))); - x86e->Emit(op_movlps,op->rd.reg_ptr()+i/4,XMM0); - } - for (;(i+4)<=sz;i+=4) - { - x86e->Emit(op_movss,XMM0,x86_mrm(EAX,ECX,sib_scale_1,x86_ptr::create(i))); - x86e->Emit(op_movss,op->rd.reg_ptr()+i/4,XMM0); - } - - verify(i==sz); - - break; - - } - - if (optimise) - { - if (staging || op->flags&0x80000000) - { - - //opt disabled for now - op->flags|=0x80000000; - - x86_Label* _ram=x86e->CreateLabel(false,8); - void** vmap,** funct; - _vmem_get_ptrs(4,false,&vmap,&funct); - x86e->Emit(op_mov32,EAX,ECX); - x86e->Emit(op_shr32,EAX,24); - x86e->Emit(op_mov32,EAX,x86_mrm(EAX,sib_scale_4,vmap)); - - x86e->Emit(op_test32,EAX,~0x7F); - x86e->Emit(op_jnz,_ram); - - if (staging) - { - x86e->Emit(op_and32,&op->flags,~0x80000000); - } - else - { - //x86e->Emit(op_int3); - } - - x86e->MarkLabel(_ram); - } - - if ( !staging) - { - if (op->flags & 0x80000000) - { -#ifdef PROF2 - x86e->Emit(op_add32,&rmlu,1); -#endif - if (true) - { - u32 sz=op->flags&0x7f; - if (sz!=8) - { - x86e->Emit(op_mov32,EDX,ECX); - x86e->Emit(op_and32,EDX,0x1FFFFFFF); - if (sz==1) - { - x86e->Emit(op_movsx8to32,EAX,x86_mrm(EDX,x86_ptr(virt_ram_base))); - } - else if (sz==2) - { - x86e->Emit(op_movsx16to32,EAX,x86_mrm(EDX,x86_ptr(virt_ram_base))); - } - else if (sz==4) - { - x86e->Emit(op_mov32,EAX,x86_mrm(EDX,x86_ptr(virt_ram_base))); - } - isram=true; - } - } - - } -#ifdef PROF2 - else - { - x86e->Emit(op_add32,&rmls,1); - } -#endif - } - } -#endif -#if 1 - //new code ... - //yay ... - - int Lsz=0; - int sz=size; - if (sz==2) Lsz=1; - if (sz==4 && op->rd.is_r32i()) Lsz=2; - if (sz==4 && op->rd.is_r32f()) Lsz=3; - if (sz==8) Lsz=4; - - //x86e->Emit(op_int3); - - reg.FreezeXMM(); - x86e->Emit(op_call,x86_ptr_imm(mem_code[0][0][Lsz])); - reg.ThawXMM(); - - if (Lsz <= 2) - { - x86e->Emit(op_mov32, reg.mapg(op->rd), EAX); - } - else - { - x86e->Emit(op_movss, reg.mapfv(op->rd, 0), XMM0); - if (Lsz == 4) - x86e->Emit(op_movss, reg.mapfv(op->rd, 1), XMM1); - } - break; -#endif - } - - if (size<=8) - { - - if (size==8 && optimise) - { - verify(op->rd.count()==2 && reg.IsAllocf(op->rd,0) && reg.IsAllocf(op->rd,1)); - - x86e->Emit(op_mov32,EDX,ECX); - x86e->Emit(op_and32,EDX,0x1FFFFFFF); - x86e->Emit(op_movss,reg.mapfv(op->rd,0),x86_mrm(EDX,x86_ptr(virt_ram_base))); - x86e->Emit(op_movss,reg.mapfv(op->rd,1),x86_mrm(EDX,x86_ptr(4+virt_ram_base))); - break; - } - if (!isram) - { - reg.FreezeXMM(); - switch(size) - { - case 1: - if (!fuct) fuct=ReadMem8; - x86e->Emit(op_call,x86_ptr_imm(fuct)); - x86e->Emit(op_movsx8to32,EAX,EAX); - break; - case 2: - if (!fuct) fuct=ReadMem16; - x86e->Emit(op_call,x86_ptr_imm(fuct)); - x86e->Emit(op_movsx16to32,EAX,EAX); - break; - case 4: - if (!fuct) fuct=ReadMem32; - x86e->Emit(op_call,x86_ptr_imm(fuct)); - break; - case 8: - if (!fuct) fuct=ReadMem64; - x86e->Emit(op_call,x86_ptr_imm(fuct)); - break; - default: - verify(false); - } - reg.ThawXMM(); - } - - if (size!=8) - { - if (reg.IsAllocg(op->rd)) - x86e->Emit(op_mov32,reg.mapg(op->rd),EAX); - else if (reg.IsAllocf(op->rd)) - x86e->Emit(op_movd_xmm_from_r32,reg.mapf(op->rd),EAX); - else - x86e->Emit(op_mov32,op->rd.reg_ptr(),EAX); - } - else - { - verify(op->rd.count()==2 && reg.IsAllocf(op->rd,0) && reg.IsAllocf(op->rd,1)); - - x86e->Emit(op_movd_xmm_from_r32,reg.mapfv(op->rd,0),EAX); - x86e->Emit(op_movd_xmm_from_r32,reg.mapfv(op->rd,1),EDX); - } - - } - } - break; - - case shop_writem: - { - u32 size=op->flags&0x7f; - verify(reg.IsAllocg(op->rs1) || op->rs1.is_imm()); - - verify(op->rs2.is_r32() || (op->rs2.count()==2 && reg.IsAllocf(op->rs2,0) && reg.IsAllocf(op->rs2,1))); - - if (op->rs1.is_imm() && size<=4) - { - if (prof.enable) x86e->Emit(op_add32,&prof.counters.shil.readm_const,1); - bool isram; - void* ptr=_vmem_read_const(op->rs1._imm,isram,size); - if (isram) - { - if (size<=2) - x86e->Emit(op_mov32,EAX,reg.mapg(op->rs2)); - if (size==1) - x86e->Emit(op_mov8,ptr,EAX); - else if (size==2) - x86e->Emit(op_mov16,ptr,EAX); - else if (size==4) - { - if (op->rs2.is_r32i()) - x86e->Emit(op_mov32,ptr,reg.mapg(op->rs2)); - else - x86e->Emit(op_movss,ptr,reg.mapf(op->rs2)); - } - - else if (size==8) - { - die("A"); - } - else - die("Invalid mem read size"); - - goto done_writem; - } - else - x86e->Emit(op_mov32,ECX,op->rs1._imm); - } - else - { - x86e->Emit(op_mov32,ECX,reg.mapg(op->rs1)); - } - - if (op->rs3.is_imm()) - { - x86e->Emit(op_add32,ECX,op->rs3._imm); - } - else if (op->rs3.is_r32i()) - { - verify(reg.IsAllocg(op->rs3)); - x86e->Emit(op_add32,ECX,reg.mapg(op->rs3)); - } - else if (!op->rs3.is_null()) - { - printf("rs3: %08X\n",op->rs3.type); - die("invalid rs3"); - } - -#if 1 - //new code ... - //yay ... - - int Lsz=0; - int sz=size; - if (sz==2) Lsz=1; - if (sz==4 && op->rs2.is_r32i()) Lsz=2; - if (sz==4 && op->rs2.is_r32f()) Lsz=3; - if (sz==8) Lsz=4; - - //x86e->Emit(op_int3); - //if (Lsz==0) - { - - if (Lsz<=2) - x86e->Emit(op_mov32,EDX,reg.mapg(op->rs2)); - else - { - x86e->Emit(op_movss,XMM0,reg.mapfv(op->rs2,0)); - if (Lsz==4) - x86e->Emit(op_movss,XMM1,reg.mapfv(op->rs2,1)); - } - - reg.FreezeXMM(); - x86e->Emit(op_call,x86_ptr_imm(mem_code[2][1][Lsz])); - reg.ThawXMM(); - - break; - } -#endif - - die("woohoo"); - /* - if (size==8 && optimise) - { - verify(!reg.IsAllocAny(op->rd)); - verify(!reg.IsAllocAny((Sh4RegType)(op->rd._reg+1))); - - x86e->Emit(op_mov32,EDX,ECX); - x86e->Emit(op_and32,EDX,0x1FFFFFFF); - x86e->Emit(op_movlps,XMM0,op->rs2.reg_ptr()); - x86e->Emit(op_movlps,x86_mrm(EDX,x86_ptr(virt_ram_base)),XMM0); - break; - }*/ - - bool vect=op->flags&0x80; - - if (!vect && size<=8) - { - if (size!=8) - { - if (reg.IsAllocg(op->rs2)) - { - x86e->Emit(op_mov32,EDX,reg.mapg(op->rs2)); - } - else if (reg.IsAllocf(op->rs2)) - { - x86e->Emit(op_movd_xmm_to_r32,EDX,reg.mapf(op->rs2)); - } - else - { - die("Must not happen\n"); - } - } - else - { - verify(op->rs2.count()==2 && reg.IsAllocf(op->rs2,0) && reg.IsAllocf(op->rs2,1)); - - x86e->Emit(op_sub32,ESP,8); - //[ESP+4]=rs2[1]//-4 +8= +4 - //[ESP+0]=rs2[0]//-8 +8 = 0 - x86e->Emit(op_movss,x86_mrm(ESP,x86_ptr::create(+4)),reg.mapfv(op->rs2,1)); - x86e->Emit(op_movss,x86_mrm(ESP,x86_ptr::create(-0)),reg.mapfv(op->rs2,0)); - } - - - - if (optimise) - { - if (staging || op->flags&0x80000000) - { - - //opt disabled for now - op->flags|=0x80000000; - x86_Label* _ram=x86e->CreateLabel(false,8); - void** vmap,** funct; - _vmem_get_ptrs(4,false,&vmap,&funct); - x86e->Emit(op_mov32,EAX,ECX); - x86e->Emit(op_shr32,EAX,24); - x86e->Emit(op_mov32,EAX,x86_mrm(EAX,sib_scale_4,vmap)); - - x86e->Emit(op_test32,EAX,~0x7F); - x86e->Emit(op_jnz,_ram); - - if (staging) - { - x86e->Emit(op_and32,&op->flags,~0x80000000); - } - else - { - //x86e->Emit(op_int3); - } - - x86e->MarkLabel(_ram); - } - - - if (!staging) - { - if (op->flags & 0x80000000) - { -#ifdef PROF2 - x86e->Emit(op_add32,&wmlu,1); -#endif - if (false && size<4) - { - x86e->Emit(op_mov32,EAX,ECX); - x86e->Emit(op_and32,EAX,0x1FFFFFFF); - - if (size==1) - { - x86e->Emit(op_mov8,x86_mrm(EAX,x86_ptr(virt_ram_base)),EDX); - } - else if (size==2) - { - x86e->Emit(op_mov16,x86_mrm(EAX,x86_ptr(virt_ram_base)),EDX); - } - else if (size==4) - { - x86e->Emit(op_mov32,x86_mrm(EAX,x86_ptr(virt_ram_base)),EAX); - } - break; - } - - } -#ifdef PROF2 - else - x86e->Emit(op_add32,&wmls,1); -#endif - } - } - } - - if (vect) - { - u32 sz=op->flags&0x7f; - x86e->Emit(op_add32,&vrml_431,sz/(op->flags&0x100?8:4)*5); - verify(sz==8 || sz==12 || sz==16 || sz==32 || sz==64); - - void** vmap,** funct; - _vmem_get_ptrs(4,false,&vmap,&funct); - x86e->Emit(op_mov32,EAX,ECX); - x86e->Emit(op_shr32,EAX,24); - x86e->Emit(op_mov32,EAX,x86_mrm(EAX,sib_scale_4,vmap)); - - x86e->Emit(op_test32,EAX,~0x7F); - x86e->Emit(op_jz,x86_ptr_imm::create(op->flags)); - x86e->Emit(op_xchg32,ECX,EAX); - x86e->Emit(op_shl32,EAX,ECX); - x86e->Emit(op_shr32,EAX,ECX); - x86e->Emit(op_and32,ECX,~0x7F); - - u32 i=0; - for (; (i+16)<=sz; i+=16) - { - if (op->rs2._reg&3) - x86e->Emit(op_movups,XMM0,op->rs2.reg_ptr()+i/4); - else - x86e->Emit(op_movaps,XMM0,op->rs2.reg_ptr()+i/4); - - x86e->Emit(op_movups,x86_mrm(EAX,ECX,sib_scale_1,x86_ptr::create(i)),XMM0); - } - for (; (i+8)<=sz; i+=8) - { - x86e->Emit(op_movlps,XMM0,op->rs2.reg_ptr()+i/4); - x86e->Emit(op_movlps,x86_mrm(EAX,ECX,sib_scale_1,x86_ptr::create(i)),XMM0); - } - for (; (i+4)<=sz; i+=4) - { - x86e->Emit(op_movss,XMM0,op->rs2.reg_ptr()+i/4); - x86e->Emit(op_movss,x86_mrm(EAX,ECX,sib_scale_1,x86_ptr::create(i)),XMM0); - } - - verify(i==sz); - } - else - { - - reg.FreezeXMM(); - switch(size) - { - case 1: - x86e->Emit(op_call,x86_ptr_imm(&WriteMem8)); - break; - case 2: - x86e->Emit(op_call,x86_ptr_imm(&WriteMem16)); - break; - case 4: - x86e->Emit(op_call,x86_ptr_imm(&WriteMem32)); - break; - case 8: - x86e->Emit(op_call,x86_ptr_imm(&WriteMem64)); - break; - default: - verify(false); - } - reg.ThawXMM(); - } - } - done_writem: - break; - - case shop_ifb: - { - /* - //reg alloc should be flushed here. Add Check - for (int i=0;irs1._imm) - { - x86e->Emit(op_mov32,&next_pc,op->rs2._imm); - } - x86e->Emit(op_mov32,ECX,op->rs3._imm); -#ifdef PROF2 - x86e->Emit(op_add32,&OpDesc[op->rs3._imm]->fallbacks,1); - x86e->Emit(op_adc32,((u8*)&OpDesc[op->rs3._imm]->fallbacks)+4,0); -#endif - x86e->Emit(op_call,x86_ptr_imm(OpDesc[op->rs3._imm]->oph)); - } - break; - - case shop_jdyn: - { - - verify(reg.IsAllocg(op->rs1)); - verify(reg.IsAllocg(op->rd)); - - x86e->Emit(op_mov32,reg.mapg(op->rd),reg.mapg(op->rs1)); - if (op->rs2.is_imm()) - { - x86e->Emit(op_add32,reg.mapg(op->rd),op->rs2._imm); - } - //x86e->Emit(op_mov32,op->rd.reg_ptr(),EAX); - } - break; - - case shop_jcond: - { - verify(block->has_jcond); - verify(reg.IsAllocg(op->rs1)); - verify(reg.IsAllocg(op->rd)); - - x86e->Emit(op_mov32,reg.mapg(op->rd),reg.mapg(op->rs1)); - //x86e->Emit(op_mov32,op->rd.reg_ptr(),EAX); - } - break; - - case shop_mov64: - { - verify(op->rd.is_r64()); - verify(op->rs1.is_r64()); - - verify(reg.IsAllocf(op->rs1,0) && reg.IsAllocf(op->rs1,1)); - verify(reg.IsAllocf(op->rd,0) && reg.IsAllocf(op->rd,1)); - - - x86e->Emit(op_movaps,reg.mapfv(op->rd,0),reg.mapfv(op->rs1,0)); - x86e->Emit(op_movaps,reg.mapfv(op->rd,1),reg.mapfv(op->rs1,1)); - } - break; - - case shop_mov32: - { - verify(op->rd.is_r32()); - - if (op->rs1.is_imm()) - { - if (op->rd.is_r32i()) - { - x86e->Emit(op_mov32,reg.mapg(op->rd),op->rs1._imm); - // x86e->Emit(op_add32,&rdmt[4],1); - } - else - { - //verify(!reg.IsAllocAny(op->rd)); - x86e->Emit(op_mov32,EAX,op->rs1._imm); - x86e->Emit(op_movd_xmm_from_r32,reg.mapf(op->rd),EAX); - // x86e->Emit(op_add32,&rdmt[5],1); - } - } - else if (op->rs1.is_r32()) - { - u32 type=0; - - if (reg.IsAllocf(op->rd)) - type|=1; - - if (reg.IsAllocf(op->rs1)) - type|=2; - // x86e->Emit(op_add32,&rdmt[type],1); - switch(type) - { - case 0: //reg=reg - if (reg.mapg(op->rd) != reg.mapg(op->rs1)) - x86e->Emit(op_mov32,reg.mapg(op->rd),reg.mapg(op->rs1)); - - break; - - case 1: //xmm=reg - x86e->Emit(op_movd_xmm_from_r32,reg.mapf(op->rd),reg.mapg(op->rs1)); - break; - - case 2: //reg=xmm - x86e->Emit(op_movd_xmm_to_r32,reg.mapg(op->rd),reg.mapf(op->rs1)); - break; - - case 3: //xmm=xmm - if (reg.mapf(op->rd) != reg.mapf(op->rs1)) - x86e->Emit(op_movss,reg.mapf(op->rd),reg.mapf(op->rs1)); - else - printf("Renamed fmov !\n"); - break; - - } - } - else - { - die("Invalid mov32 size"); - } - - } - break; - -//if CANONICAL_TEST is defined all opcodes use the C-based canonical implementation ! -//#define CANONICAL_TEST 1 -#ifndef CANONICAL_TEST - case shop_and: ngen_Bin(op,op_and32); break; - case shop_or: ngen_Bin(op,op_or32); break; - case shop_xor: ngen_Bin(op,op_xor32); break; - case shop_add: ngen_Bin(op,op_add32); break; - case shop_sub: ngen_Bin(op,op_sub32); break; - case shop_ror: ngen_Bin(op,op_ror32); break; - - case shop_shl: - case shop_shr: - case shop_sar: - { - x86_opcode_class opcd[]={op_shl32,op_shr32,op_sar32}; - ngen_Bin(op,opcd[op->op-shop_shl]); - } - break; - - case shop_rocr: - case shop_rocl: - { - x86e->Emit(op_sar32,reg.mapg(op->rs2),1); - x86e->Emit(op->op==shop_rocr?op_rcr32:op_rcl32,reg.mapg(op->rd),1); - x86e->Emit(op_rcl32,reg.mapg(op->rd2),1); - } - break; - - case shop_test: - case shop_seteq: - case shop_setge: - case shop_setgt: - case shop_setae: - case shop_setab: - { - x86_opcode_class opcls1=op->op==shop_test?op_test32:op_cmp32; - x86_opcode_class opcls2[]={op_setz,op_sete,op_setge,op_setg,op_setae,op_seta }; - ngen_Bin(op,opcls1,true,false); - x86e->Emit(opcls2[op->op-shop_test],AL); - x86e->Emit(op_movzx8to32,reg.mapg(op->rd),AL); - } - break; - - case shop_adc: - { - x86e->Emit(op_sar32,reg.mapg(op->rs3),1); - if (reg.mapg(op->rd)!=reg.mapg(op->rs1)) - x86e->Emit(op_mov32,reg.mapg(op->rd),reg.mapg(op->rs1)); - x86e->Emit(op_adc32,reg.mapg(op->rd),reg.mapg(op->rs2)); - x86e->Emit(op_rcl32,reg.mapg(op->rd2),1); - } - break; - - //rd=rs1<rs1)); - verify(op->rs2.is_imm() || reg.IsAllocg(op->rs2)); - verify(reg.IsAllocg(op->rd)); - - x86_opcode_class sl32=op->op==shop_shad?op_sal32:op_shl32; - x86_opcode_class sr32=op->op==shop_shad?op_sar32:op_shr32; - - if (reg.mapg(op->rd)!=reg.mapg(op->rs1)) - x86e->Emit(op_mov32,reg.mapg(op->rd),reg.mapg(op->rs1)); - - if (op->rs2.is_imm()) - { - die("sh*d: no imms please\n"); - } - else - { - x86e->Emit(op_mov32,ECX,reg.mapg(op->rs2)); - - x86_Label* _exit=x86e->CreateLabel(false,8); - x86_Label* _neg=x86e->CreateLabel(false,8); - x86_Label* _nz=x86e->CreateLabel(false,8); - - x86e->Emit(op_cmp32,reg.mapg(op->rs2),0); - x86e->Emit(op_js,_neg); - { - //>=0 - //r[n]<<=sf; - x86e->Emit(sl32,reg.mapg(op->rd),ECX); - x86e->Emit(op_jmp,_exit); - } - x86e->MarkLabel(_neg); - x86e->Emit(op_test32,reg.mapg(op->rs2),0x1f); - x86e->Emit(op_jnz,_nz); - { - //1fh==0 - if (op->op!=shop_shad) - { - //r[n]=0; - x86e->Emit(op_mov32,reg.mapg(op->rd),0); - } - else - { - //r[n]>>=31; - x86e->Emit(op_sar32,reg.mapg(op->rd),31); - } - x86e->Emit(op_jmp,_exit); - } - x86e->MarkLabel(_nz); - { - //<0 - //r[n]>>=(-sf); - x86e->Emit(op_neg32,ECX); - x86e->Emit(sr32,reg.mapg(op->rd),ECX); - } - x86e->MarkLabel(_exit); - } - } - break; - - case shop_swaplb: - { - if (reg.mapg(op->rd)!=reg.mapg(op->rs1)) - x86e->Emit(op_mov32,reg.mapg(op->rd),reg.mapg(op->rs1)); - x86e->Emit(op_ror16,reg.mapg(op->rd),8); - } - break; - - - case shop_neg: ngen_Unary(op,op_neg32); break; - case shop_not: ngen_Unary(op,op_not32); break; - - - case shop_sync_sr: - { - //reg alloc should be flushed here. Add Check - for (int i=0;i<8;i++) - { - verify(!reg.IsAllocAny((Sh4RegType)(reg_r0+i))); - verify(!reg.IsAllocAny((Sh4RegType)(reg_r0_Bank+i))); - } - - verify(!reg.IsAllocAny(reg_old_sr_status)); - verify(!reg.IsAllocAny(reg_sr_status)); - - //reg alloc should be flushed here, add checks - x86e->Emit(op_call,x86_ptr_imm(UpdateSR)); - } - break; - - case shop_sync_fpscr: - { - //reg alloc should be flushed here. Add Check - for (int i=0;i<16;i++) - { - verify(!reg.IsAllocAny((Sh4RegType)(reg_fr_0+i))); - verify(!reg.IsAllocAny((Sh4RegType)(reg_xf_0+i))); - } - - verify(!reg.IsAllocAny(reg_old_fpscr)); - verify(!reg.IsAllocAny(reg_fpscr)); - - - //reg alloc should be flushed here, add checks - x86e->Emit(op_call,x86_ptr_imm(UpdateFPSCR)); - } - break; - - - case shop_mul_u16: - case shop_mul_s16: - case shop_mul_i32: - case shop_mul_u64: - case shop_mul_s64: - { - verify(reg.IsAllocg(op->rs1)); - verify(reg.IsAllocg(op->rs2)); - verify(reg.IsAllocg(op->rd)); - - x86_opcode_class opdt[]={op_movzx16to32,op_movsx16to32,op_mov32,op_mov32,op_mov32}; - x86_opcode_class opmt[]={op_mul32,op_mul32,op_mul32,op_mul32,op_imul32}; - //only the top 32 bits are different on signed vs unsigned - - u32 opofs=op->op-shop_mul_u16; - - x86e->Emit(opdt[opofs],EAX,reg.mapg(op->rs1)); - x86e->Emit(opdt[opofs],EDX,reg.mapg(op->rs2)); - - x86e->Emit(opmt[opofs],EDX); - x86e->Emit(op_mov32,reg.mapg(op->rd),EAX); - - if (op->op>=shop_mul_u64) - x86e->Emit(op_mov32,reg.mapg(op->rd2),EDX); - } - break; - - - //fpu - case shop_fadd: - case shop_fsub: - case shop_fmul: - case shop_fdiv: - { - verify(reg.IsAllocf(op->rs1)); - verify(reg.IsAllocf(op->rs2)); - verify(reg.IsAllocf(op->rd)); - - const x86_opcode_class opcds[]= { op_addss, op_subss, op_mulss, op_divss }; - ngen_fp_bin(op,opcds[op->op-shop_fadd]); - } - break; - - case shop_fabs: - { - verify(reg.IsAllocf(op->rs1)); - verify(reg.IsAllocf(op->rd)); - - static DECL_ALIGN(16) u32 AND_ABS_MASK[4] = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF }; - - verify(op->rd._reg==op->rs1._reg); - x86e->Emit(op_pand,reg.mapf(op->rd),AND_ABS_MASK); - } - break; - - case shop_fneg: - { - verify(reg.IsAllocf(op->rs1)); - verify(reg.IsAllocf(op->rd)); - - static DECL_ALIGN(16) u32 XOR_NEG_MASK[4] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; - - verify(op->rd._reg==op->rs1._reg); - x86e->Emit(op_pxor,reg.mapf(op->rd),XOR_NEG_MASK); - } - break; - - case shop_fsca: - { - verify(op->rs1.is_r32i()); - - //verify(op->rd.is_vector); //double ? vector(2) ? - - verify(reg.IsAllocg(op->rs1)); - verify(reg.IsAllocf(op->rd,0) && reg.IsAllocf(op->rd,1)); - - //sin/cos - x86e->Emit(op_movzx16to32,EAX,reg.mapg(op->rs1)); - x86e->Emit(op_movss,reg.mapfv(op->rd,0),x86_mrm(EAX,sib_scale_8,x86_ptr(&sin_table->u[0]))); - x86e->Emit(op_movss,reg.mapfv(op->rd,1),x86_mrm(EAX,sib_scale_8,x86_ptr(&sin_table->u[1]))); - } - break; - - case shop_fipr: - { - //rd=rs1*rs2 (vectors) -// verify(!reg.IsAllocAny(op->rs1)); -// verify(!reg.IsAllocAny(op->rs2)); - verify(reg.IsAllocf(op->rd)); - - verify(op->rs1.is_r32fv()==4); - verify(op->rs2.is_r32fv()==4); - verify(op->rd.is_r32()); - - if (sse_3) - { - x86_reg xmm=reg.mapf(op->rd); - - x86e->Emit(op_movaps ,xmm,op->rs1.reg_ptr()); - x86e->Emit(op_mulps ,xmm,op->rs2.reg_ptr()); - //xmm0={a0 ,a1 ,a2 ,a3} - x86e->Emit(op_haddps,xmm,xmm); //xmm0={a0+a1 ,a2+a3 ,a0+a1 ,a2+a3} - x86e->Emit(op_haddps,xmm,xmm); //xmm0={(a0+a1)+(a2+a3) ,(a0+a1)+(a2+a3),(a0+a1)+(a2+a3),(a0+a1)+(a2+a3)} - } - else - { - x86_reg xmm=reg.mapf(op->rd); - - x86e->Emit(op_movaps ,xmm,op->rs1.reg_ptr()); - x86e->Emit(op_mulps ,xmm,op->rs2.reg_ptr()); - x86e->Emit(op_movhlps ,XMM1,xmm); - x86e->Emit(op_addps ,xmm,XMM1); - x86e->Emit(op_movaps ,XMM1,xmm); - x86e->Emit(op_shufps ,XMM1,XMM1,1); - x86e->Emit(op_addss ,xmm,XMM1); - } - } - break; - - case shop_fsqrt: - { - verify(reg.IsAllocf(op->rs1)); - verify(reg.IsAllocf(op->rd)); - - //rd=sqrt(rs1) - x86e->Emit(op_sqrtss ,reg.mapf(op->rd),reg.mapf(op->rs1)); - //x86e->Emit(op_movss ,op->rd.reg_ptr(),XMM0); - } - break; - - case shop_ftrv: - { -#ifdef PROF2 - x86e->Emit(op_add32,&vrd,16); -#endif - verify(!reg.IsAllocAny(op->rs1)); - verify(!reg.IsAllocAny(op->rs2)); - verify(!reg.IsAllocAny(op->rd)); - - //rd(vector)=rs1(vector)*rs2(matrix) - verify(op->rd.is_r32fv()==4); - verify(op->rs1.is_r32fv()==4); - verify(op->rs2.is_r32fv()==16); - -#if 1 - //load the vector .. - if (sse_2) - { - x86e->Emit(op_movaps ,XMM3,op->rs1.reg_ptr()); //xmm0=vector - x86e->Emit(op_pshufd ,XMM0,XMM3,0); //xmm0={v0} - x86e->Emit(op_pshufd ,XMM1,XMM3,0x55); //xmm1={v1} - x86e->Emit(op_pshufd ,XMM2,XMM3,0xaa); //xmm2={v2} - x86e->Emit(op_pshufd ,XMM3,XMM3,0xff); //xmm3={v3} - } - else - { - x86e->Emit(op_movaps ,XMM0,op->rs1.reg_ptr()); //xmm0=vector - - x86e->Emit(op_movaps ,XMM3,XMM0); //xmm3=vector - x86e->Emit(op_shufps ,XMM0,XMM0,0); //xmm0={v0} - x86e->Emit(op_movaps ,XMM1,XMM3); //xmm1=vector - x86e->Emit(op_movaps ,XMM2,XMM3); //xmm2=vector - x86e->Emit(op_shufps ,XMM3,XMM3,0xff); //xmm3={v3} - x86e->Emit(op_shufps ,XMM1,XMM1,0x55); //xmm1={v1} - x86e->Emit(op_shufps ,XMM2,XMM2,0xaa); //xmm2={v2} - } - - //do the matrix mult ! - x86e->Emit(op_mulps ,XMM0,op->rs2.reg_ptr() + 0); //v0*=vm0 - x86e->Emit(op_mulps ,XMM1,op->rs2.reg_ptr() + 4); //v1*=vm1 - x86e->Emit(op_mulps ,XMM2,op->rs2.reg_ptr() + 8); //v2*=vm2 - x86e->Emit(op_mulps ,XMM3,op->rs2.reg_ptr() + 12); //v3*=vm3 - - x86e->Emit(op_addps ,XMM0,XMM1); //sum it all up - x86e->Emit(op_addps ,XMM2,XMM3); - x86e->Emit(op_addps ,XMM0,XMM2); - - x86e->Emit(op_movaps ,op->rd.reg_ptr(),XMM0); -#else - /* - AABB CCDD - - ABCD * 0 1 2 3 0 1 4 5 - 4 5 6 7 2 3 6 7 - 8 9 a b 8 9 c d - c d e f a b e f - */ - - x86e->Emit(op_movaps ,XMM1,op->rs1.reg_ptr()); //xmm1=vector - - x86e->Emit(op_pshufd ,XMM0,XMM1,0x05); //xmm0={v0,v0,v1,v1} - x86e->Emit(op_pshufd ,XMM1,XMM1,0xaf); //xmm1={v2,v2,v3,v3} - - x86e->Emit(op_movaps,XMM2,XMM0); //xmm2={v0,v0,v1,v1} - x86e->Emit(op_movaps,XMM3,XMM1); //xmm3={v2,v2,v3,v3} - - x86e->Emit(op_mulps ,XMM0,op->rs2.reg_ptr() + 0); //aabb * 0145 - x86e->Emit(op_mulps ,XMM2,op->rs2.reg_ptr() + 4); //aabb * 2367 - x86e->Emit(op_mulps ,XMM1,op->rs2.reg_ptr() + 8); //ccdd * 89cd - x86e->Emit(op_mulps ,XMM3,op->rs2.reg_ptr() + 12); //ccdd * abef - - - x86e->Emit(op_addps ,XMM0,XMM1); //sum it all up - x86e->Emit(op_addps ,XMM2,XMM3); - - //XMM0 -> A0C8 | A1C9 | B4DC | B5DD - verify(sse_3); - - x86e->Emit(op_shufps,XMM0,XMM0,0x27); //A0C8 B4DC A1C9 B5DC - x86e->Emit(op_shufps,XMM2,XMM2,0x27); - - x86e->Emit(op_haddps,XMM0,XMM2); //haddps ={a0+a1 ,a2+a3 ,b0+b1 ,b2+b3} - - - x86e->Emit(op_movaps ,op->rd.reg_ptr(),XMM0); -#endif - } - break; - - case shop_fmac: - { - verify(reg.IsAllocf(op->rs1)); - verify(reg.IsAllocf(op->rs2)); - verify(reg.IsAllocf(op->rs3)); - verify(reg.IsAllocf(op->rd)); - - //rd=rs1+rs2*rs3 - //rd might be rs1,rs2 or rs3, so can't prestore here (iirc, rd==rs1==fr0) - x86e->Emit(op_movss ,XMM0,reg.mapf(op->rs2)); - x86e->Emit(op_mulss ,XMM0,reg.mapf(op->rs3)); - x86e->Emit(op_addss ,XMM0,reg.mapf(op->rs1)); - x86e->Emit(op_movss ,reg.mapf(op->rd),XMM0); - } - break; - - case shop_fsrra: - { - verify(reg.IsAllocf(op->rs1)); - verify(reg.IsAllocf(op->rd)); - - //rd=1/sqrt(rs1) - static float one=1.0f; - x86e->Emit(op_sqrtss ,XMM0,reg.mapf(op->rs1)); - x86e->Emit(op_movss ,reg.mapf(op->rd),&one); - x86e->Emit(op_divss ,reg.mapf(op->rd),XMM0); - } - break; - - case shop_fseteq: - case shop_fsetgt: - { - verify(reg.IsAllocf(op->rs1)); - verify(reg.IsAllocf(op->rs2)); - verify(reg.IsAllocg(op->rd)); - - //x86e->Emit(op_movss,XMM0,op->rs1.reg_ptr()); - x86e->Emit(op_ucomiss,reg.mapf(op->rs1),reg.mapf(op->rs2)); - - if (op->op==shop_fseteq) - { - //special case - //We want to take in account the 'unordered' case on the fpu - x86e->Emit(op_lahf); - x86e->Emit(op_test8,AH,0x44); - x86e->Emit(op_setnp,AL); - } - else - { - x86e->Emit(op_seta,AL); - } - - x86e->Emit(op_movzx8to32,reg.mapg(op->rd),AL); - } - break; - - case shop_pref: - { - verify(op->rs1.is_r32i()); - verify(reg.IsAllocg(op->rs1)); - - if (op->flags==0x1337) - { - // - x86e->Emit(op_mov32 ,ECX,reg.mapg(op->rs1)); - x86e->Emit(op_call,x86_ptr_imm(&VERIFYME)); //call do_sqw_mmu - } - - x86e->Emit(op_mov32 ,EDX,reg.mapg(op->rs1)); - x86e->Emit(op_mov32 ,ECX,reg.mapg(op->rs1)); - x86e->Emit(op_shr32 ,EDX,26); - - x86_Label* nosq=x86e->CreateLabel(false,8); - - x86e->Emit(op_cmp32,EDX,0x38); - x86e->Emit(op_jne,nosq); - { - if (CCN_MMUCR.AT) - x86e->Emit(op_call,x86_ptr_imm(&do_sqw_mmu)); //call do_sqw_mmu - else - { - x86e->Emit(op_mov32 ,EDX,(u32)sq_both); - x86e->Emit(op_call32,x86_ptr(&do_sqw_nommu)); //call [do_sqw_nommu] - } - } - x86e->MarkLabel(nosq); - } - break; - - case shop_ext_s8: - case shop_ext_s16: - { - verify(op->rd.is_r32i()); - verify(op->rs1.is_r32i()); - - verify(reg.IsAllocg(op->rd)); - verify(reg.IsAllocg(op->rs1)); - - x86e->Emit(op_mov32,EAX,reg.mapg(op->rs1)); - - if (op->op==shop_ext_s8) - x86e->Emit(op_movsx8to32,reg.mapg(op->rd),EAX); - else - x86e->Emit(op_movsx16to32,reg.mapg(op->rd),EAX); - } - break; - - case shop_cvt_f2i_t: - verify(op->rd.is_r32i()); - verify(op->rs1.is_r32f()); - verify(reg.IsAllocg(op->rd)); - verify(reg.IsAllocf(op->rs1)); - - x86e->Emit(op_cvttss2si,reg.mapg(op->rd),reg.mapf(op->rs1)); - break; - - //i hope that the round mode bit is set properly here :p - case shop_cvt_i2f_n: - case shop_cvt_i2f_z: - verify(op->rd.is_r32f()); - verify(op->rs1.is_r32i()); - verify(reg.IsAllocf(op->rd)); - verify(reg.IsAllocg(op->rs1)); - - x86e->Emit(op_cvtsi2ss,reg.mapf(op->rd),reg.mapg(op->rs1)); - //x86e->Emit(op_movss,op->rd.reg_ptr(),XMM0); - break; - - case shop_frswap: - { - verify(op->rd._reg==op->rs2._reg); - verify(op->rd2._reg==op->rs1._reg); - - verify(op->rs1.count()==16 && op->rs2.count()==16); - verify(op->rd2.count()==16 && op->rd.count()==16); -#ifdef PROF2 - x86e->Emit(op_add32,&vrd,32); -#endif - for (int i=0;i<4;i++) - { - x86e->Emit(op_movaps,XMM0,op->rs1.reg_ptr()+i*4); - x86e->Emit(op_movaps,XMM1,op->rs2.reg_ptr()+i*4); - x86e->Emit(op_movaps,op->rd.reg_ptr()+i*4,XMM0); - x86e->Emit(op_movaps,op->rd2.reg_ptr()+i*4,XMM1); - } - } - break; - - case shop_div32s: - case shop_div32u: - { - x86e->Emit(op_mov32,EAX,reg.mapg(op->rs1)); - if (op->op==shop_div32s) - x86e->Emit(op_cdq); - else - x86e->Emit(op_xor32,EDX,EDX); - - x86e->Emit(op->op==shop_div32s?op_idiv32:op_div32,reg.mapg(op->rs2)); - - x86e->Emit(op_mov32,reg.mapg(op->rd),EAX); - x86e->Emit(op_mov32,reg.mapg(op->rd2),EDX); - } - break; - - case shop_div32p2: - { - x86e->Emit(op_xor32,EAX,EAX); - x86e->Emit(op_cmp32,reg.mapg(op->rs3),0); - x86e->Emit(op_cmove32,EAX,reg.mapg(op->rs2)); - if (reg.mapg(op->rd)!=reg.mapg(op->rs1)) - x86e->Emit(op_mov32,reg.mapg(op->rd),reg.mapg(op->rs1)); - - x86e->Emit(op_sub32,reg.mapg(op->rd),EAX); - } - break; - - -#endif - - default: -#if 1 || CANONICAL_TEST - shil_chf[op->op](op); - break; -#endif - - -defaulty: - printf("OH CRAP %d\n",op->op); - verify(false); - } -} \ No newline at end of file diff --git a/core/windows/win86_ngen.cpp b/core/windows/win86_ngen.cpp deleted file mode 100644 index 6703b4f70..000000000 --- a/core/windows/win86_ngen.cpp +++ /dev/null @@ -1,129 +0,0 @@ -#include "types.h" - -#ifndef HOST_NO_REC - -#include "win86_ngen.h" - - -naked void ngen_LinkBlock_Shared_stub() -{ - __asm - { - pop ecx; - sub ecx,5; - call rdv_LinkBlock; - jmp eax; - } -} - -naked void ngen_LinkBlock_cond_Next_stub() -{ - __asm - { - mov edx,0 - jmp ngen_LinkBlock_Shared_stub; - } -} -naked void ngen_LinkBlock_cond_Branch_stub() -{ - __asm - { - mov edx,1 - jmp ngen_LinkBlock_Shared_stub; - } -} - -const u32 offs=offsetof(Sh4RCB,cntx.jdyn); -naked void ngen_LinkBlock_Generic_stub() -{ - __asm - { - mov edx,[p_sh4rcb]; - add edx,[offs]; - mov edx,[edx]; - jmp ngen_LinkBlock_Shared_stub; - } -} - - - - -naked void ngen_FailedToFindBlock_() -{ - __asm - { - mov ecx,esi; - call rdv_FailedToFindBlock; - jmp eax; - } -} - -void (*ngen_FailedToFindBlock)()=&ngen_FailedToFindBlock_; -naked void ngen_mainloop(void* cntx) -{ - __asm - { - push esi; - push edi; - push ebp; - push ebx; - - mov ecx,0xA0000000; - mov [cycle_counter],SH4_TIMESLICE; - - mov [loop_no_update],offset no_update; - mov [intc_sched],offset intc_sched_offs; - - mov eax,0; - //next_pc _MUST_ be on ecx -no_update: - mov esi,ecx; - call bm_GetCode - jmp eax; - -intc_sched_offs: - add [cycle_counter],SH4_TIMESLICE; - call UpdateSystem; - cmp eax,0; - jnz do_iter; - ret; - -do_iter: - pop ecx; - call rdv_DoInterrupts; - mov ecx,eax; -// cmp byte ptr [sh4_int_bCpuRun],0; - // jz cleanup; - jmp no_update; - -cleanup: - pop ebx; - pop ebp; - pop edi; - pop esi; - - ret; - } -} - - -naked void DYNACALL ngen_blockcheckfail(u32 addr) -{ - __asm - { - call rdv_BlockCheckFail; - jmp eax; - } -} - -naked void DYNACALL ngen_blockcheckfail2(u32 addr) -{ - __asm - { - int 3; - call rdv_BlockCheckFail; - jmp eax; - } -} - -#endif \ No newline at end of file diff --git a/core/windows/win86_ngen.h b/core/windows/win86_ngen.h deleted file mode 100644 index a5f656762..000000000 --- a/core/windows/win86_ngen.h +++ /dev/null @@ -1,50 +0,0 @@ -#include "types.h" - -#include "hw\sh4\sh4_opcode_list.h" -#include "hw\sh4\modules\ccn.h" -#include "hw/sh4/sh4_interrupts.h" - -#include "hw\sh4\sh4_core.h" -#include "hw\sh4\dyna\ngen.h" -#include "hw\sh4\sh4_mem.h" -#include "hw/sh4/dyna/regalloc.h" -#include "emitter\x86_emitter.h" -#include "profiler/profiler.h" -#include "oslib\oslib.h" - -void ngen_opcode(RuntimeBlockInfo* block, shil_opcode* op,x86_block* x86e, bool staging, bool optimise); - -void ngen_LinkBlock_Generic_stub(); -void ngen_LinkBlock_cond_Next_stub(); -void ngen_LinkBlock_cond_Branch_stub(); -void ngen_FailedToFindBlock_(); -void ngen_mainloop(); - - -void DYNACALL ngen_blockcheckfail(u32 addr); -void DYNACALL ngen_blockcheckfail2(u32 addr); - -extern x86_block* x86e; - -extern u32 cycle_counter; - -extern void* loop_no_update; -extern void* intc_sched; - -extern bool sse_1; -extern bool sse_2; -extern bool sse_3; -extern bool ssse_3; -extern bool mmx; - -struct x86_reg_alloc: RegAlloc -{ - virtual void Preload(u32 reg,x86_reg nreg); - virtual void Writeback(u32 reg,x86_reg nreg); - virtual void Preload_FPU(u32 reg,x86_reg nreg); - virtual void Writeback_FPU(u32 reg,x86_reg nreg); - void FreezeXMM(); - void ThawXMM(); -}; - -extern x86_reg_alloc reg; \ No newline at end of file diff --git a/shell/reicast.vcxproj b/shell/reicast.vcxproj index 5a6d16bc7..77e2c6cd6 100644 --- a/shell/reicast.vcxproj +++ b/shell/reicast.vcxproj @@ -156,30 +156,15 @@ - + true true true true - - true - true - true - true - - - true - true - true - true - - - true - true - true - true - + + + @@ -191,9 +176,6 @@ - - - @@ -320,12 +302,7 @@ - - true - true - true - true - + @@ -336,7 +313,6 @@ - @@ -346,7 +322,7 @@ true true - + true true true diff --git a/shell/reicast.vcxproj.filters b/shell/reicast.vcxproj.filters index 9f2024394..0323e123b 100644 --- a/shell/reicast.vcxproj.filters +++ b/shell/reicast.vcxproj.filters @@ -132,17 +132,8 @@ emitter - - win86 - - - win86 - - - win86 - - win86 + windows profiler @@ -246,9 +237,6 @@ linux - - rec-ARM - deps\chdpsr @@ -405,21 +393,24 @@ reios - - rec-x86 - - - rec-x86 - - - rec-x86 - linux oslib + + rec-ARM + + + rec-x86 + + + rec-x86 + + + rec-x86 + @@ -476,9 +467,6 @@ {e14356dc-6635-49f9-94d5-dc14ff1dec70} - - {5a7b63eb-8c03-46ac-b6e0-dfd3ade02f11} - {f96b3c39-1255-4ee8-999e-5c6e8fef21e5} @@ -545,6 +533,9 @@ {df854851-d3b5-4549-8248-acdfa954be44} + + {5a7b63eb-8c03-46ac-b6e0-dfd3ade02f11} + @@ -727,9 +718,6 @@ emitter - - win86 - profiler @@ -907,15 +895,15 @@ reios - - rec-x86 - linux oslib + + rec-x86 + @@ -924,7 +912,7 @@ rec-ARM - + rec-x86