diff --git a/core/core.mk b/core/core.mk index 85c609d6b..7f7b391ad 100755 --- a/core/core.mk +++ b/core/core.mk @@ -37,6 +37,10 @@ ifdef X64_REC RZDCY_MODULES += rec-x64/ endif +ifdef CPP_REC + RZDCY_MODULES += rec-cpp/ +endif + ifndef NO_REND RZDCY_MODULES += rend/gles/ else diff --git a/core/hw/pvr/spg.cpp b/core/hw/pvr/spg.cpp index 50a9147de..13aaa3c85 100755 --- a/core/hw/pvr/spg.cpp +++ b/core/hw/pvr/spg.cpp @@ -86,6 +86,8 @@ extern double speed_load_mspdf; double speed_load_mspdf; #endif +int mips_counter; + double full_rps; u32 fskip=0; @@ -184,12 +186,13 @@ int spg_line_sched(int tag, int cycl, int jit) spd_cpu*100/200,spd_vbs, mode,res,fullvbs, spd_fps,fskip/ts); - #else - sprintf(fpsStr,"%s/%c - %4.2f (%4.2f) - %4.2f - V: %4.2f (%.2f, %s%s%4.2f) R: %4.2f+%4.2f VTX: %4.2f%c", + #else + sprintf(fpsStr,"%s/%c - %4.2f (%4.2f) - %4.2f - V: %4.2f (%.2f, %s%s%4.2f) R: %4.2f+%4.2f VTX: %4.2f%c, MIPS: %.2f", VER_SHORTNAME,'n',mspdf,speed_load_mspdf,spd_cpu*100/200,spd_vbs, spd_vbs/full_rps,mode,res,fullvbs, spd_fps,fskip/ts - ,mv,mv_c); + , mv, mv_c, mips_counter/ 1024.0 / 1024.0); + mips_counter = 0; #endif fskip=0; diff --git a/core/hw/sh4/dyna/decoder.cpp b/core/hw/sh4/dyna/decoder.cpp index e6b3ab86c..8e1f8674a 100644 --- a/core/hw/sh4/dyna/decoder.cpp +++ b/core/hw/sh4/dyna/decoder.cpp @@ -15,6 +15,9 @@ #include "hw/sh4/sh4_mem.h" #include "decoder_opcodes.h" +#define BLOCK_MAX_SH_OPS_SOFT 500 +#define BLOCK_MAX_SH_OPS_HARD 511 + RuntimeBlockInfo* blk; @@ -1054,7 +1057,10 @@ void dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles) //there is no break here by design case NDO_NextOp: { - if (blk->guest_cycles>=max_cycles && !state.cpu.is_delayslot) + if ( + ( (blk->oplist.size() >= BLOCK_MAX_SH_OPS_SOFT) || (blk->guest_cycles >= max_cycles) ) + && !state.cpu.is_delayslot + ) { dec_End(state.cpu.rpc,BET_StaticJump,false); } @@ -1142,6 +1148,8 @@ _end: blk->NextBlock=state.NextAddr; blk->BranchBlock=state.JumpAddr; blk->BlockType=state.BlockType; + + 
verify(blk->oplist.size() <= BLOCK_MAX_SH_OPS_HARD); #if HOST_OS == OS_WINDOWS switch(rbi->addr) diff --git a/core/hw/sh4/dyna/driver.cpp b/core/hw/sh4/dyna/driver.cpp index 25aaae103..9bd767919 100644 --- a/core/hw/sh4/dyna/driver.cpp +++ b/core/hw/sh4/dyna/driver.cpp @@ -28,12 +28,13 @@ #if FEAT_SHREC != DYNAREC_NONE //uh uh -#if HOST_OS == OS_WINDOWS - u8 SH4_TCB[2*CODE_SIZE+4096]; +u8 SH4_TCB[2*CODE_SIZE+4096] +#if HOST_OS == OS_WINDOWS || FEAT_SHREC != DYNAREC_JIT + ; #elif HOST_OS == OS_LINUX - u8 SH4_TCB[2*CODE_SIZE+4096] __attribute__((section(".text"))); + __attribute__((section(".text"))); #elif HOST_OS==OS_DARWIN - u8 SH4_TCB[2*CODE_SIZE+4096] __attribute__((section("__TEXT,.text"))); + __attribute__((section("__TEXT,.text"))); #else #error SH4_TCB ALLOC #endif diff --git a/core/hw/sh4/dyna/shil_canonical.h b/core/hw/sh4/dyna/shil_canonical.h index 1ab8cf304..68c6a5091 100644 --- a/core/hw/sh4/dyna/shil_canonical.h +++ b/core/hw/sh4/dyna/shil_canonical.h @@ -42,7 +42,7 @@ extern "C" f32 fipr_asm(float* fn, float* fm); #define shil_opc(name) struct shil_opcl_##name { #define shil_opc_end() }; - #define shil_canonical(rv,name,args,code) static rv name args { code } + #define shil_canonical(rv,name,args,code) struct name { static rv impl args { code } }; #define shil_cf_arg_u32(x) ngen_CC_Param(op,&op->x,CPT_u32); #define shil_cf_arg_f32(x) ngen_CC_Param(op,&op->x,CPT_f32); @@ -50,7 +50,8 @@ extern "C" f32 fipr_asm(float* fn, float* fm); #define shil_cf_rv_u32(x) ngen_CC_Param(op,&op->x,CPT_u32rv); #define shil_cf_rv_f32(x) ngen_CC_Param(op,&op->x,CPT_f32rv); #define shil_cf_rv_u64(x) ngen_CC_Param(op,&op->rd,CPT_u64rvL); ngen_CC_Param(op,&op->rd2,CPT_u64rvH); - #define shil_cf(x) ngen_CC_Call(op,(void*)x); + #define shil_cf_ext(x) ngen_CC_Call(op,(void*)&x); + #define shil_cf(x) shil_cf_ext(x::impl) #define shil_compile(code) static void compile(shil_opcode* op) { ngen_CC_Start(op); code ngen_CC_Finish(op); } #elif SHIL_MODE==2 @@ -61,7 +62,7 @@ extern "C" 
f32 fipr_asm(float* fn, float* fm); #define shil_opc(name) struct shil_opcl_##name { #define shil_opc_end() }; - #define shil_canonical(rv,name,args,code) static rv cimpl_##name args; + #define shil_canonical(rv,name,args,code) struct name { static rv impl args; }; #define shil_compile(code) static void compile(shil_opcode* op); #elif SHIL_MODE==3 //generate struct list ... @@ -93,7 +94,7 @@ extern "C" f32 fipr_asm(float* fn, float* fm); -#if SHIL_MODE==1 +#if SHIL_MODE==1 || SHIL_MODE==2 //only in structs we use the code :) #include #include "types.h" @@ -206,18 +207,26 @@ shil_opc_end() //Canonical impl. opcodes ! shil_opc(sync_sr) +shil_canonical +( +void, f1, (), + UpdateSR(); +) shil_compile ( - shil_cf(UpdateSR); - //die(); + shil_cf(f1); ) shil_opc_end() shil_opc(sync_fpscr) +shil_canonical +( +void, f1, (), + UpdateFPSCR(); +) shil_compile ( - shil_cf(UpdateFPSCR); - //die(); + shil_cf(f1); ) shil_opc_end() @@ -981,7 +990,7 @@ shil_opc_end() -//shop_ftrv +//shop_frswap shil_opc(frswap) shil_canonical ( diff --git a/core/hw/sh4/sh4_core_regs.cpp b/core/hw/sh4/sh4_core_regs.cpp index b4b00b543..069dcf6ea 100644 --- a/core/hw/sh4/sh4_core_regs.cpp +++ b/core/hw/sh4/sh4_core_regs.cpp @@ -13,23 +13,23 @@ u8* sh4_dyna_rcb; INLINE void ChangeGPR() { - u32 temp[8]; + u32 temp; for (int i=0;i<8;i++) { - temp[i]=r[i]; + temp=r[i]; r[i]=r_bank[i]; - r_bank[i]=temp[i]; + r_bank[i]=temp; } } INLINE void ChangeFP() { - u32 temp[16]; + u32 temp; for (int i=0;i<16;i++) { - temp[i]=fr_hex[i]; + temp=fr_hex[i]; fr_hex[i]=xf_hex[i]; - xf_hex[i]=temp[i]; + xf_hex[i]=temp; } } diff --git a/core/rec-cpp/rec_cpp.cpp b/core/rec-cpp/rec_cpp.cpp new file mode 100644 index 000000000..8b2ff91f0 --- /dev/null +++ b/core/rec-cpp/rec_cpp.cpp @@ -0,0 +1,1543 @@ + +#include "types.h" + +#include +#include + +#if FEAT_SHREC == DYNAREC_CPP +#include "hw/sh4/sh4_opcode_list.h" +#include "hw/sh4/modules/ccn.h" +#include "hw/sh4/sh4_interrupts.h" + +#include "hw/sh4/sh4_core.h" +#include 
"hw/sh4/dyna/ngen.h" +#include "hw/sh4/sh4_mem.h" +#include "hw/sh4/dyna/regalloc.h" +#include "emitter/x86_emitter.h" +#include "profiler/profiler.h" +#include "oslib/oslib.h" + +#define SHIL_MODE 2 +#include "hw/sh4/dyna/shil_canonical.h" + + +#define MIPS_COUNTER 0 + +struct DynaRBI : RuntimeBlockInfo +{ + virtual u32 Relink() { + //verify(false); + return 0; + } + + virtual void Relocate(void* dst) { + verify(false); + } +}; + + + +int cycle_counter; +extern int mips_counter; + +void ngen_FailedToFindBlock_internal() { + rdv_FailedToFindBlock(Sh4cntx.pc); +} + +void(*ngen_FailedToFindBlock)() = &ngen_FailedToFindBlock_internal; + +void ngen_mainloop(void* v_cntx) +{ + Sh4RCB* ctx = (Sh4RCB*)((u8*)v_cntx - sizeof(Sh4RCB)); + + cycle_counter = 0; + + for (;;) { + cycle_counter = SH4_TIMESLICE; + do { + DynarecCodeEntryPtr rcb = bm_GetCode(ctx->cntx.pc); + rcb(); + } while (cycle_counter > 0); + + if (UpdateSystem()) { + rdv_DoInterrupts_pc(ctx->cntx.pc); + } + } +} + +void ngen_init() +{ +} + + +void ngen_GetFeatures(ngen_features* dst) +{ + dst->InterpreterFallback = false; + dst->OnlyDynamicEnds = false; +} + +RuntimeBlockInfo* ngen_AllocateBlock() +{ + return new DynaRBI(); +} + +u32* GetRegPtr(u32 reg) +{ + return Sh4_int_GetRegisterPtr((Sh4RegType)reg); +} + +class opcodeExec { + public: + virtual void execute() = 0; +}; + +class opcodeDie : public opcodeExec { + void execute() { + die("death opcode"); + } +}; + +struct CC_PS +{ + CanonicalParamType type; + shil_param* prm; +}; + +typedef vector CC_pars_t; + + +struct opcode_cc_aBaCbC { + template + struct opex2 : public opcodeExec { + + u32 rs2; + u32* rs1; + u32* rd; + + void setup(const CC_pars_t& prms, void* fun) { + rs2 = prms[0].prm->imm_value(); + rs1 = prms[1].prm->reg_ptr(); + rd = prms[2].prm->reg_ptr(); + verify(prms.size() == 3); + } + + void execute() { + *rd = ((u32(*)(u32, u32))&T::impl)(*rs1, rs2); + } + }; +}; + +struct opcode_cc_aCaCbC { + struct opex : public opcodeExec { + void* fn; + 
u32* rs1; + u32* rs2; + u32* rd; + void execute() { + *rd = ((u32(*)(u32, u32))fn)(*rs1, *rs2); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs2 = prms[0].prm->reg_ptr(); + rs1 = prms[1].prm->reg_ptr(); + rd = prms[2].prm->reg_ptr(); + verify(prms.size() == 3); + } + }; + + template + struct opex2 : public opex { + void execute() { + *rd = ((u32(*)(u32, u32))&T::impl)(*rs1, *rs2); + } + }; +}; + +struct opcode_cc_aCbC { + struct opex : public opcodeExec { + void* fn; + u32* rs1; + u32* rd; + void execute() { + *rd = ((u32(*)(u32))fn)(*rs1); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs1 = prms[0].prm->reg_ptr(); + rd = prms[1].prm->reg_ptr(); + verify(prms.size() == 2); + } + }; + + template + struct opex2 : public opex { + void execute() { + *rd = ((u32(*)(u32))&T::impl)(*rs1); + } + }; +}; + +struct opcode_cc_aC { + struct opex : public opcodeExec { + void* fn; + u32* rs1; + void execute() { + ((void(*)(u32))fn)(*rs1); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs1 = prms[0].prm->reg_ptr(); + verify(prms.size() == 1); + } + }; + + template + struct opex2 : public opex { + void execute() { + ((void(*)(u32))&T::impl)(*rs1); + } + }; +}; + +struct opcode_cc_aCaCaCbC { + struct opex : public opcodeExec { + void* fn; + u32* rs1; + u32* rs2; + u32* rs3; + u32* rd; + void execute() { + *rd = ((u32(*)(u32, u32, u32))fn)(*rs1, *rs2, *rs3); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs3 = prms[0].prm->reg_ptr(); + rs2 = prms[1].prm->reg_ptr(); + rs1 = prms[2].prm->reg_ptr(); + rd = prms[3].prm->reg_ptr(); + verify(prms.size() == 4); + } + }; + + template + struct opex2 : public opex { + void execute() { + *rd = ((u32(*)(u32, u32, u32))&T::impl)(*rs1, *rs2, *rs3); + } + }; +}; + +struct opcode_cc_aCaCaCcCdC { + //split this to two cases, u64 and u64L/u32H + struct opex : public opcodeExec { + void* fn; + u32* rs1; + u32* rs2; + u32* rs3; + u32* rd; + u32* rd2; + void 
execute() { + auto rv = ((u64(*)(u32, u32, u32))fn)(*rs1, *rs2, *rs3); + + *rd = (u32)rv; + *rd2 = rv >> 32; + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs3 = prms[0].prm->reg_ptr(); + rs2 = prms[1].prm->reg_ptr(); + rs1 = prms[2].prm->reg_ptr(); + rd = prms[3].prm->reg_ptr(); + rd2 = prms[4].prm->reg_ptr(); + + //verify((u64*)(rd2 - 1) == rd); + verify(prms.size() == 5); + } + }; + template + struct opex2 : public opex { + void execute() { + auto rv = ((u64(*)(u32, u32, u32))&T::impl)(*rs1, *rs2, *rs3); + + *rd = (u32)rv; + *rd2 = rv >> 32; + } + }; + +}; + +struct opcode_cc_aCaCcCdC { + struct opex : public opcodeExec { + void* fn; + u32* rs1; + u32* rs2; + u32* rd; + u32* rd2; + void execute() { + auto rv = ((u64(*)(u32, u32))fn)(*rs1, *rs2); + *rd = (u32)rv; + *rd2 = rv >> 32; + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs2 = prms[0].prm->reg_ptr(); + rs1 = prms[1].prm->reg_ptr(); + rd = prms[2].prm->reg_ptr(); + rd2 = prms[3].prm->reg_ptr(); + + verify(prms.size() == 4); + } + }; + + template + struct opex2 : public opex { + void execute() { + auto rv = ((u64(*)(u32, u32))&T::impl)(*rs1, *rs2); + *rd = (u32)rv; + *rd2 = rv >> 32; + } + }; +}; + +struct opcode_cc_eDeDeDfD { + struct opex : public opcodeExec { + void* fn; + f32* rs1; + f32* rs2; + f32* rs3; + f32* rd; + void execute() { + *rd = ((f32(*)(f32, f32, f32))fn)(*rs1, *rs2, *rs3); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs3 = (f32*)prms[0].prm->reg_ptr(); + rs2 = (f32*)prms[1].prm->reg_ptr(); + rs1 = (f32*)prms[2].prm->reg_ptr(); + rd = (f32*)prms[3].prm->reg_ptr(); + } + }; + + template + struct opex2 : public opex { + void execute() { + *rd = ((f32(*)(f32, f32, f32))&T::impl)(*rs1, *rs2, *rs3); + } + }; + +}; + +struct opcode_cc_eDeDfD { + struct opex : public opcodeExec { + void* fn; + f32* rs1; + f32* rs2; + f32* rd; + void execute() { + *rd = ((f32(*)(f32, f32))fn)(*rs1, *rs2); + } + + void setup(const CC_pars_t& prms, 
void* fun) { + fn = fun; + rs2 = (f32*)prms[0].prm->reg_ptr(); + rs1 = (f32*)prms[1].prm->reg_ptr(); + rd = (f32*)prms[2].prm->reg_ptr(); + } + }; + + template + struct opex2 : public opex { + void execute() { + *rd = ((f32(*)(f32, f32))&T::impl)(*rs1, *rs2); + } + }; +}; + +struct opcode_cc_eDeDbC { + struct opex : public opcodeExec { + void* fn; + f32* rs1; + f32* rs2; + u32* rd; + void execute() { + *rd = ((u32(*)(f32, f32))fn)(*rs1, *rs2); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs2 = (f32*)prms[0].prm->reg_ptr(); + rs1 = (f32*)prms[1].prm->reg_ptr(); + rd = (u32*)prms[2].prm->reg_ptr(); + } + }; + + template + struct opex2 : public opex { + void execute() { + *rd = ((u32(*)(f32, f32))&T::impl)(*rs1, *rs2); + } + }; +}; + +struct opcode_cc_eDbC { + struct opex : public opcodeExec { + void* fn; + f32* rs1; + u32* rd; + void execute() { + *rd = ((u32(*)(f32))fn)(*rs1); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs1 = (f32*)prms[0].prm->reg_ptr(); + rd = (u32*)prms[1].prm->reg_ptr(); + } + }; + + template + struct opex2 : public opex { + void execute() { + *rd = ((u32(*)(f32))&T::impl)(*rs1); + } + }; +}; + +struct opcode_cc_aCfD { + struct opex : public opcodeExec { + void* fn; + u32* rs1; + f32* rd; + void execute() { + *rd = ((f32(*)(u32))fn)(*rs1); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs1 = (u32*)prms[0].prm->reg_ptr(); + rd = (f32*)prms[1].prm->reg_ptr(); + } + }; + + template + struct opex2 : public opex { + void execute() { + *rd = ((f32(*)(u32))&T::impl)(*rs1); + } + }; +}; + +struct opcode_cc_eDfD { + struct opex : public opcodeExec { + void* fn; + f32* rs1; + f32* rd; + void execute() { + *rd = ((f32(*)(f32))fn)(*rs1); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs1 = (f32*)prms[0].prm->reg_ptr(); + rd = (f32*)prms[1].prm->reg_ptr(); + } + }; + + template + struct opex2 : public opex { + void execute() { + *rd = ((f32(*)(f32))&T::impl)(*rs1); + 
} + }; +}; + +struct opcode_cc_aCgE { + struct opex : public opcodeExec { + void* fn; + u32* rs1; + f32* rd; + void execute() { + ((void(*)(f32*, u32))fn)(rd, *rs1); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs1 = (u32*)prms[0].prm->reg_ptr(); + rd = (f32*)prms[1].prm->reg_ptr(); + } + }; + + template + struct opex2 : public opex { + void execute() { + ((void(*)(f32*, u32))&T::impl)(rd, *rs1); + } + }; +}; + +struct opcode_cc_gJgHgH { + struct opex : public opcodeExec { + void* fn; + f32* rs2; + f32* rs1; + f32* rd; + void execute() { + ((void(*)(f32*, f32*, f32*))fn)(rd, rs1, rs2); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs2 = (f32*)prms[0].prm->reg_ptr(); + rs1 = (f32*)prms[1].prm->reg_ptr(); + rd = (f32*)prms[2].prm->reg_ptr(); + } + }; + + template + struct opex2 : public opex { + void execute() { + ((void(*)(f32*, f32*, f32*))&T::impl)(rd, rs1, rs2); + } + }; +}; + +struct opcode_cc_gHgHfD { + struct opex : public opcodeExec { + void* fn; + f32* rs2; + f32* rs1; + f32* rd; + void execute() { + *rd = ((f32(*)(f32*, f32*))fn)(rs1, rs2); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs2 = (f32*)prms[0].prm->reg_ptr(); + rs1 = (f32*)prms[1].prm->reg_ptr(); + rd = (f32*)prms[2].prm->reg_ptr(); + } + }; + + template + struct opex2 : public opex { + void execute() { + *rd = ((f32(*)(f32*, f32*))&T::impl)(rs1, rs2); + } + }; +}; + +struct opcode_cc_vV { + struct opex : public opcodeExec { + void* fn; + + void execute() { + ((void(*)())fn)(); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + } + }; + + template + struct opex2 : public opex { + void execute() { + ((void(*)())&T::impl)(); + } + }; +}; + +//u64* fd1,u64* fd2,u64* fs1,u64* fs2 +//slightly violates the type, as it's FV4PTR but we pass u64* +struct opcode_cc_gJgJgJgJ { + struct opex : public opcodeExec { + void* fn; + u64* rs2; + u64* rs1; + u64* rd; + u64* rd2; + void execute() { + ((void(*)(u64*, u64*, u64*, 
u64*))fn)(rd, rd2, rs1, rs2); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs2 = (u64*)prms[0].prm->reg_ptr(); + rs1 = (u64*)prms[1].prm->reg_ptr(); + rd2 = (u64*)prms[2].prm->reg_ptr(); + rd = (u64*)prms[3].prm->reg_ptr(); + } + }; + + template + struct opex2 : public opex { + void execute() { + ((void(*)(u64*, u64*, u64*, u64*))&T::impl)(rd, rd2, rs1, rs2); + } + }; +}; + +struct opcode_ifb_pc : public opcodeExec { + OpCallFP* oph; + u32 pc; + u16 opcode; + + void execute() { + next_pc = pc; + oph(opcode); + } +}; + +struct opcode_ifb : public opcodeExec { + OpCallFP* oph; + u16 opcode; + + void execute() { + oph(opcode); + } +}; + +struct opcode_jdyn : public opcodeExec { + u32* src; + void execute() { + Sh4cntx.jdyn = *src; + } +}; + +struct opcode_jdyn_imm : public opcodeExec { + u32* src; + u32 imm; + void execute() { + Sh4cntx.jdyn = *src + imm; + } +}; + +struct opcode_mov32 : public opcodeExec { + u32* src; + u32* dst; + + void execute() { + *dst = *src; + } +}; + +struct opcode_mov32_imm : public opcodeExec { + u32 src; + u32* dst; + + void execute() { + *dst = src; + } +}; + +struct opcode_mov64 : public opcodeExec { + u64* src; + u64* dst; + + void execute() { + *dst = *src; + } +}; + +#define do_readm(d, a, sz) do { if (sz == 1) { *d = (s32)(s8)ReadMem8(a); } else if (sz == 2) { *d = (s32)(s16)ReadMem16(a); } \ + else if (sz == 4) { *d = ReadMem32(a);} else if (sz == 8) { *(u64*)d = ReadMem64(a); } \ + } while(0) +template +struct opcode_readm : public opcodeExec { + u32* src; + u32* dst; + + void execute() { + auto a = *src; + do_readm(dst, a, sz); + } +}; + +template +struct opcode_readm_imm : public opcodeExec { + u32 src; + u32* dst; + + void execute() { + auto a = src; + do_readm(dst, a, sz); + } +}; + +template +struct opcode_readm_offs : public opcodeExec { + u32* src; + u32* dst; + u32* offs; + + void execute() { + auto a = *src + *offs; + do_readm(dst, a, sz); + } +}; + +template +struct opcode_readm_offs_imm : public 
opcodeExec { + u32* src; + u32* dst; + u32 offs; + + void execute() { + auto a = *src + offs; + do_readm(dst, a, sz); + } +}; + +#define do_writem(d, a, sz) do { if (sz == 1) { WriteMem8(a, *d);} else if (sz == 2) { WriteMem16(a, *d); } \ + else if (sz == 4) { WriteMem32(a, *d);} else if (sz == 8) { WriteMem64(a, *(u64*)d); } \ + } while(0) +template +struct opcode_writem : public opcodeExec { + u32* src; + u32* src2; + + void execute() { + auto a = *src; + do_writem(src2, a, sz); + } +}; + +template +struct opcode_writem_imm : public opcodeExec { + u32 src; + u32* src2; + + void execute() { + auto a = src; + do_writem(src2, a, sz); + } +}; + +template +struct opcode_writem_offs : public opcodeExec { + u32* src; + u32* src2; + u32* offs; + + void execute() { + auto a = *src + *offs; + do_writem(src2, a, sz); + } +}; + +template +struct opcode_writem_offs_imm : public opcodeExec { + u32* src; + u32* src2; + u32 offs; + + void execute() { + auto a = *src + offs; + do_writem(src2, a, sz); + } +}; + +template +struct opcode_blockend : public opcodeExec { + int next_pc_value; + int branch_pc_value; + u32* jdyn; + + opcodeExec* setup(RuntimeBlockInfo* block) { + next_pc_value = block->NextBlock; + branch_pc_value = block->BranchBlock; + + jdyn = &Sh4cntx.jdyn; + if (!block->has_jcond && BET_GET_CLS(block->BlockType) == BET_CLS_COND) { + jdyn = &sr.T; + } + return this; + } + + void execute() { + //do whatever + + + switch (end_type) { + + case BET_StaticJump: + case BET_StaticCall: + next_pc = branch_pc_value; + break; + + case BET_Cond_0: + if (*jdyn != 0) + next_pc = next_pc_value; + else + next_pc = branch_pc_value; + break; + + case BET_Cond_1: + if (*jdyn != 1) + next_pc = next_pc_value; + else + next_pc = branch_pc_value; + break; + + case BET_DynamicJump: + case BET_DynamicCall: + case BET_DynamicRet: + next_pc = *jdyn; + break; + + case BET_DynamicIntr: + case BET_StaticIntr: + if (end_type == BET_DynamicIntr) + next_pc = *jdyn; + else + next_pc = next_pc_value; 
+ + UpdateINTC(); + break; + + default: + die("NOT GONNA HAPPEN TODAY, ALRIGHY?"); + } + } +}; + +#if !defined(_DEBUG) + #define DREP_1(x, phrase) if (x < cnt) ops[x]->execute(); else return; + #define DREP_2(x, phrase) DREP_1(x, phrase) DREP_1(x+1, phrase) + #define DREP_4(x, phrase) DREP_2(x, phrase) DREP_2(x+2, phrase) + #define DREP_8(x, phrase) DREP_4(x, phrase) DREP_4(x+4, phrase) + #define DREP_16(x, phrase) DREP_8(x, phrase) DREP_8(x+8, phrase) + #define DREP_32(x, phrase) DREP_16(x, phrase) DREP_16(x+16, phrase) + #define DREP_64(x, phrase) DREP_32(x, phrase) DREP_32(x+32, phrase) + #define DREP_128(x, phrase) DREP_64(x, phrase) DREP_64(x+64, phrase) + #define DREP_256(x, phrase) DREP_128(x, phrase) DREP_128(x+128, phrase) + #define DREP_512(x, phrase) DREP_256(x, phrase) DREP_256(x+256, phrase) +#else + #define DREP_512(x, phrase) for (int i=0; iexecute(); +#endif + +template +class fnblock { +public: + opcodeExec* ops[cnt]; + int cc; + void execute() { + cycle_counter -= cc; + +#if MIPS_COUNTER + mips_counter += cnt; +#endif + + DREP_512(0, phrase); + } + + static void runner(void* fnb) { + ((fnblock*)fnb)->execute(); + } +}; + +template <> +class fnblock<0> { + void execute() { + die("WHATNOT"); + } +}; + +struct fnrv { + void* fnb; + void(*runner)(void* fnb); + opcodeExec** ptrs; +}; + +template +fnrv fnnCtor(int cycles) { + auto rv = new fnblock(); + rv->cc = cycles; + fnrv rvb = { rv, &fnblock::runner, rv->ops }; + return rvb; +} + +template<> +fnrv fnnCtor<0>(int cycles) { + fnrv rvb = { 0, 0, 0 }; + return rvb; +} + +template +opcodeExec* createType2(const CC_pars_t& prms, void* fun) { + typedef typename CTR::template opex2 thetype; + auto rv = new thetype(); + + rv->setup(prms, fun); + return rv; +} + + +map funs; + + +int funs_id_count; + +template +opcodeExec* createType_fast(const CC_pars_t& prms, void* fun, shil_opcode* opcode) { + return 0; +} + +#define OPCODE_CC(sig) opcode_cc_##sig + +#define FAST_sig(sig, ...) 
\ +template <> \ +opcodeExec* createType_fast(const CC_pars_t& prms, void* fun, shil_opcode* opcode) { \ + typedef OPCODE_CC(sig) CTR; \ + \ + static map funsf = {\ + +#define FAST_gis \ +};\ + \ + if (funsf.count(fun)) { \ + return funsf[fun](prms, fun); \ + } \ + else { \ + return 0; \ + } \ +} + +#define FAST_po2(n,fn) { (void*)&shil_opcl_##n::fn::impl, &createType2 < shil_opcl_##n::fn, CTR > }, +#define FAST_po(n) FAST_po2(n, f1) + +FAST_sig(aCaCbC) +FAST_po(and) +FAST_po(or) +FAST_po(xor) +FAST_po(add) +FAST_po(sub) +FAST_po(ror) +FAST_po(shl) +FAST_po(shr) +FAST_po(sar) +FAST_po(shad) +FAST_po(shld) +FAST_po(test) +FAST_po(seteq) +FAST_po(setge) +FAST_po(setgt) +FAST_po(setae) +FAST_po(setab) +FAST_po(setpeq) +FAST_po(mul_u16) +FAST_po(mul_s16) +FAST_po(mul_i32) +FAST_gis + +FAST_sig(aBaCbC) +FAST_po(and) +FAST_po(or) +FAST_po(xor) +FAST_po(add) +FAST_po(sub) +FAST_po(ror) +FAST_po(shl) +FAST_po(shr) +FAST_po(sar) +FAST_po(shad) +FAST_po(shld) +FAST_po(test) +FAST_po(seteq) +FAST_po(setge) +FAST_po(setgt) +FAST_po(setae) +FAST_po(setab) +FAST_po(setpeq) +FAST_po(mul_u16) +FAST_po(mul_s16) +FAST_po(mul_i32) +FAST_gis + +FAST_sig(eDeDfD) +FAST_po(fadd) +FAST_po(fsub) +FAST_po(fmul) +FAST_po(fdiv) +FAST_gis + +FAST_sig(eDfD) +FAST_po(fneg) +FAST_po(fabs) +FAST_po(fsrra) +FAST_po(fsqrt) +FAST_gis + + +FAST_sig(eDeDbC) +FAST_po(fseteq) +FAST_po(fsetgt) +FAST_gis + +FAST_sig(eDeDeDfD) +FAST_po(fmac) +FAST_gis + +FAST_sig(gHgHfD) +FAST_po(fipr) +FAST_gis + +FAST_sig(aCaCcCdC) +FAST_po(div32u) +FAST_po(div32s) +FAST_po(rocr) +FAST_po(rocl) +FAST_po(mul_u64) +FAST_po(mul_s64) +FAST_gis + +FAST_sig(aCaCaCcCdC) +FAST_po(adc) +FAST_po(sbc) +FAST_gis + +FAST_sig(aCaCaCbC) +FAST_po(div32p2) +FAST_gis + +FAST_sig(aCbC) +FAST_po(neg) +FAST_po(not) +FAST_po(ext_s8) +FAST_po(ext_s16) +FAST_po(swaplb) +FAST_gis + +FAST_sig(aCfD) +FAST_po(cvt_i2f_z) +FAST_po(cvt_i2f_n) +FAST_gis + + +FAST_sig(aCgE) +FAST_po2(fsca, fsca_table) +FAST_gis + +FAST_sig(eDbC) +FAST_po(cvt_f2i_t) 
+FAST_gis + +FAST_sig(gJgHgH) +FAST_po(ftrv) +FAST_gis + +FAST_sig(aC) +FAST_po2(pref, f1) +FAST_po2(pref, f2) +FAST_gis + +FAST_sig(vV) +FAST_po(sync_sr) +FAST_po(sync_fpscr) +FAST_gis + +FAST_sig(gJgJgJgJ) +FAST_po(frswap) +FAST_gis + + +typedef opcodeExec*(*foas)(const CC_pars_t& prms, void* fun, shil_opcode* opcode); + +string getCTN(foas code); + +template +opcodeExec* createType(const CC_pars_t& prms, void* fun, shil_opcode* opcode) { + + auto frv = createType_fast(prms, fun, opcode); + if (frv) + return frv; + + if (!funs.count(fun)) { + funs[fun] = funs_id_count++; + + printf("DEFINE %s: FAST_po(%s)\n", getCTN(&createType).c_str(), shil_opcode_name(opcode->op)); + } + + typedef typename CTR::opex thetype; + auto rv = new thetype(); + + rv->setup(prms, fun); + return rv; +} + +map< string, foas> unmap = { + { "aBaCbC", &createType_fast }, + { "aCaCbC", &createType }, + { "aCbC", &createType }, + { "aC", &createType }, + + { "eDeDeDfD", &createType }, + { "eDeDfD", &createType }, + + { "aCaCaCbC", &createType }, + { "aCaCcCdC", &createType }, + { "aCaCaCcCdC", &createType }, + + { "eDbC", &createType }, + { "aCfD", &createType }, + + { "eDeDbC", &createType }, + { "eDfD", &createType }, + + { "aCgE", &createType }, + { "gJgHgH", &createType }, + { "gHgHfD", &createType }, + { "gJgJgJgJ", &createType }, + { "vV", &createType }, +}; + +string getCTN(foas f) { + auto it = find_if(unmap.begin(), unmap.end(), [f](const map< string, foas>::value_type& s) { return s.second == f; }); + + return it->first; +} + +struct { + void* fnb; + void(*runner)(void* fnb); +} dispatchb[8192]; + +template +void disaptchn() { + dispatchb[n].runner(dispatchb[n].fnb); +} + +int idxnxx = 0; +//&disaptchn +#define REP_1(x, phrase) phrase < x > +#define REP_2(x, phrase) REP_1(x, phrase), REP_1(x+1, phrase) +#define REP_4(x, phrase) REP_2(x, phrase), REP_2(x+2, phrase) +#define REP_8(x, phrase) REP_4(x, phrase), REP_4(x+4, phrase) +#define REP_16(x, phrase) REP_8(x, phrase), REP_8(x+8, 
phrase) +#define REP_32(x, phrase) REP_16(x, phrase), REP_16(x+16, phrase) +#define REP_64(x, phrase) REP_32(x, phrase), REP_32(x+32, phrase) +#define REP_128(x, phrase) REP_64(x, phrase), REP_64(x+64, phrase) +#define REP_256(x, phrase) REP_128(x, phrase), REP_128(x+128, phrase) +#define REP_512(x, phrase) REP_256(x, phrase), REP_256(x+256, phrase) +#define REP_1024(x, phrase) REP_512(x, phrase), REP_512(x+512, phrase) +#define REP_2048(x, phrase) REP_1024(x, phrase), REP_1024(x+1024, phrase) +#define REP_4096(x, phrase) REP_2048(x, phrase), REP_2048(x+2048, phrase) +#define REP_8192(x, phrase) REP_4096(x, phrase), REP_4096(x+4096, phrase) + + +DynarecCodeEntryPtr FNS[] = { REP_8192(0, &disaptchn) }; + +DynarecCodeEntryPtr getndpn_forreal(int n) { + if (n >= 8192) + return 0; + else + return FNS[n]; +} + +typedef fnrv(*FNAFB)(int cycles); + +FNAFB FNA[] = { REP_512(1, &fnnCtor) }; + +FNAFB fnnCtor_forreal(size_t n) { + verify(n > 0); + verify(n <= 512); + return FNA[n - 1]; +} + +class BlockCompiler { +public: + + size_t opcode_index; + opcodeExec** ptrsg; + void compile(RuntimeBlockInfo* block, bool force_checks, bool reset, bool staging, bool optimise) { + + //we need an extra one for the end opcode + auto ptrs = fnnCtor_forreal(block->oplist.size() + 1)(block->guest_cycles); + + ptrsg = ptrs.ptrs; + + dispatchb[idxnxx].fnb = ptrs.fnb; + dispatchb[idxnxx].runner = ptrs.runner; + + block->code = getndpn_forreal(idxnxx++); + + if (getndpn_forreal(idxnxx) == 0) { + emit_Skip(emit_FreeSpace()-16); + } + + for (size_t i = 0; i < block->oplist.size(); i++) { + opcode_index = i; + shil_opcode& op = block->oplist[i]; + switch (op.op) { + + case shop_ifb: + { + if (op.rs1.imm_value()) { + auto opc = new opcode_ifb_pc(); + ptrs.ptrs[i] = opc; + + opc->pc = op.rs2.imm_value(); + opc->opcode = op.rs3.imm_value(); + + opc->oph = OpDesc[op.rs3.imm_value()]->oph; + } + else { + auto opc = new opcode_ifb(); + ptrs.ptrs[i] = opc; + + opc->opcode = op.rs3.imm_value(); + + 
opc->oph = OpDesc[op.rs3.imm_value()]->oph; + } + } + break; + + case shop_jcond: + case shop_jdyn: + { + if (op.rs2.is_imm()) { + auto opc = new opcode_jdyn_imm(); + ptrs.ptrs[i] = opc; + + opc->src = op.rs1.reg_ptr(); + opc->imm = op.rs2.imm_value(); + } + else { + auto opc = new opcode_jdyn(); + ptrs.ptrs[i] = opc; + + opc->src = op.rs1.reg_ptr(); + } + + } + break; + + case shop_mov32: + { + verify(op.rd.is_reg()); + + verify(op.rs1.is_reg() || op.rs1.is_imm()); + + + if (op.rs1.is_imm()) { + auto opc = new opcode_mov32_imm(); + ptrs.ptrs[i] = opc; + + opc->src = op.rs1.imm_value(); + opc->dst = op.rd.reg_ptr(); + } + else { + auto opc = new opcode_mov32(); + ptrs.ptrs[i] = opc; + + opc->src = op.rs1.reg_ptr(); + opc->dst = op.rd.reg_ptr(); + } + + + } + break; + + case shop_mov64: + { + verify(op.rd.is_reg()); + + verify(op.rs1.is_reg()); + + auto opc = new opcode_mov64(); + ptrs.ptrs[i] = opc; + + opc->src = (u64*) op.rs1.reg_ptr(); + opc->dst = (u64*)op.rd.reg_ptr(); + } + break; + + case shop_readm: + { + u32 size = op.flags & 0x7f; + if (op.rs1.is_imm()) { + verify(op.rs2.is_null() && op.rs3.is_null()); + + if (size == 1) + { + auto opc = new opcode_readm_imm<1>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.imm_value(); opc->dst = op.rd.reg_ptr(); + } + else if (size == 2) + { + auto opc = new opcode_readm_imm<2>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.imm_value(); opc->dst = op.rd.reg_ptr(); + } + else if (size == 4) + { + auto opc = new opcode_readm_imm<4>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.imm_value(); opc->dst = op.rd.reg_ptr(); + } + else if (size == 8) + { + auto opc = new opcode_readm_imm<8>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.imm_value(); opc->dst = op.rd.reg_ptr(); + } + } + else if (op.rs3.is_imm()) { + verify(op.rs2.is_null()); + if (size == 1) + { + auto opc = new opcode_readm_offs_imm<1>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.imm_value(); opc->dst = op.rd.reg_ptr(); + } + else if (size == 2) + { + auto 
opc = new opcode_readm_offs_imm<2>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.imm_value(); opc->dst = op.rd.reg_ptr(); + } + else if (size == 4) + { + auto opc = new opcode_readm_offs_imm<4>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.imm_value(); opc->dst = op.rd.reg_ptr(); + } + else if (size == 8) + { + auto opc = new opcode_readm_offs_imm<8>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.imm_value(); opc->dst = op.rd.reg_ptr(); + } + } + else if (op.rs3.is_reg()) { + verify(op.rs2.is_null()); + if (size == 1) + { + auto opc = new opcode_readm_offs<1>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.reg_ptr(); opc->dst = op.rd.reg_ptr(); + } + else if (size == 2) + { + auto opc = new opcode_readm_offs<2>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.reg_ptr(); opc->dst = op.rd.reg_ptr(); + } + else if (size == 4) + { + auto opc = new opcode_readm_offs<4>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.reg_ptr(); opc->dst = op.rd.reg_ptr(); + } + else if (size == 8) + { + auto opc = new opcode_readm_offs<8>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.reg_ptr(); opc->dst = op.rd.reg_ptr(); + } + } + else { + verify(op.rs2.is_null() && op.rs3.is_null()); + if (size == 1) + { + auto opc = new opcode_readm<1>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->dst = op.rd.reg_ptr(); + } + else if (size == 2) + { + auto opc = new opcode_readm<2>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->dst = op.rd.reg_ptr(); + } + else if (size == 4) + { + auto opc = new opcode_readm<4>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->dst = op.rd.reg_ptr(); + } + else if (size == 8) + { + auto opc = new opcode_readm<8>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->dst = op.rd.reg_ptr(); + } + } + } + break; + + case shop_writem: + { + u32 size = op.flags & 0x7f; + + if 
(op.rs1.is_imm()) { + verify(op.rs3.is_null()); + if (size == 1) + { + auto opc = new opcode_writem_imm<1>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.imm_value(); opc->src2 = op.rs2.reg_ptr(); + } + else if (size == 2) + { + auto opc = new opcode_writem_imm<2>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.imm_value(); opc->src2 = op.rs2.reg_ptr(); + } + else if (size == 4) + { + auto opc = new opcode_writem_imm<4>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.imm_value(); opc->src2 = op.rs2.reg_ptr(); + } + else if (size == 8) + { + auto opc = new opcode_writem_imm<8>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.imm_value(); opc->src2 = op.rs2.reg_ptr(); + } + } + else if (op.rs3.is_imm()) { + if (size == 1) + { + auto opc = new opcode_writem_offs_imm<1>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.imm_value(); opc->src2 = op.rs2.reg_ptr(); + } + else if (size == 2) + { + auto opc = new opcode_writem_offs_imm<2>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.imm_value(); opc->src2 = op.rs2.reg_ptr(); + } + else if (size == 4) + { + auto opc = new opcode_writem_offs_imm<4>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.imm_value(); opc->src2 = op.rs2.reg_ptr(); + } + else if (size == 8) + { + auto opc = new opcode_writem_offs_imm<8>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.imm_value(); opc->src2 = op.rs2.reg_ptr(); + } + } + else if (op.rs3.is_reg()) { + if (size == 1) + { + auto opc = new opcode_writem_offs<1>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.reg_ptr(); opc->src2 = op.rs2.reg_ptr(); + } + else if (size == 2) + { + auto opc = new opcode_writem_offs<2>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.reg_ptr(); opc->src2 = op.rs2.reg_ptr(); + } + else if (size == 4) + { + auto opc = new opcode_writem_offs<4>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.reg_ptr(); opc->src2 = op.rs2.reg_ptr(); + } 
+ else if (size == 8) + { + auto opc = new opcode_writem_offs<8>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.reg_ptr(); opc->src2 = op.rs2.reg_ptr(); + } + } + else { + verify(op.rs3.is_null()); + if (size == 1) + { + auto opc = new opcode_writem<1>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->src2 = op.rs2.reg_ptr(); + } + else if (size == 2) + { + auto opc = new opcode_writem<2>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->src2 = op.rs2.reg_ptr(); + } + else if (size == 4) + { + auto opc = new opcode_writem<4>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->src2 = op.rs2.reg_ptr(); + } + else if (size == 8) + { + auto opc = new opcode_writem<8>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->src2 = op.rs2.reg_ptr(); + } + } + } + break; + + default: + shil_chf[op.op](&op); + break; + } + } + + //Block end opcode + { + opcodeExec* op; + /* NOTE(review): the switch below has no default, so op stays uninitialized if BlockType matches none of the CASEWS entries -- confirm every block type is listed. */ + #define CASEWS(n) case n: op = (new opcode_blockend())->setup(block); break + + switch (block->BlockType) { + CASEWS(BET_StaticJump); + CASEWS(BET_StaticCall); + CASEWS(BET_StaticIntr); + + CASEWS(BET_DynamicJump); + CASEWS(BET_DynamicCall); + CASEWS(BET_DynamicRet); + CASEWS(BET_DynamicIntr); + + CASEWS(BET_Cond_0); + CASEWS(BET_Cond_1); + } + + ptrs.ptrs[block->oplist.size()] = op; + } + + } + /* State of the canonical call being described: the recorded parameters and the function to call. */ + CC_pars_t CC_pars; + void* ccfn; + /* Starts a new canonical call: clears the recorded parameters and the callee pointer. */ + void ngen_CC_Start(shil_opcode* op) + { + CC_pars.clear(); + ccfn = 0; + } + /* Records one parameter as a (canonical type, pointer to shil_param) pair; op is not used. */ + void ngen_CC_param(shil_opcode& op, shil_param& prm, CanonicalParamType tp) { + CC_PS t = { tp, &prm }; + CC_pars.push_back(t); + } + /* Remembers the function pointer for the current canonical call; nothing else happens here. */ + void ngen_CC_Call(shil_opcode*op, void* function) + { + ccfn = function; + } + /* Builds a signature key with two characters per recorded parameter (type + 'a', prm->type + 'A'), or "vV" when there are none, and looks it up in unmap. On a hit the mapped factory is called with (CC_pars, ccfn, op) and its result stored in ptrsg[opcode_index]; on a miss a message is printed and an opcodeDie is stored instead. */ + void ngen_CC_Finish(shil_opcode* op) + { + string nm = ""; + for (auto m : CC_pars) { + nm += (char)(m.type + 'a'); + nm += (char)(m.prm->type + 'A'); + } + if (!nm.size()) + nm = "vV"; + + if (unmap.count(nm)) { + ptrsg[opcode_index] = unmap[nm](CC_pars, ccfn, op); + } + else { + printf("IMPLEMENT CC_CALL CLASS: %s\n", nm.c_str()); +
ptrsg[opcode_index] = new opcodeDie(); + } + } + +}; +/* Compiler instance used by the ngen_CC_* forwarders below; it is only non-null while ngen_Compile is running. */ +BlockCompiler* compiler; +/* Compiles one block: verifies that at least 16 KiB of emit space is free, creates a BlockCompiler, runs compile() with the caller's flags, then destroys it. The global is reset afterwards so it never dangles. */ +void ngen_Compile(RuntimeBlockInfo* block, bool force_checks, bool reset, bool staging, bool optimise) +{ + verify(emit_FreeSpace() >= 16 * 1024); + + compiler = new BlockCompiler(); + + + compiler->compile(block, force_checks, reset, staging, optimise); + + delete compiler; + compiler = nullptr; +} + +/* The four functions below forward the ngen_CC_* canonical-call hooks to the active BlockCompiler. */ + +void ngen_CC_Start(shil_opcode* op) +{ + compiler->ngen_CC_Start(op); +} + +void ngen_CC_Param(shil_opcode* op, shil_param* par, CanonicalParamType tp) +{ + compiler->ngen_CC_param(*op, *par, tp); +} + +void ngen_CC_Call(shil_opcode*op, void* function) +{ + compiler->ngen_CC_Call(op, function); +} + +void ngen_CC_Finish(shil_opcode* op) +{ + compiler->ngen_CC_Finish(op); +} +/* Resets idxnxx to 0. Nothing is freed here: the cleanup loop kept in the comment below is disabled. */ +void ngen_ResetBlocks() +{ + idxnxx = 0; + /* Disabled draft cleanup; as written it never advances its index 'id' (which is no longer declared). + while (dispatchb[id].fnb) + delete dispatchb[id].fnb; + */ +} +#endif diff --git a/core/rec-x86/rec_x86_driver.cpp b/core/rec-x86/rec_x86_driver.cpp index 027be23af..b11da90ea 100644 --- a/core/rec-x86/rec_x86_driver.cpp +++ b/core/rec-x86/rec_x86_driver.cpp @@ -265,6 +265,7 @@ u32* GetRegPtr(u32 reg) u32 cvld; u32 rdmt[6]; extern u32 memops_t,memops_l; +extern int mips_counter; void CheckBlock(RuntimeBlockInfo* block,x86_ptr_imm place) { @@ -286,6 +287,8 @@ void CheckBlock(RuntimeBlockInfo* block,x86_ptr_imm place) } } + + void ngen_Compile(RuntimeBlockInfo* block,bool force_checks, bool reset, bool staging,bool optimise) { //initialise stuff @@ -305,7 +308,11 @@ void ngen_Compile(RuntimeBlockInfo* block,bool force_checks, bool reset, bool st x86e->Emit(op_add32,&memops_t,block->memops); x86e->Emit(op_add32,&memops_l,block->linkedmemops); - + +#ifdef MIPS_COUNTER + x86e->Emit(op_add32, &mips_counter, block->oplist.size()); +#endif + //run register allocator reg.DoAlloc(block,alloc_regs,xmm_alloc_regs); diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 5c00100ca..fc589c45a 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -1034,7
+1034,7 @@ void tryfit(float* x,float* y) for (int i=0;i<128;i++) { float diff=min(max(b*logf(x[i])/logf(2.0)+a,(double)0),(double)1)-y[i]; - maxdev=max((float)abs(diff),(float)maxdev); + maxdev=max((float)fabs((float)diff),(float)maxdev); } printf("FOG TABLE Curve match: maxdev: %.02f cents\n",maxdev*100); fog_coefs[0]=a; diff --git a/shell/linCPP/Makefile b/shell/linCPP/Makefile new file mode 100644 index 000000000..9489d42b8 --- /dev/null +++ b/shell/linCPP/Makefile @@ -0,0 +1,122 @@ + +LOCAL_PATH := $(call my-dir) +FOR_LINUX :=1 +NOT_ARM := 1 +CPP_REC := 1 +#NO_REC := 1 +#NO_REND := 1 +WEBUI :=1 +USE_ALSA := 1 +USE_OSS := 1 +#USE_PULSEAUDIO := 1 + +RZDCY_SRC_DIR = ../../core + +include $(RZDCY_SRC_DIR)/core.mk + + +CXX=${CC_PREFIX}g++ +CC=${CC_PREFIX}gcc +AS=${CC_PREFIX}as +STRIP=${CC_PREFIX}strip + +LD=${CC} + +MFLAGS := #-m32 +#-marm -march=armv7-a -mtune=cortex-a9 -mfpu=neon -mfloat-abi=softfp -funroll-loops +ASFLAGS := +#-march=armv7-a -mfpu=neon -mfloat-abi=softfp +# LDFLAGS is recursively expanded (=) on purpose: $@ only has a value inside a recipe, so with := the map file name collapsed to ".map". +LDFLAGS = -g -Wl,-Map,$(notdir $@).map,--gc-sections -Wl,-O3 -Wl,--sort-common + +CFLAGS := -g -O3 -D RELEASE -c -D TARGET_LINUX_x64 -D USES_HOMEDIR -D TARGET_NO_JIT +CFLAGS += -D SUPPORT_X11 +CFLAGS += -frename-registers -fno-strict-aliasing #-fsingle-precision-constant +CFLAGS += -ffast-math -ftree-vectorize + + +#-fprefetch-loop-arrays +#-std=c++0x +CXXFLAGS += $(CFLAGS) $(MFLAGS) -fexceptions -fno-rtti -fpermissive -std=gnu++11 +CXXFLAGS += -D SUPPORT_X11 +CXXFLAGS += -fno-operator-names + +ifdef PGO_MAKE + CFLAGS += -fprofile-generate -pg + LDFLAGS += -fprofile-generate +else + CFLAGS += -fomit-frame-pointer +endif + +ifdef PGO_USE + CFLAGS += -fprofile-use +endif + + +ifdef LTO_TEST + CFLAGS += -flto -fwhole-program + LDFLAGS +=-flto -fwhole-program +endif + +INCS := -I$(RZDCY_SRC_DIR) -I$(RZDCY_SRC_DIR)/deps -I$(RZDCY_SRC_DIR)/khronos + +LIBS := # use system libs +LIBS += -lm -lrt -ldl +LIBS += -lpthread -lX11 + +ifdef USE_ALSA + CXXFLAGS += -D USE_ALSA + LIBS += -lasound +endif +
+ifdef USE_OSS + CXXFLAGS += -D USE_OSS +endif + +ifdef USE_PULSEAUDIO + CXXFLAGS += -D USE_PULSEAUDIO + LIBS += -lpulse-simple +endif + +ifdef USE_GLES + CXXFLAGS += -DGLES + LIBS += -lEGL -lGLESv2 +else + LIBS += -ldl -lGL #for desktop gl +endif + + +OBJECTS=$(RZDCY_FILES:.cpp=.build_obj) +OBJECTS:=$(OBJECTS:.c=.build_obj) +OBJECTS:=$(OBJECTS:.S=.build_obj) +OBJECTS:=$(patsubst $(RZDCY_SRC_DIR)/%,obj/%,$(OBJECTS)) + + +EXECUTABLE_STRIPPED=nosym-reicast.elf +EXECUTABLE=reicast.elf + +PACKAGE_FILES=$(EXECUTABLE_STRIPPED) default.gcw0.desktop icon-32.png + +all: $(CPPFILES) $(EXECUTABLE) $(EXECUTABLE_STRIPPED) + +$(EXECUTABLE): $(OBJECTS) + $(CXX) $(MFLAGS) $(EXTRAFLAGS) $(LDFLAGS) $(OBJECTS) $(LIBS) -o $@ + +$(EXECUTABLE_STRIPPED): $(EXECUTABLE) + cp $< $@ && $(STRIP) $@ + +obj/%.build_obj : $(RZDCY_SRC_DIR)/%.cpp + mkdir -p $(dir $@) + $(CXX) $(EXTRAFLAGS) $(INCS) $(CFLAGS) $(CXXFLAGS) $< -o $@ + +obj/%.build_obj : $(RZDCY_SRC_DIR)/%.c + mkdir -p $(dir $@) + $(CC) $(EXTRAFLAGS) $(INCS) $(CFLAGS) $< -o $@ + +obj/%.build_obj : $(RZDCY_SRC_DIR)/%.S + mkdir -p $(dir $@) + $(AS) $(ASFLAGS) $(INCS) $< -o $@ +# all and clean are phony so files with those names cannot mask them; clean removes the objects and both the unstripped and stripped executables. +.PHONY: all clean +clean: + rm -f $(OBJECTS) $(EXECUTABLE) $(EXECUTABLE_STRIPPED) diff --git a/shell/reicast.sln b/shell/reicast.sln index 0b0428f0d..d04865b50 100644 --- a/shell/reicast.sln +++ b/shell/reicast.sln @@ -1,8 +1,15 @@ ﻿ -Microsoft Visual Studio Solution File, Format Version 11.00 -# Visual Studio 2010 +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 2013 +VisualStudioVersion = 12.0.31101.0 +MinimumVisualStudioVersion = 10.0.40219.1 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "reicast", "reicast.vcxproj", "{58B14048-EACB-4780-8B1E-9C84C2C30A8E}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{B03DF793-41BA-4F47-A4F2-C06E52FAFB13}" + ProjectSection(SolutionItems) = preProject + Performance2.psess = Performance2.psess + EndProjectSection +EndProject Global GlobalSection(SolutionConfigurationPlatforms) =
preSolution Debug|Win32 = Debug|Win32 @@ -23,4 +30,7 @@ Global GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection + GlobalSection(Performance) = preSolution + HasPerformanceSessions = true + EndGlobalSection EndGlobal diff --git a/shell/reicast.vcxproj b/shell/reicast.vcxproj index bf22e57e8..6da835c0b 100644 --- a/shell/reicast.vcxproj +++ b/shell/reicast.vcxproj @@ -162,6 +162,10 @@ true true + + /bigobj %(AdditionalOptions) + /bigobj %(AdditionalOptions) + true diff --git a/shell/reicast.vcxproj.filters b/shell/reicast.vcxproj.filters index 3ea65d2bd..d63fdd35d 100644 --- a/shell/reicast.vcxproj.filters +++ b/shell/reicast.vcxproj.filters @@ -414,6 +414,9 @@ rec-x64 + + rec-cpp + @@ -542,6 +545,9 @@ {f73263e9-dbe8-4a6f-8b73-335af8307551} + + {63d1fcf2-64b4-4973-995f-cd471f51117c} +