From cd80850acb4e743ad5047a5744a921b47f07c2c6 Mon Sep 17 00:00:00 2001 From: Stefanos Kornilios Mitsis Poiitidis Date: Wed, 22 Jul 2015 21:28:55 +0200 Subject: [PATCH 01/18] First idea for a fully portable "jit" --- core/rec-cpp/rec_cpp.cpp | 503 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 503 insertions(+) create mode 100644 core/rec-cpp/rec_cpp.cpp diff --git a/core/rec-cpp/rec_cpp.cpp b/core/rec-cpp/rec_cpp.cpp new file mode 100644 index 000000000..b9c2cf10d --- /dev/null +++ b/core/rec-cpp/rec_cpp.cpp @@ -0,0 +1,503 @@ + +#include "types.h" + +#include + +#if FEAT_SHREC == DYNAREC_CPP +#include "hw/sh4/sh4_opcode_list.h" +#include "hw/sh4/modules/ccn.h" +#include "hw/sh4/sh4_interrupts.h" + +#include "hw/sh4/sh4_core.h" +#include "hw/sh4/dyna/ngen.h" +#include "hw/sh4/sh4_mem.h" +#include "hw/sh4/dyna/regalloc.h" +#include "emitter/x86_emitter.h" +#include "profiler/profiler.h" +#include "oslib/oslib.h" + + +struct DynaRBI : RuntimeBlockInfo +{ + virtual u32 Relink() { + //verify(false); + return 0; + } + + virtual void Relocate(void* dst) { + verify(false); + } +}; + + + +int cycle_counter; + +void ngen_FailedToFindBlock_internal() { + rdv_FailedToFindBlock(Sh4cntx.pc); +} + +void(*ngen_FailedToFindBlock)() = &ngen_FailedToFindBlock_internal; + +void ngen_mainloop(void* v_cntx) +{ + Sh4RCB* ctx = (Sh4RCB*)((u8*)v_cntx - sizeof(Sh4RCB)); + + cycle_counter = 0; + + for (;;) { + cycle_counter = SH4_TIMESLICE; + do { + DynarecCodeEntryPtr rcb = bm_GetCode(ctx->cntx.pc); + rcb(); + } while (cycle_counter > 0); + + if (UpdateSystem()) { + rdv_DoInterrupts_pc(ctx->cntx.pc); + } + } +} + +void ngen_init() +{ +} + + +void ngen_GetFeatures(ngen_features* dst) +{ + dst->InterpreterFallback = true; + dst->OnlyDynamicEnds = true; +} + +RuntimeBlockInfo* ngen_AllocateBlock() +{ + return new DynaRBI(); +} + +u32* GetRegPtr(u32 reg) +{ + return Sh4_int_GetRegisterPtr((Sh4RegType)reg); +} + +class opcodeExec { + public: + virtual void execute() = 0; +}; + +class 
opcodeDie : public opcodeExec { + void execute() { + die("death opcode"); + } +}; + +struct opcode_ifb_pc : public opcodeExec { + OpCallFP* oph; + u32 pc; + u16 opcode; + + void execute() { + next_pc = pc; + oph(opcode); + } +}; + +struct opcode_ifb : public opcodeExec { + OpCallFP* oph; + u16 opcode; + + void execute() { + oph(opcode); + } +}; + +struct opcode_jdyn : public opcodeExec { + u32* src; + void execute() { + next_pc = *src; + } +}; + +struct opcode_jdyn_imm : public opcodeExec { + u32* src; + u32 imm; + void execute() { + next_pc = *src + imm; + } +}; + +struct opcode_mov32 : public opcodeExec { + u32* src; + u32* dst; + + void execute() { + *dst = *src; + } +}; + +struct opcode_mov32_imm : public opcodeExec { + u32 src; + u32* dst; + + void execute() { + *dst = src; + } +}; + +template +class fnblock { +public: + opcodeExec* ops[cnt]; + int cc; + void execute() { + cycle_counter -= cc; + for (int i = 0; i < cnt; i++) { + ops[i]->execute(); + } + } + + static void runner(void* fnb) { + ((fnblock*)fnb)->execute(); + } +}; + +template <> +class fnblock<0> { + void execute() { + die("WHATNOT"); + } +}; + +struct fnrv { + void* fnb; + void(*runner)(void* fnb); + opcodeExec** ptrs; +}; + +template +fnrv fnnCtor(int cycles) { + auto rv = new fnblock(); + rv->cc = cycles; + fnrv rvb = { rv, &fnblock::runner, rv->ops }; + return rvb; +} + +template<> +fnrv fnnCtor<0>(int cycles) { + fnrv rvb = { 0, 0, 0 }; + return rvb; +} + +template +opcodeExec* createType() { + return new CTR(); +} + +map< const char*, opcodeExec*(*)()> unmap = { + { "uru", &createType }, +}; + +struct { + void* fnb; + void(*runner)(void* fnb); +} dispatchb[8192]; + +template +void disaptchn() { + dispatchb[n].runner(dispatchb[n].fnb); +} + +int idxnxx = 0; +//&disaptchn +#define REP_1(x, phrase) phrase < x > +#define REP_2(x, phrase) REP_1(x, phrase), REP_1(x+1, phrase) +#define REP_4(x, phrase) REP_2(x, phrase), REP_2(x+2, phrase) +#define REP_8(x, phrase) REP_4(x, phrase), REP_4(x+4, 
phrase) +#define REP_16(x, phrase) REP_8(x, phrase), REP_8(x+8, phrase) +#define REP_32(x, phrase) REP_16(x, phrase), REP_16(x+16, phrase) +#define REP_64(x, phrase) REP_32(x, phrase), REP_32(x+32, phrase) +#define REP_128(x, phrase) REP_64(x, phrase), REP_64(x+64, phrase) +#define REP_256(x, phrase) REP_128(x, phrase), REP_128(x+128, phrase) +#define REP_512(x, phrase) REP_256(x, phrase), REP_256(x+256, phrase) +#define REP_1024(x, phrase) REP_512(x, phrase), REP_512(x+512, phrase) +#define REP_2048(x, phrase) REP_1024(x, phrase), REP_1024(x+1024, phrase) +#define REP_4096(x, phrase) REP_2048(x, phrase), REP_2048(x+2048, phrase) +#define REP_8192(x, phrase) REP_4096(x, phrase), REP_4096(x+4096, phrase) + + +DynarecCodeEntryPtr FNS[] = { REP_8192(0, &disaptchn) }; + +typedef fnrv(*FNAFB)(int cycles); + +FNAFB FNA[] = { REP_512(0, &fnnCtor) }; + +DynarecCodeEntryPtr getndpn_forreal(int n) { + if (n > 8192) + return 0; + else + return FNS[n]; +} + +FNAFB fnnCtor_forreal(int n) { + if (n > 512) + return 0; + else + return FNA[n]; +} + +class BlockCompiler { +public: + + void compile(RuntimeBlockInfo* block, bool force_checks, bool reset, bool staging, bool optimise) { + + auto ptrs = fnnCtor_forreal(block->oplist.size())(block->guest_cycles); + + dispatchb[idxnxx].fnb = ptrs.fnb; + dispatchb[idxnxx].runner = ptrs.runner; + + block->code = getndpn_forreal(idxnxx++); + + for (size_t i = 0; i < block->oplist.size(); i++) { + shil_opcode& op = block->oplist[i]; + switch (op.op) { + + case shop_ifb: + { + if (op.rs1._imm) { + auto opc = new opcode_ifb_pc(); + ptrs.ptrs[i] = opc; + + opc->pc = op.rs2._imm; + opc->opcode = op.rs3._imm; + + opc->oph = OpDesc[op.rs3._imm]->oph; + } + else { + auto opc = new opcode_ifb(); + ptrs.ptrs[i] = opc; + + opc->opcode = op.rs3._imm; + + opc->oph = OpDesc[op.rs3._imm]->oph; + } + } + break; + + case shop_jdyn: + { + if (op.rs2.is_imm()) { + auto opc = new opcode_jdyn_imm(); + ptrs.ptrs[i] = opc; + + opc->src = op.rs1.reg_ptr(); + 
opc->imm = op.rs2._imm; + } + else { + auto opc = new opcode_jdyn(); + ptrs.ptrs[i] = opc; + + opc->src = op.rs1.reg_ptr(); + } + + } + break; + + case shop_mov32: + { + verify(op.rd.is_reg()); + + verify(op.rs1.is_reg() || op.rs1.is_imm()); + + + if (op.rs1.is_imm()) { + auto opc = new opcode_mov32_imm(); + ptrs.ptrs[i] = opc; + + opc->src = op.rs1._imm; + opc->dst = op.rd.reg_ptr(); + } + else { + auto opc = new opcode_mov32(); + ptrs.ptrs[i] = opc; + + opc->src = op.rs1.reg_ptr(); + opc->dst = op.rd.reg_ptr(); + } + + + } + break; + + /* + case shop_mov32: + { + verify(op.rd.is_reg()); + + verify(op.rs1.is_reg() || op.rs1.is_imm()); + + sh_to_reg(op.rs1, mov, ecx); + + reg_to_sh(op.rd, ecx); + } + break; + + case shop_mov64: + { + verify(op.rd.is_reg()); + + verify(op.rs1.is_reg() || op.rs1.is_imm()); + + sh_to_reg(op.rs1, mov, rcx); + + reg_to_sh(op.rd, rcx); + } + break; + + case shop_readm: + { + sh_to_reg(op.rs1, mov, call_regs[0]); + sh_to_reg(op.rs3, add, call_regs[0]); + + u32 size = op.flags & 0x7f; + + if (size == 1) { + call(ReadMem8); + movsx(rcx, al); + } + else if (size == 2) { + call(ReadMem16); + movsx(rcx, ax); + } + else if (size == 4) { + call(ReadMem32); + mov(rcx, rax); + } + else if (size == 8) { + call(ReadMem64); + mov(rcx, rax); + } + else { + die("1..8 bytes"); + } + + if (size != 8) + reg_to_sh(op.rd, ecx); + else + reg_to_sh(op.rd, rcx); + } + break; + + case shop_writem: + { + u32 size = op.flags & 0x7f; + sh_to_reg(op.rs1, mov, call_regs[0]); + sh_to_reg(op.rs3, add, call_regs[0]); + + if (size != 8) + sh_to_reg(op.rs2, mov, call_regs[1]); + else + sh_to_reg(op.rs2, mov, call_regs64[1]); + + if (size == 1) + call(WriteMem8); + else if (size == 2) + call(WriteMem16); + else if (size == 4) + call(WriteMem32); + else if (size == 8) + call(WriteMem64); + else { + die("1..8 bytes"); + } + } + break; + */ + default: + shil_chf[op.op](&op); + break; + } + } + + verify(block->BlockType == BET_DynamicJump); + + //emit_Skip(getSize()); + } + + 
struct CC_PS + { + CanonicalParamType type; + shil_param* prm; + }; + + vector CC_pars; + void* ccfn; + + void ngen_CC_Start(shil_opcode* op) + { + CC_pars.clear(); + ccfn = 0; + } + + void ngen_CC_param(shil_opcode& op, shil_param& prm, CanonicalParamType tp) { + CC_PS t = { tp, &prm }; + CC_pars.push_back(t); + } + + void ngen_CC_Call(shil_opcode*op, void* function) + { + ccfn = function; + } + + void ngen_CC_Finish(shil_opcode* op) + { + //lookup + die("false"); + } + +}; + +BlockCompiler* compiler; + +void ngen_Compile(RuntimeBlockInfo* block, bool force_checks, bool reset, bool staging, bool optimise) +{ + verify(emit_FreeSpace() >= 16 * 1024); + + compiler = new BlockCompiler(); + + + compiler->compile(block, force_checks, reset, staging, optimise); + + delete compiler; +} + + + +void ngen_CC_Start(shil_opcode* op) +{ + compiler->ngen_CC_Start(op); +} + +void ngen_CC_Param(shil_opcode* op, shil_param* par, CanonicalParamType tp) +{ + compiler->ngen_CC_param(*op, *par, tp); +} + +void ngen_CC_Call(shil_opcode*op, void* function) +{ + compiler->ngen_CC_Call(op, function); +} + +void ngen_CC_Finish(shil_opcode* op) +{ + compiler->ngen_CC_Finish(op); +} + +void ngen_ResetBlocks() +{ + idxnxx = 0; + int id = 0; + /* + while (dispatchb[id].fnb) + delete dispatchb[id].fnb; + */ +} +#endif \ No newline at end of file From bfef969626b77fdd6434a9f78807971ace67c656 Mon Sep 17 00:00:00 2001 From: Stefanos Kornilios Mitsis Poiitidis Date: Thu, 23 Jul 2015 02:07:16 +0200 Subject: [PATCH 02/18] rec-cpp: Implement stuff up to the corejit layer --- core/rec-cpp/rec_cpp.cpp | 687 ++++++++++++++++++++++++++++++---- shell/reicast.sln | 14 +- shell/reicast.vcxproj | 4 + shell/reicast.vcxproj.filters | 6 + 4 files changed, 630 insertions(+), 81 deletions(-) diff --git a/core/rec-cpp/rec_cpp.cpp b/core/rec-cpp/rec_cpp.cpp index b9c2cf10d..ada419f46 100644 --- a/core/rec-cpp/rec_cpp.cpp +++ b/core/rec-cpp/rec_cpp.cpp @@ -65,7 +65,7 @@ void ngen_init() void 
ngen_GetFeatures(ngen_features* dst) { - dst->InterpreterFallback = true; + dst->InterpreterFallback = false; dst->OnlyDynamicEnds = true; } @@ -90,6 +90,301 @@ class opcodeDie : public opcodeExec { } }; +struct CC_PS +{ + CanonicalParamType type; + shil_param* prm; +}; + +typedef vector CC_pars_t; + +struct opcode_cc_aBaCbC : public opcodeExec { + void* fn; + u32* rs1; + u32 rs2; + u32* rd; + void execute() { + *rd = ((u32(*)(u32, u32))fn)(*rs1, rs2); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs2 = prms[0].prm->imm_value(); + rs1 = prms[1].prm->reg_ptr(); + rd = prms[2].prm->reg_ptr(); + verify(prms.size() == 3); + } +}; + +struct opcode_cc_aCaCbC : public opcodeExec { + void* fn; + u32* rs1; + u32* rs2; + u32* rd; + void execute() { + *rd = ((u32(*)(u32, u32))fn)(*rs1, *rs2); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs2 = prms[0].prm->reg_ptr(); + rs1 = prms[1].prm->reg_ptr(); + rd = prms[2].prm->reg_ptr(); + verify(prms.size() == 3); + } +}; + +struct opcode_cc_aCbC : public opcodeExec { + void* fn; + u32* rs1; + u32* rd; + void execute() { + *rd = ((u32(*)(u32))fn)(*rs1); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs1 = prms[0].prm->reg_ptr(); + rd = prms[1].prm->reg_ptr(); + verify(prms.size() == 2); + } +}; + +struct opcode_cc_aC : public opcodeExec { + void* fn; + u32* rs1; + void execute() { + ((void(*)(u32))fn)(*rs1); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs1 = prms[0].prm->reg_ptr(); + verify(prms.size() == 1); + } +}; + +struct opcode_cc_aCaCaCbC : public opcodeExec { + void* fn; + u32* rs1; + u32* rs2; + u32* rs3; + u32* rd; + void execute() { + *rd = ((u32(*)(u32, u32, u32))fn)(*rs1, *rs2, *rs3); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs3 = prms[0].prm->reg_ptr(); + rs2 = prms[1].prm->reg_ptr(); + rs1 = prms[2].prm->reg_ptr(); + rd = prms[3].prm->reg_ptr(); + verify(prms.size() == 4); + } +}; + +//split this to 
two cases, u64 and u64L/u32H +struct opcode_cc_aCaCaCcCdC : public opcodeExec { + void* fn; + u32* rs1; + u32* rs2; + u32* rs3; + u32* rd; + u32* rd2; + void execute() { + auto rv = ((u64(*)(u32, u32, u32))fn)(*rs1, *rs2, *rs3); + + *rd = rv; + *rd2 = rv >> 32; + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs3 = prms[0].prm->reg_ptr(); + rs2 = prms[1].prm->reg_ptr(); + rs1 = prms[2].prm->reg_ptr(); + rd = prms[3].prm->reg_ptr(); + rd2 = prms[4].prm->reg_ptr(); + + //verify((u64*)(rd2 - 1) == rd); + verify(prms.size() == 5); + } +}; + + +struct opcode_cc_aCaCcCdC : public opcodeExec { + void* fn; + u32* rs1; + u32* rs2; + u32* rd; + u32* rd2; + void execute() { + auto rv = ((u64(*)(u32, u32))fn)(*rs1, *rs2); + *rd = rv; + *rd2 = rv >> 32; + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs2 = prms[0].prm->reg_ptr(); + rs1 = prms[1].prm->reg_ptr(); + rd = prms[2].prm->reg_ptr(); + rd2 = prms[3].prm->reg_ptr(); + + verify(prms.size() == 4); + } +}; + + +struct opcode_cc_eDeDeDfD : public opcodeExec { + void* fn; + f32* rs1; + f32* rs2; + f32* rs3; + f32* rd; + void execute() { + *rd = ((f32(*)(f32, f32, f32))fn)(*rs1, *rs2, *rs3); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs3 = (f32*)prms[0].prm->reg_ptr(); + rs2 = (f32*)prms[1].prm->reg_ptr(); + rs1 = (f32*)prms[2].prm->reg_ptr(); + rd = (f32*)prms[3].prm->reg_ptr(); + } +}; + + +struct opcode_cc_eDeDfD : public opcodeExec { + void* fn; + f32* rs1; + f32* rs2; + f32* rd; + void execute() { + *rd = ((f32(*)(f32, f32))fn)(*rs1, *rs2); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs2 = (f32*)prms[0].prm->reg_ptr(); + rs1 = (f32*)prms[1].prm->reg_ptr(); + rd = (f32*)prms[2].prm->reg_ptr(); + } +}; + +struct opcode_cc_eDeDbC : public opcodeExec { + void* fn; + f32* rs1; + f32* rs2; + u32* rd; + void execute() { + *rd = ((u32(*)(f32, f32))fn)(*rs1, *rs2); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs2 = 
(f32*)prms[0].prm->reg_ptr(); + rs1 = (f32*)prms[1].prm->reg_ptr(); + rd = (u32*)prms[2].prm->reg_ptr(); + } +}; + +struct opcode_cc_eDbC : public opcodeExec { + void* fn; + f32* rs1; + u32* rd; + void execute() { + *rd = ((u32(*)(f32))fn)(*rs1); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs1 = (f32*)prms[0].prm->reg_ptr(); + rd = (u32*)prms[1].prm->reg_ptr(); + } +}; + +struct opcode_cc_aCfD : public opcodeExec { + void* fn; + u32* rs1; + f32* rd; + void execute() { + *rd = ((f32(*)(u32))fn)(*rs1); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs1 = (u32*)prms[0].prm->reg_ptr(); + rd = (f32*)prms[1].prm->reg_ptr(); + } +}; + +struct opcode_cc_eDfD : public opcodeExec { + void* fn; + f32* rs1; + f32* rd; + void execute() { + *rd = ((f32(*)(f32))fn)(*rs1); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs1 = (f32*)prms[0].prm->reg_ptr(); + rd = (f32*)prms[1].prm->reg_ptr(); + } +}; + +struct opcode_cc_aCgE : public opcodeExec { + void* fn; + u32* rs1; + f32* rd; + void execute() { + ((void(*)(f32*, u32))fn)(rd, *rs1); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs1 = (u32*)prms[0].prm->reg_ptr(); + rd = (f32*)prms[1].prm->reg_ptr(); + } +}; + +struct opcode_cc_gJgHgH : public opcodeExec { + void* fn; + f32* rs2; + f32* rs1; + f32* rd; + void execute() { + ((void(*)(f32*, f32*, f32*))fn)(rd, rs1, rs2); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs2 = (f32*)prms[0].prm->reg_ptr(); + rs1 = (f32*)prms[1].prm->reg_ptr(); + rd = (f32*)prms[2].prm->reg_ptr(); + } +}; + +struct opcode_cc_gHgHfD : public opcodeExec { + void* fn; + f32* rs2; + f32* rs1; + f32* rd; + void execute() { + *rd = ((f32(*)(f32*, f32*))fn)(rs1, rs2); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs2 = (f32*)prms[0].prm->reg_ptr(); + rs1 = (f32*)prms[1].prm->reg_ptr(); + rd = (f32*)prms[2].prm->reg_ptr(); + } +}; + struct opcode_ifb_pc : public opcodeExec { 
OpCallFP* oph; u32 pc; @@ -143,6 +438,113 @@ struct opcode_mov32_imm : public opcodeExec { } }; +struct opcode_mov64 : public opcodeExec { + u64* src; + u64* dst; + + void execute() { + *dst = *src; + } +}; + +#define do_readm(d, a, sz) do { if (sz == 1) { *d = (s32)(s8)ReadMem8(a); } else if (sz == 2) { *d = (s32)(s16)ReadMem16(a); } \ + else if (sz == 4) { *d = ReadMem32(a);} else if (sz == 8) { *(u64*)d = ReadMem64(a); } \ + } while(0) +template +struct opcode_readm : public opcodeExec { + u32* src; + u32* dst; + + void execute() { + auto a = *src; + do_readm(dst, a, sz); + } +}; + +template +struct opcode_readm_imm : public opcodeExec { + u32 src; + u32* dst; + + void execute() { + auto a = src; + do_readm(dst, a, sz); + } +}; + +template +struct opcode_readm_offs : public opcodeExec { + u32* src; + u32* dst; + u32* offs; + + void execute() { + auto a = *src + *offs; + do_readm(dst, a, sz); + } +}; + +template +struct opcode_readm_offs_imm : public opcodeExec { + u32* src; + u32* dst; + u32 offs; + + void execute() { + auto a = *src + offs; + do_readm(dst, a, sz); + } +}; + +#define do_writem(d, a, sz) do { if (sz == 1) { WriteMem8(a, *d);} else if (sz == 2) { WriteMem16(a, *d); } \ + else if (sz == 4) { WriteMem32(a, *d);} else if (sz == 8) { WriteMem64(a, *(u64*)d); } \ + } while(0) +template +struct opcode_writem : public opcodeExec { + u32* src; + u32* src2; + + void execute() { + auto a = *src; + do_writem(src2, a, sz); + } +}; + +template +struct opcode_writem_imm : public opcodeExec { + u32 src; + u32* src2; + + void execute() { + auto a = src; + do_writem(src2, a, sz); + } +}; + +template +struct opcode_writem_offs : public opcodeExec { + u32* src; + u32* src2; + u32* offs; + + void execute() { + auto a = *src + *offs; + do_writem(src2, a, sz); + } +}; + +template +struct opcode_writem_offs_imm : public opcodeExec { + u32* src; + u32* src2; + u32 offs; + + void execute() { + auto a = *src + offs; + do_writem(src2, a, sz); + } +}; + template class 
fnblock { public: @@ -188,12 +590,35 @@ fnrv fnnCtor<0>(int cycles) { } template -opcodeExec* createType() { - return new CTR(); +opcodeExec* createType(const CC_pars_t& prms, void* fun) { + auto rv = new CTR(); + + rv->setup(prms, fun); + return rv; } -map< const char*, opcodeExec*(*)()> unmap = { - { "uru", &createType }, +map< string, opcodeExec*(*)(const CC_pars_t& prms, void* fun)> unmap = { + { "aBaCbC", &createType }, + { "aCaCbC", &createType }, + { "aCbC", &createType }, + { "aC", &createType }, + + { "eDeDeDfD", &createType }, + { "eDeDfD", &createType }, + + { "aCaCaCbC", &createType }, + { "aCaCcCdC", &createType }, + { "aCaCaCcCdC", &createType }, + + { "eDbC", &createType }, + { "aCfD", &createType }, + + { "eDeDbC", &createType }, + { "eDfD", &createType }, + + { "aCgE", &createType }, + { "gJgHgH", &createType }, + { "gHgHfD", &createType }, }; struct { @@ -237,7 +662,7 @@ DynarecCodeEntryPtr getndpn_forreal(int n) { return FNS[n]; } -FNAFB fnnCtor_forreal(int n) { +FNAFB fnnCtor_forreal(size_t n) { if (n > 512) return 0; else @@ -247,37 +672,42 @@ FNAFB fnnCtor_forreal(int n) { class BlockCompiler { public: + size_t opcode_index; + opcodeExec** ptrsg; void compile(RuntimeBlockInfo* block, bool force_checks, bool reset, bool staging, bool optimise) { auto ptrs = fnnCtor_forreal(block->oplist.size())(block->guest_cycles); + ptrsg = ptrs.ptrs; + dispatchb[idxnxx].fnb = ptrs.fnb; dispatchb[idxnxx].runner = ptrs.runner; block->code = getndpn_forreal(idxnxx++); for (size_t i = 0; i < block->oplist.size(); i++) { + opcode_index = i; shil_opcode& op = block->oplist[i]; switch (op.op) { case shop_ifb: { - if (op.rs1._imm) { + if (op.rs1.imm_value()) { auto opc = new opcode_ifb_pc(); ptrs.ptrs[i] = opc; - opc->pc = op.rs2._imm; - opc->opcode = op.rs3._imm; + opc->pc = op.rs2.imm_value(); + opc->opcode = op.rs3.imm_value(); - opc->oph = OpDesc[op.rs3._imm]->oph; + opc->oph = OpDesc[op.rs3.imm_value()]->oph; } else { auto opc = new opcode_ifb(); ptrs.ptrs[i] = 
opc; - opc->opcode = op.rs3._imm; + opc->opcode = op.rs3.imm_value(); - opc->oph = OpDesc[op.rs3._imm]->oph; + opc->oph = OpDesc[op.rs3.imm_value()]->oph; } } break; @@ -289,7 +719,7 @@ public: ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); - opc->imm = op.rs2._imm; + opc->imm = op.rs2.imm_value(); } else { auto opc = new opcode_jdyn(); @@ -312,7 +742,7 @@ public: auto opc = new opcode_mov32_imm(); ptrs.ptrs[i] = opc; - opc->src = op.rs1._imm; + opc->src = op.rs1.imm_value(); opc->dst = op.rd.reg_ptr(); } else { @@ -327,90 +757,184 @@ public: } break; - /* - case shop_mov32: - { - verify(op.rd.is_reg()); - - verify(op.rs1.is_reg() || op.rs1.is_imm()); - - sh_to_reg(op.rs1, mov, ecx); - - reg_to_sh(op.rd, ecx); - } - break; - case shop_mov64: { verify(op.rd.is_reg()); - verify(op.rs1.is_reg() || op.rs1.is_imm()); + verify(op.rs1.is_reg()); - sh_to_reg(op.rs1, mov, rcx); + auto opc = new opcode_mov64(); + ptrs.ptrs[i] = opc; - reg_to_sh(op.rd, rcx); + opc->src = (u64*) op.rs1.reg_ptr(); + opc->dst = (u64*)op.rd.reg_ptr(); } break; case shop_readm: { - sh_to_reg(op.rs1, mov, call_regs[0]); - sh_to_reg(op.rs3, add, call_regs[0]); - u32 size = op.flags & 0x7f; + if (op.rs1.is_imm()) { + verify(op.rs2.is_null() && op.rs3.is_null()); - if (size == 1) { - call(ReadMem8); - movsx(rcx, al); + if (size == 1) + { + auto opc = new opcode_readm_imm<1>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.imm_value(); opc->dst = op.rd.reg_ptr(); + } + else if (size == 2) + { + auto opc = new opcode_readm_imm<2>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.imm_value(); opc->dst = op.rd.reg_ptr(); + } + else if (size == 4) + { + auto opc = new opcode_readm_imm<4>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.imm_value(); opc->dst = op.rd.reg_ptr(); + } + else if (size == 8) + { + auto opc = new opcode_readm_imm<8>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.imm_value(); opc->dst = op.rd.reg_ptr(); + } } - else if (size == 2) { - call(ReadMem16); - movsx(rcx, ax); + else if (op.rs3.is_imm()) { + 
verify(op.rs2.is_null()); + if (size == 1) + { + auto opc = new opcode_readm_offs_imm<1>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.imm_value(); opc->dst = op.rd.reg_ptr(); + } + else if (size == 2) + { + auto opc = new opcode_readm_offs_imm<2>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.imm_value(); opc->dst = op.rd.reg_ptr(); + } + else if (size == 4) + { + auto opc = new opcode_readm_offs_imm<4>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.imm_value(); opc->dst = op.rd.reg_ptr(); + } + else if (size == 8) + { + auto opc = new opcode_readm_offs_imm<8>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.imm_value(); opc->dst = op.rd.reg_ptr(); + } } - else if (size == 4) { - call(ReadMem32); - mov(rcx, rax); - } - else if (size == 8) { - call(ReadMem64); - mov(rcx, rax); + else if (op.rs3.is_reg()) { + verify(op.rs2.is_null()); + if (size == 1) + { + auto opc = new opcode_readm_offs<1>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.reg_ptr(); opc->dst = op.rd.reg_ptr(); + } + else if (size == 2) + { + auto opc = new opcode_readm_offs<2>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.reg_ptr(); opc->dst = op.rd.reg_ptr(); + } + else if (size == 4) + { + auto opc = new opcode_readm_offs<4>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.reg_ptr(); opc->dst = op.rd.reg_ptr(); + } + else if (size == 8) + { + auto opc = new opcode_readm_offs<8>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.reg_ptr(); opc->dst = op.rd.reg_ptr(); + } } else { - die("1..8 bytes"); + verify(op.rs2.is_null() && op.rs3.is_null()); + if (size == 1) + { + auto opc = new opcode_readm<1>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->dst = op.rd.reg_ptr(); + } + else if (size == 2) + { + auto opc = new opcode_readm<2>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->dst = 
op.rd.reg_ptr(); + } + else if (size == 4) + { + auto opc = new opcode_readm<4>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->dst = op.rd.reg_ptr(); + } + else if (size == 8) + { + auto opc = new opcode_readm<8>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->dst = op.rd.reg_ptr(); + } } - - if (size != 8) - reg_to_sh(op.rd, ecx); - else - reg_to_sh(op.rd, rcx); } break; case shop_writem: { u32 size = op.flags & 0x7f; - sh_to_reg(op.rs1, mov, call_regs[0]); - sh_to_reg(op.rs3, add, call_regs[0]); - - if (size != 8) - sh_to_reg(op.rs2, mov, call_regs[1]); - else - sh_to_reg(op.rs2, mov, call_regs64[1]); - - if (size == 1) - call(WriteMem8); - else if (size == 2) - call(WriteMem16); - else if (size == 4) - call(WriteMem32); - else if (size == 8) - call(WriteMem64); + + if (op.rs1.is_imm()) { + verify(op.rs3.is_null()); + if (size == 1) + { + auto opc = new opcode_writem_imm<1>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.imm_value(); opc->src2 = op.rs2.reg_ptr(); + } + else if (size == 2) + { + auto opc = new opcode_writem_imm<2>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.imm_value(); opc->src2 = op.rs2.reg_ptr(); + } + else if (size == 4) + { + auto opc = new opcode_writem_imm<4>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.imm_value(); opc->src2 = op.rs2.reg_ptr(); + } + else if (size == 8) + { + auto opc = new opcode_writem_imm<8>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.imm_value(); opc->src2 = op.rs2.reg_ptr(); + } + } + else if (op.rs3.is_imm()) { + if (size == 1) + { + auto opc = new opcode_writem_offs_imm<1>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.imm_value(); opc->src2 = op.rs2.reg_ptr(); + } + else if (size == 2) + { + auto opc = new opcode_writem_offs_imm<2>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.imm_value(); opc->src2 = op.rs2.reg_ptr(); + } + else if (size == 4) + { + auto opc = new opcode_writem_offs_imm<4>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = 
op.rs3.imm_value(); opc->src2 = op.rs2.reg_ptr(); + } + else if (size == 8) + { + auto opc = new opcode_writem_offs_imm<8>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.imm_value(); opc->src2 = op.rs2.reg_ptr(); + } + } + else if (op.rs3.is_reg()) { + if (size == 1) + { + auto opc = new opcode_writem_offs<1>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.reg_ptr(); opc->src2 = op.rs2.reg_ptr(); + } + else if (size == 2) + { + auto opc = new opcode_writem_offs<2>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.reg_ptr(); opc->src2 = op.rs2.reg_ptr(); + } + else if (size == 4) + { + auto opc = new opcode_writem_offs<4>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.reg_ptr(); opc->src2 = op.rs2.reg_ptr(); + } + else if (size == 8) + { + auto opc = new opcode_writem_offs<8>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->offs = op.rs3.reg_ptr(); opc->src2 = op.rs2.reg_ptr(); + } + } else { - die("1..8 bytes"); + verify(op.rs3.is_null()); + if (size == 1) + { + auto opc = new opcode_writem<1>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->src2 = op.rs2.reg_ptr(); + } + else if (size == 2) + { + auto opc = new opcode_writem<2>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->src2 = op.rs2.reg_ptr(); + } + else if (size == 4) + { + auto opc = new opcode_writem<4>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->src2 = op.rs2.reg_ptr(); + } + else if (size == 8) + { + auto opc = new opcode_writem<8>(); ptrs.ptrs[i] = opc; opc->src = op.rs1.reg_ptr(); opc->src2 = op.rs2.reg_ptr(); + } } } break; - */ + default: shil_chf[op.op](&op); break; @@ -422,13 +946,7 @@ public: //emit_Skip(getSize()); } - struct CC_PS - { - CanonicalParamType type; - shil_param* prm; - }; - - vector CC_pars; + CC_pars_t CC_pars; void* ccfn; void ngen_CC_Start(shil_opcode* op) @@ -449,8 +967,19 @@ public: void ngen_CC_Finish(shil_opcode* op) { - //lookup - die("false"); + 
string nm = ""; + for (auto m : CC_pars) { + nm += (char)(m.type + 'a'); + nm += (char)(m.prm->type + 'A'); + } + + if (unmap.count(nm)) { + ptrsg[opcode_index] = unmap[nm](CC_pars, ccfn); + } + else { + printf("IMPLEMENT CC_CALL CLASS: %s\n", nm.c_str()); + ptrsg[opcode_index] = new opcodeDie(); + } } }; diff --git a/shell/reicast.sln b/shell/reicast.sln index 0b0428f0d..d04865b50 100644 --- a/shell/reicast.sln +++ b/shell/reicast.sln @@ -1,8 +1,15 @@  -Microsoft Visual Studio Solution File, Format Version 11.00 -# Visual Studio 2010 +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 2013 +VisualStudioVersion = 12.0.31101.0 +MinimumVisualStudioVersion = 10.0.40219.1 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "reicast", "reicast.vcxproj", "{58B14048-EACB-4780-8B1E-9C84C2C30A8E}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{B03DF793-41BA-4F47-A4F2-C06E52FAFB13}" + ProjectSection(SolutionItems) = preProject + Performance2.psess = Performance2.psess + EndProjectSection +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Win32 = Debug|Win32 @@ -23,4 +30,7 @@ Global GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection + GlobalSection(Performance) = preSolution + HasPerformanceSessions = true + EndGlobalSection EndGlobal diff --git a/shell/reicast.vcxproj b/shell/reicast.vcxproj index bf22e57e8..6da835c0b 100644 --- a/shell/reicast.vcxproj +++ b/shell/reicast.vcxproj @@ -162,6 +162,10 @@ true true + + /bigobj %(AdditionalOptions) + /bigobj %(AdditionalOptions) + true diff --git a/shell/reicast.vcxproj.filters b/shell/reicast.vcxproj.filters index 3ea65d2bd..d63fdd35d 100644 --- a/shell/reicast.vcxproj.filters +++ b/shell/reicast.vcxproj.filters @@ -414,6 +414,9 @@ rec-x64 + + rec-cpp + @@ -542,6 +545,9 @@ {f73263e9-dbe8-4a6f-8b73-335af8307551} + + {63d1fcf2-64b4-4973-995f-cd471f51117c} + From 
fc01428c3040a78b6d83a660b3b309a642259329 Mon Sep 17 00:00:00 2001 From: Stefanos Kornilios Mitsis Poiitidis Date: Thu, 23 Jul 2015 03:40:28 +0200 Subject: [PATCH 03/18] rec-cpp: Force resets when running out of blocks --- core/rec-cpp/rec_cpp.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/core/rec-cpp/rec_cpp.cpp b/core/rec-cpp/rec_cpp.cpp index ada419f46..d03989e00 100644 --- a/core/rec-cpp/rec_cpp.cpp +++ b/core/rec-cpp/rec_cpp.cpp @@ -656,7 +656,7 @@ typedef fnrv(*FNAFB)(int cycles); FNAFB FNA[] = { REP_512(0, &fnnCtor) }; DynarecCodeEntryPtr getndpn_forreal(int n) { - if (n > 8192) + if (n >= 8192) return 0; else return FNS[n]; @@ -685,6 +685,10 @@ public: block->code = getndpn_forreal(idxnxx++); + if (getndpn_forreal(idxnxx) == 0) { + emit_Skip(emit_FreeSpace()-16); + } + for (size_t i = 0; i < block->oplist.size(); i++) { opcode_index = i; shil_opcode& op = block->oplist[i]; From e4138f9a6083b7398c7483d8002a6ebc26e15ae6 Mon Sep 17 00:00:00 2001 From: Stefanos Kornilios Mitsis Poiitidis Date: Thu, 23 Jul 2015 05:16:41 +0200 Subject: [PATCH 04/18] rec-cpp: Wastefully generate more dispatchers for better BTB cache locality The real solution is to have inlining between the (now static) dispatchers and the implementations. It is going to be hard to convince the compiler to do that.
--- core/rec-cpp/rec_cpp.cpp | 549 ++++++++++++++++++++++----------------- 1 file changed, 311 insertions(+), 238 deletions(-) diff --git a/core/rec-cpp/rec_cpp.cpp b/core/rec-cpp/rec_cpp.cpp index d03989e00..9c765ff5b 100644 --- a/core/rec-cpp/rec_cpp.cpp +++ b/core/rec-cpp/rec_cpp.cpp @@ -98,291 +98,337 @@ struct CC_PS typedef vector CC_pars_t; -struct opcode_cc_aBaCbC : public opcodeExec { - void* fn; - u32* rs1; - u32 rs2; - u32* rd; - void execute() { - *rd = ((u32(*)(u32, u32))fn)(*rs1, rs2); - } - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs2 = prms[0].prm->imm_value(); - rs1 = prms[1].prm->reg_ptr(); - rd = prms[2].prm->reg_ptr(); - verify(prms.size() == 3); - } +struct opcode_cc_aBaCbC { + template + struct opex : public opcodeExec { + void* fn; + u32* rs1; + u32 rs2; + u32* rd; + void execute() { + *rd = ((u32(*)(u32, u32))fn)(*rs1, rs2); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs2 = prms[0].prm->imm_value(); + rs1 = prms[1].prm->reg_ptr(); + rd = prms[2].prm->reg_ptr(); + verify(prms.size() == 3); + } + }; }; -struct opcode_cc_aCaCbC : public opcodeExec { - void* fn; - u32* rs1; - u32* rs2; - u32* rd; - void execute() { - *rd = ((u32(*)(u32, u32))fn)(*rs1, *rs2); - } +struct opcode_cc_aCaCbC { + template + struct opex : public opcodeExec { + void* fn; + u32* rs1; + u32* rs2; + u32* rd; + void execute() { + *rd = ((u32(*)(u32, u32))fn)(*rs1, *rs2); + } - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs2 = prms[0].prm->reg_ptr(); - rs1 = prms[1].prm->reg_ptr(); - rd = prms[2].prm->reg_ptr(); - verify(prms.size() == 3); - } + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs2 = prms[0].prm->reg_ptr(); + rs1 = prms[1].prm->reg_ptr(); + rd = prms[2].prm->reg_ptr(); + verify(prms.size() == 3); + } + }; }; -struct opcode_cc_aCbC : public opcodeExec { - void* fn; - u32* rs1; - u32* rd; - void execute() { - *rd = ((u32(*)(u32))fn)(*rs1); - } +struct opcode_cc_aCbC { + template + struct 
opex : public opcodeExec { + void* fn; + u32* rs1; + u32* rd; + void execute() { + *rd = ((u32(*)(u32))fn)(*rs1); + } - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs1 = prms[0].prm->reg_ptr(); - rd = prms[1].prm->reg_ptr(); - verify(prms.size() == 2); - } + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs1 = prms[0].prm->reg_ptr(); + rd = prms[1].prm->reg_ptr(); + verify(prms.size() == 2); + } + }; }; -struct opcode_cc_aC : public opcodeExec { - void* fn; - u32* rs1; - void execute() { - ((void(*)(u32))fn)(*rs1); - } +struct opcode_cc_aC { + template + struct opex : public opcodeExec { + void* fn; + u32* rs1; + void execute() { + ((void(*)(u32))fn)(*rs1); + } - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs1 = prms[0].prm->reg_ptr(); - verify(prms.size() == 1); - } + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs1 = prms[0].prm->reg_ptr(); + verify(prms.size() == 1); + } + }; }; -struct opcode_cc_aCaCaCbC : public opcodeExec { - void* fn; - u32* rs1; - u32* rs2; - u32* rs3; - u32* rd; - void execute() { - *rd = ((u32(*)(u32, u32, u32))fn)(*rs1, *rs2, *rs3); - } +struct opcode_cc_aCaCaCbC { + template + struct opex : public opcodeExec { + void* fn; + u32* rs1; + u32* rs2; + u32* rs3; + u32* rd; + void execute() { + *rd = ((u32(*)(u32, u32, u32))fn)(*rs1, *rs2, *rs3); + } - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs3 = prms[0].prm->reg_ptr(); - rs2 = prms[1].prm->reg_ptr(); - rs1 = prms[2].prm->reg_ptr(); - rd = prms[3].prm->reg_ptr(); - verify(prms.size() == 4); - } + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs3 = prms[0].prm->reg_ptr(); + rs2 = prms[1].prm->reg_ptr(); + rs1 = prms[2].prm->reg_ptr(); + rd = prms[3].prm->reg_ptr(); + verify(prms.size() == 4); + } + }; }; -//split this to two cases, u64 and u64L/u32H -struct opcode_cc_aCaCaCcCdC : public opcodeExec { - void* fn; - u32* rs1; - u32* rs2; - u32* rs3; - u32* rd; - u32* rd2; - void execute() { - auto 
rv = ((u64(*)(u32, u32, u32))fn)(*rs1, *rs2, *rs3); +struct opcode_cc_aCaCaCcCdC { + //split this to two cases, u64 and u64L/u32H + template + struct opex : public opcodeExec { + void* fn; + u32* rs1; + u32* rs2; + u32* rs3; + u32* rd; + u32* rd2; + void execute() { + auto rv = ((u64(*)(u32, u32, u32))fn)(*rs1, *rs2, *rs3); - *rd = rv; - *rd2 = rv >> 32; - } + *rd = (u32)rv; + *rd2 = rv >> 32; + } - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs3 = prms[0].prm->reg_ptr(); - rs2 = prms[1].prm->reg_ptr(); - rs1 = prms[2].prm->reg_ptr(); - rd = prms[3].prm->reg_ptr(); - rd2 = prms[4].prm->reg_ptr(); + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs3 = prms[0].prm->reg_ptr(); + rs2 = prms[1].prm->reg_ptr(); + rs1 = prms[2].prm->reg_ptr(); + rd = prms[3].prm->reg_ptr(); + rd2 = prms[4].prm->reg_ptr(); - //verify((u64*)(rd2 - 1) == rd); - verify(prms.size() == 5); - } + //verify((u64*)(rd2 - 1) == rd); + verify(prms.size() == 5); + } + }; }; +struct opcode_cc_aCaCcCdC { + template + struct opex : public opcodeExec { + void* fn; + u32* rs1; + u32* rs2; + u32* rd; + u32* rd2; + void execute() { + auto rv = ((u64(*)(u32, u32))fn)(*rs1, *rs2); + *rd = (u32)rv; + *rd2 = rv >> 32; + } -struct opcode_cc_aCaCcCdC : public opcodeExec { - void* fn; - u32* rs1; - u32* rs2; - u32* rd; - u32* rd2; - void execute() { - auto rv = ((u64(*)(u32, u32))fn)(*rs1, *rs2); - *rd = rv; - *rd2 = rv >> 32; - } + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs2 = prms[0].prm->reg_ptr(); + rs1 = prms[1].prm->reg_ptr(); + rd = prms[2].prm->reg_ptr(); + rd2 = prms[3].prm->reg_ptr(); - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs2 = prms[0].prm->reg_ptr(); - rs1 = prms[1].prm->reg_ptr(); - rd = prms[2].prm->reg_ptr(); - rd2 = prms[3].prm->reg_ptr(); - - verify(prms.size() == 4); - } + verify(prms.size() == 4); + } + }; }; +struct opcode_cc_eDeDeDfD { + template + struct opex : public opcodeExec { + void* fn; + f32* rs1; + f32* rs2; + 
f32* rs3; + f32* rd; + void execute() { + *rd = ((f32(*)(f32, f32, f32))fn)(*rs1, *rs2, *rs3); + } -struct opcode_cc_eDeDeDfD : public opcodeExec { - void* fn; - f32* rs1; - f32* rs2; - f32* rs3; - f32* rd; - void execute() { - *rd = ((f32(*)(f32, f32, f32))fn)(*rs1, *rs2, *rs3); - } - - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs3 = (f32*)prms[0].prm->reg_ptr(); - rs2 = (f32*)prms[1].prm->reg_ptr(); - rs1 = (f32*)prms[2].prm->reg_ptr(); - rd = (f32*)prms[3].prm->reg_ptr(); - } + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs3 = (f32*)prms[0].prm->reg_ptr(); + rs2 = (f32*)prms[1].prm->reg_ptr(); + rs1 = (f32*)prms[2].prm->reg_ptr(); + rd = (f32*)prms[3].prm->reg_ptr(); + } + }; }; +struct opcode_cc_eDeDfD { + template + struct opex : public opcodeExec { + void* fn; + f32* rs1; + f32* rs2; + f32* rd; + void execute() { + *rd = ((f32(*)(f32, f32))fn)(*rs1, *rs2); + } -struct opcode_cc_eDeDfD : public opcodeExec { - void* fn; - f32* rs1; - f32* rs2; - f32* rd; - void execute() { - *rd = ((f32(*)(f32, f32))fn)(*rs1, *rs2); - } - - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs2 = (f32*)prms[0].prm->reg_ptr(); - rs1 = (f32*)prms[1].prm->reg_ptr(); - rd = (f32*)prms[2].prm->reg_ptr(); - } + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs2 = (f32*)prms[0].prm->reg_ptr(); + rs1 = (f32*)prms[1].prm->reg_ptr(); + rd = (f32*)prms[2].prm->reg_ptr(); + } + }; }; -struct opcode_cc_eDeDbC : public opcodeExec { - void* fn; - f32* rs1; - f32* rs2; - u32* rd; - void execute() { - *rd = ((u32(*)(f32, f32))fn)(*rs1, *rs2); - } +struct opcode_cc_eDeDbC { + template + struct opex : public opcodeExec { + void* fn; + f32* rs1; + f32* rs2; + u32* rd; + void execute() { + *rd = ((u32(*)(f32, f32))fn)(*rs1, *rs2); + } - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs2 = (f32*)prms[0].prm->reg_ptr(); - rs1 = (f32*)prms[1].prm->reg_ptr(); - rd = (u32*)prms[2].prm->reg_ptr(); - } + void setup(const CC_pars_t& 
prms, void* fun) { + fn = fun; + rs2 = (f32*)prms[0].prm->reg_ptr(); + rs1 = (f32*)prms[1].prm->reg_ptr(); + rd = (u32*)prms[2].prm->reg_ptr(); + } + }; }; -struct opcode_cc_eDbC : public opcodeExec { - void* fn; - f32* rs1; - u32* rd; - void execute() { - *rd = ((u32(*)(f32))fn)(*rs1); - } +struct opcode_cc_eDbC { + template + struct opex : public opcodeExec { + void* fn; + f32* rs1; + u32* rd; + void execute() { + *rd = ((u32(*)(f32))fn)(*rs1); + } - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs1 = (f32*)prms[0].prm->reg_ptr(); - rd = (u32*)prms[1].prm->reg_ptr(); - } + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs1 = (f32*)prms[0].prm->reg_ptr(); + rd = (u32*)prms[1].prm->reg_ptr(); + } + }; }; -struct opcode_cc_aCfD : public opcodeExec { - void* fn; - u32* rs1; - f32* rd; - void execute() { - *rd = ((f32(*)(u32))fn)(*rs1); - } +struct opcode_cc_aCfD { + template + struct opex : public opcodeExec { + void* fn; + u32* rs1; + f32* rd; + void execute() { + *rd = ((f32(*)(u32))fn)(*rs1); + } - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs1 = (u32*)prms[0].prm->reg_ptr(); - rd = (f32*)prms[1].prm->reg_ptr(); - } + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs1 = (u32*)prms[0].prm->reg_ptr(); + rd = (f32*)prms[1].prm->reg_ptr(); + } + }; }; -struct opcode_cc_eDfD : public opcodeExec { - void* fn; - f32* rs1; - f32* rd; - void execute() { - *rd = ((f32(*)(f32))fn)(*rs1); - } +struct opcode_cc_eDfD { + template + struct opex : public opcodeExec { + void* fn; + f32* rs1; + f32* rd; + void execute() { + *rd = ((f32(*)(f32))fn)(*rs1); + } - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs1 = (f32*)prms[0].prm->reg_ptr(); - rd = (f32*)prms[1].prm->reg_ptr(); - } + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs1 = (f32*)prms[0].prm->reg_ptr(); + rd = (f32*)prms[1].prm->reg_ptr(); + } + }; }; -struct opcode_cc_aCgE : public opcodeExec { - void* fn; - u32* rs1; - f32* rd; - 
void execute() { - ((void(*)(f32*, u32))fn)(rd, *rs1); - } +struct opcode_cc_aCgE { + template + struct opex : public opcodeExec { + void* fn; + u32* rs1; + f32* rd; + void execute() { + ((void(*)(f32*, u32))fn)(rd, *rs1); + } - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs1 = (u32*)prms[0].prm->reg_ptr(); - rd = (f32*)prms[1].prm->reg_ptr(); - } + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs1 = (u32*)prms[0].prm->reg_ptr(); + rd = (f32*)prms[1].prm->reg_ptr(); + } + }; }; -struct opcode_cc_gJgHgH : public opcodeExec { - void* fn; - f32* rs2; - f32* rs1; - f32* rd; - void execute() { - ((void(*)(f32*, f32*, f32*))fn)(rd, rs1, rs2); - } +struct opcode_cc_gJgHgH { + template + struct opex : public opcodeExec { + void* fn; + f32* rs2; + f32* rs1; + f32* rd; + void execute() { + ((void(*)(f32*, f32*, f32*))fn)(rd, rs1, rs2); + } - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs2 = (f32*)prms[0].prm->reg_ptr(); - rs1 = (f32*)prms[1].prm->reg_ptr(); - rd = (f32*)prms[2].prm->reg_ptr(); - } + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs2 = (f32*)prms[0].prm->reg_ptr(); + rs1 = (f32*)prms[1].prm->reg_ptr(); + rd = (f32*)prms[2].prm->reg_ptr(); + } + }; }; -struct opcode_cc_gHgHfD : public opcodeExec { - void* fn; - f32* rs2; - f32* rs1; - f32* rd; - void execute() { - *rd = ((f32(*)(f32*, f32*))fn)(rs1, rs2); - } +struct opcode_cc_gHgHfD { + template + struct opex : public opcodeExec { + void* fn; + f32* rs2; + f32* rs1; + f32* rd; + void execute() { + *rd = ((f32(*)(f32*, f32*))fn)(rs1, rs2); + } - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs2 = (f32*)prms[0].prm->reg_ptr(); - rs1 = (f32*)prms[1].prm->reg_ptr(); - rd = (f32*)prms[2].prm->reg_ptr(); - } + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs2 = (f32*)prms[0].prm->reg_ptr(); + rs1 = (f32*)prms[1].prm->reg_ptr(); + rd = (f32*)prms[2].prm->reg_ptr(); + } + }; }; struct opcode_ifb_pc : public opcodeExec { @@ 
-589,14 +635,41 @@ fnrv fnnCtor<0>(int cycles) { return rvb; } -template + +#define XREP_1(x, phrase) &createType +#define XREP_2(x, phrase) XREP_1(x, phrase), XREP_1(x+1, phrase) +#define XREP_4(x, phrase) XREP_2(x, phrase), XREP_2(x+2, phrase) +#define XREP_8(x, phrase) XREP_4(x, phrase), XREP_4(x+4, phrase) +#define XREP_16(x, phrase) XREP_8(x, phrase), XREP_8(x+8, phrase) +#define XREP_32(x, phrase) XREP_16(x, phrase), XREP_16(x+16, phrase) +#define XREP_64(x, phrase) XREP_32(x, phrase), XREP_32(x+32, phrase) + +template opcodeExec* createType(const CC_pars_t& prms, void* fun) { - auto rv = new CTR(); + auto rv = new CTR::opex(); rv->setup(prms, fun); return rv; } + +map funs; +int funs_id_count; + +template +opcodeExec* createType(const CC_pars_t& prms, void* fun) { + + if (!funs.count(fun)) { + funs[fun] = funs_id_count++; + } + + static opcodeExec* (*ctors[])(const CC_pars_t& prms, void* fun) = { XREP_64(0, phrase) }; + + int id = funs[fun]; + + return ctors[id](prms, fun); +} + map< string, opcodeExec*(*)(const CC_pars_t& prms, void* fun)> unmap = { { "aBaCbC", &createType }, { "aCaCbC", &createType }, From 39e369411c808f81e6ba174ff010c9a5b89b4868 Mon Sep 17 00:00:00 2001 From: Stefanos Kornilios Mitsis Poiitidis Date: Thu, 23 Jul 2015 09:00:18 +0200 Subject: [PATCH 05/18] shil/canonical: Fix decls, comment --- core/hw/sh4/dyna/shil_canonical.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/hw/sh4/dyna/shil_canonical.h b/core/hw/sh4/dyna/shil_canonical.h index 1ab8cf304..5a7bbce79 100644 --- a/core/hw/sh4/dyna/shil_canonical.h +++ b/core/hw/sh4/dyna/shil_canonical.h @@ -61,7 +61,7 @@ extern "C" f32 fipr_asm(float* fn, float* fm); #define shil_opc(name) struct shil_opcl_##name { #define shil_opc_end() }; - #define shil_canonical(rv,name,args,code) static rv cimpl_##name args; + #define shil_canonical(rv,name,args,code) static rv name args; #define shil_compile(code) static void compile(shil_opcode* op); #elif SHIL_MODE==3 
//generate struct list ... @@ -93,7 +93,7 @@ extern "C" f32 fipr_asm(float* fn, float* fm); -#if SHIL_MODE==1 +#if SHIL_MODE==1 || SHIL_MODE==2 //only in structs we use the code :) #include #include "types.h" @@ -981,7 +981,7 @@ shil_opc_end() -//shop_ftrv +//shop_frswap shil_opc(frswap) shil_canonical ( From 7303a046be98614e66135e5cbd30ebcf4881a732 Mon Sep 17 00:00:00 2001 From: Stefanos Kornilios Mitsis Poiitidis Date: Thu, 23 Jul 2015 09:01:54 +0200 Subject: [PATCH 06/18] rec-cpp: Direct calls to shil_opcl_* handlers for many opcodes --- core/rec-cpp/rec_cpp.cpp | 221 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 220 insertions(+), 1 deletion(-) diff --git a/core/rec-cpp/rec_cpp.cpp b/core/rec-cpp/rec_cpp.cpp index 9c765ff5b..a8c23e07e 100644 --- a/core/rec-cpp/rec_cpp.cpp +++ b/core/rec-cpp/rec_cpp.cpp @@ -16,6 +16,8 @@ #include "profiler/profiler.h" #include "oslib/oslib.h" +#define SHIL_MODE 2 +#include "hw/sh4/dyna/shil_canonical.h" struct DynaRBI : RuntimeBlockInfo { @@ -118,6 +120,13 @@ struct opcode_cc_aBaCbC { verify(prms.size() == 3); } }; + + template + struct opex2 : public opex<64> { + void execute() { + *rd = ((u32(*)(u32, u32))&T::f1)(*rs1, rs2); + } + }; }; struct opcode_cc_aCaCbC { @@ -139,6 +148,13 @@ struct opcode_cc_aCaCbC { verify(prms.size() == 3); } }; + + template + struct opex2 : public opex<64> { + void execute() { + *rd = ((u32(*)(u32, u32))&T::f1)(*rs1, *rs2); + } + }; }; struct opcode_cc_aCbC { @@ -158,6 +174,14 @@ struct opcode_cc_aCbC { verify(prms.size() == 2); } }; + + template + struct opex2 : public opex<64> { + void execute() { + *rd = ((u32(*)(u32))&T::f1)(*rs1); + } + }; + }; struct opcode_cc_aC { @@ -198,6 +222,13 @@ struct opcode_cc_aCaCaCbC { verify(prms.size() == 4); } }; + + template + struct opex2 : public opex<64> { + void execute() { + *rd = ((u32(*)(u32, u32, u32))&T::f1)(*rs1, *rs2, *rs3); + } + }; }; struct opcode_cc_aCaCaCcCdC { @@ -229,6 +260,16 @@ struct opcode_cc_aCaCaCcCdC { verify(prms.size() == 5); 
} }; + template + struct opex2 : public opex<64> { + void execute() { + auto rv = ((u64(*)(u32, u32, u32))&T::f1)(*rs1, *rs2, *rs3); + + *rd = (u32)rv; + *rd2 = rv >> 32; + } + }; + }; struct opcode_cc_aCaCcCdC { @@ -255,6 +296,15 @@ struct opcode_cc_aCaCcCdC { verify(prms.size() == 4); } }; + + template + struct opex2 : public opex<64> { + void execute() { + auto rv = ((u64(*)(u32, u32))T::f1)(*rs1, *rs2); + *rd = (u32)rv; + *rd2 = rv >> 32; + } + }; }; struct opcode_cc_eDeDeDfD { @@ -277,6 +327,14 @@ struct opcode_cc_eDeDeDfD { rd = (f32*)prms[3].prm->reg_ptr(); } }; + + template + struct opex2 : public opex<64> { + void execute() { + *rd = ((f32(*)(f32, f32, f32))&T::f1)(*rs1, *rs2, *rs3); + } + }; + }; struct opcode_cc_eDeDfD { @@ -297,6 +355,13 @@ struct opcode_cc_eDeDfD { rd = (f32*)prms[2].prm->reg_ptr(); } }; + + template + struct opex2 : public opex<64> { + void execute() { + *rd = ((f32(*)(f32, f32))&T::f1)(*rs1, *rs2); + } + }; }; struct opcode_cc_eDeDbC { @@ -317,6 +382,13 @@ struct opcode_cc_eDeDbC { rd = (u32*)prms[2].prm->reg_ptr(); } }; + + template + struct opex2 : public opex<64> { + void execute() { + *rd = ((u32(*)(f32, f32))&T::f1)(*rs1, *rs2); + } + }; }; struct opcode_cc_eDbC { @@ -371,6 +443,13 @@ struct opcode_cc_eDfD { rd = (f32*)prms[1].prm->reg_ptr(); } }; + + template + struct opex2 : public opex<64> { + void execute() { + *rd = ((f32(*)(f32))&T::f1)(*rs1); + } + }; }; struct opcode_cc_aCgE { @@ -429,6 +508,13 @@ struct opcode_cc_gHgHfD { rd = (f32*)prms[2].prm->reg_ptr(); } }; + + template + struct opex2 : public opex<64> { + void execute() { + *rd = ((f32(*)(f32*, f32*))&T::f1)(rs1, rs2); + } + }; }; struct opcode_ifb_pc : public opcodeExec { @@ -652,18 +738,151 @@ opcodeExec* createType(const CC_pars_t& prms, void* fun) { return rv; } +template +opcodeExec* createType2(const CC_pars_t& prms, void* fun) { + auto rv = new CTR::opex2(); + + rv->setup(prms, fun); + return rv; +} + map funs; + + int funs_id_count; +template +opcodeExec* 
createType_fast(const CC_pars_t& prms, void* fun) { + return 0; +} + +#define FAST_sig(sig, ...) \ +template <> \ +opcodeExec* createType_fast(const CC_pars_t& prms, void* fun) { \ + using CTR = opcode_cc_##sig; \ + \ + static map funsf = {\ + +#define FAST_gis \ +};\ + \ + if (funsf.count(fun)) { \ + return funsf[fun](prms, fun); \ + } \ + else { \ + return 0; \ + } \ +} + +#define FAST_po(n) { &shil_opcl_##n::f1, &createType2 < shil_opcl_##n, CTR > }, + +FAST_sig(aCaCbC) +FAST_po(and) +FAST_po(or) +FAST_po(xor) +FAST_po(add) +FAST_po(sub) +FAST_po(ror) +FAST_po(shl) +FAST_po(shr) +FAST_po(sar) +FAST_po(shad) +FAST_po(shld) +FAST_po(test) +FAST_po(seteq) +FAST_po(setge) +FAST_po(setgt) +FAST_po(setae) +FAST_po(setab) +FAST_po(setpeq) +FAST_po(mul_u16) +FAST_po(mul_s16) +FAST_po(mul_i32) +FAST_gis + +FAST_sig(aBaCbC) +FAST_po(and) +FAST_po(or) +FAST_po(xor) +FAST_po(add) +FAST_po(sub) +FAST_po(ror) +FAST_po(shl) +FAST_po(shr) +FAST_po(sar) +FAST_po(shad) +FAST_po(shld) +FAST_po(test) +FAST_po(seteq) +FAST_po(setge) +FAST_po(setgt) +FAST_po(setae) +FAST_po(setab) +FAST_po(setpeq) +FAST_po(mul_u16) +FAST_po(mul_s16) +FAST_po(mul_i32) +FAST_gis + +FAST_sig(eDeDfD) +FAST_po(fadd) +FAST_po(fsub) +FAST_po(fmul) +FAST_po(fdiv) +FAST_gis + +FAST_sig(eDfD) +FAST_po(fneg) +FAST_po(fabs) +FAST_po(fsrra) +FAST_gis + + +FAST_sig(eDeDbC) +FAST_po(fseteq) +FAST_po(fsetgt) +FAST_gis + +FAST_sig(eDeDeDfD) +FAST_po(fmac) +FAST_gis + +FAST_sig(gHgHfD) +FAST_po(fipr) +FAST_gis + +FAST_sig(aCaCcCdC) +FAST_po(div32u) +FAST_gis + +FAST_sig(aCaCaCcCdC) +FAST_po(adc) +FAST_po(sbc) +FAST_gis + +FAST_sig(aCaCaCbC) +FAST_po(div32p2) +FAST_gis + +FAST_sig(aCbC) +FAST_po(neg) +FAST_po(not) +FAST_po(ext_s16) +FAST_gis + template opcodeExec* createType(const CC_pars_t& prms, void* fun) { + auto frv = createType_fast(prms, fun); + if (frv) + return frv; + if (!funs.count(fun)) { funs[fun] = funs_id_count++; } - static opcodeExec* (*ctors[])(const CC_pars_t& prms, void* fun) = { XREP_64(0, phrase) 
}; + static opcodeExec* (*ctors[])(const CC_pars_t& prms, void* fun) = { XREP_64(0, __noop) }; int id = funs[fun]; From 1710f76ab1752e491a22d7827925baf630eb6bee Mon Sep 17 00:00:00 2001 From: Stefanos Kornilios Mitsis Poiitidis Date: Thu, 23 Jul 2015 14:42:07 +0200 Subject: [PATCH 07/18] unroll the execution loop --- core/rec-cpp/rec_cpp.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/core/rec-cpp/rec_cpp.cpp b/core/rec-cpp/rec_cpp.cpp index a8c23e07e..c54284d0e 100644 --- a/core/rec-cpp/rec_cpp.cpp +++ b/core/rec-cpp/rec_cpp.cpp @@ -677,6 +677,17 @@ struct opcode_writem_offs_imm : public opcodeExec { } }; +#define DREP_1(x, phrase) if (x < cnt) ops[x]->execute(); +#define DREP_2(x, phrase) DREP_1(x, phrase) DREP_1(x+1, phrase) +#define DREP_4(x, phrase) DREP_2(x, phrase) DREP_2(x+2, phrase) +#define DREP_8(x, phrase) DREP_4(x, phrase) DREP_4(x+4, phrase) +#define DREP_16(x, phrase) DREP_8(x, phrase) DREP_8(x+8, phrase) +#define DREP_32(x, phrase) DREP_16(x, phrase) DREP_16(x+16, phrase) +#define DREP_64(x, phrase) DREP_32(x, phrase) DREP_32(x+32, phrase) +#define DREP_128(x, phrase) DREP_64(x, phrase) DREP_64(x+64, phrase) +#define DREP_256(x, phrase) DREP_128(x, phrase) DREP_128(x+128, phrase) +#define DREP_512(x, phrase) DREP_256(x, phrase) DREP_256(x+256, phrase) + template class fnblock { public: @@ -684,9 +695,8 @@ public: int cc; void execute() { cycle_counter -= cc; - for (int i = 0; i < cnt; i++) { - ops[i]->execute(); - } + + DREP_512(0, phrase); } static void runner(void* fnb) { From 00dbd6222f801e4d38fd64067e037675e19e22ff Mon Sep 17 00:00:00 2001 From: Stefanos Kornilios Mitsis Poiitidis Date: Fri, 24 Jul 2015 02:34:50 +0200 Subject: [PATCH 08/18] rec-cpp: Massage for gcc compatibility --- core/core.mk | 4 ++ core/rec-cpp/rec_cpp.cpp | 14 +++-- core/rend/gles/gles.cpp | 2 +- shell/linCPP/Makefile | 122 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 136 insertions(+), 6 deletions(-) create mode 100644 
shell/linCPP/Makefile diff --git a/core/core.mk b/core/core.mk index 85c609d6b..7f7b391ad 100755 --- a/core/core.mk +++ b/core/core.mk @@ -37,6 +37,10 @@ ifdef X64_REC RZDCY_MODULES += rec-x64/ endif +ifdef CPP_REC + RZDCY_MODULES += rec-cpp/ +endif + ifndef NO_REND RZDCY_MODULES += rend/gles/ else diff --git a/core/rec-cpp/rec_cpp.cpp b/core/rec-cpp/rec_cpp.cpp index c54284d0e..90b3f57ff 100644 --- a/core/rec-cpp/rec_cpp.cpp +++ b/core/rec-cpp/rec_cpp.cpp @@ -742,7 +742,8 @@ fnrv fnnCtor<0>(int cycles) { template opcodeExec* createType(const CC_pars_t& prms, void* fun) { - auto rv = new CTR::opex(); + typedef typename CTR::template opex thetype; + auto rv = new thetype(); rv->setup(prms, fun); return rv; @@ -750,7 +751,8 @@ opcodeExec* createType(const CC_pars_t& prms, void* fun) { template opcodeExec* createType2(const CC_pars_t& prms, void* fun) { - auto rv = new CTR::opex2(); + typedef typename CTR::template opex2 thetype; + auto rv = new thetype(); rv->setup(prms, fun); return rv; @@ -767,10 +769,12 @@ opcodeExec* createType_fast(const CC_pars_t& prms, void* fun) { return 0; } +#define OPCODE_CC(sig) opcode_cc_##sig + #define FAST_sig(sig, ...) 
\ template <> \ -opcodeExec* createType_fast(const CC_pars_t& prms, void* fun) { \ - using CTR = opcode_cc_##sig; \ +opcodeExec* createType_fast(const CC_pars_t& prms, void* fun) { \ + typedef OPCODE_CC(sig) CTR; \ \ static map funsf = {\ @@ -785,7 +789,7 @@ opcodeExec* createType_fast(const CC_pars_t& prms, void* fun) { } \ } -#define FAST_po(n) { &shil_opcl_##n::f1, &createType2 < shil_opcl_##n, CTR > }, +#define FAST_po(n) { (void*)&shil_opcl_##n::f1, &createType2 < shil_opcl_##n, CTR > }, FAST_sig(aCaCbC) FAST_po(and) diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 5c00100ca..fc589c45a 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -1034,7 +1034,7 @@ void tryfit(float* x,float* y) for (int i=0;i<128;i++) { float diff=min(max(b*logf(x[i])/logf(2.0)+a,(double)0),(double)1)-y[i]; - maxdev=max((float)abs(diff),(float)maxdev); + maxdev=max((float)fabs((float)diff),(float)maxdev); } printf("FOG TABLE Curve match: maxdev: %.02f cents\n",maxdev*100); fog_coefs[0]=a; diff --git a/shell/linCPP/Makefile b/shell/linCPP/Makefile new file mode 100644 index 000000000..9489d42b8 --- /dev/null +++ b/shell/linCPP/Makefile @@ -0,0 +1,122 @@ + +LOCAL_PATH := $(call my-dir) +FOR_LINUX :=1 +NOT_ARM := 1 +CPP_REC := 1 +#NO_REC := 1 +#NO_REND := 1 +WEBUI :=1 +USE_ALSA := 1 +USE_OSS := 1 +#USE_PULSEAUDIO := 1 + +RZDCY_SRC_DIR = ../../core + +include $(RZDCY_SRC_DIR)/core.mk + + +CXX=${CC_PREFIX}g++ +CC=${CC_PREFIX}gcc +AS=${CC_PREFIX}as +STRIP=${CC_PREFIX}strip + +LD=${CC} + +MFLAGS := #-m32 +#-marm -march=armv7-a -mtune=cortex-a9 -mfpu=neon -mfloat-abi=softfp -funroll-loops +ASFLAGS := +#-march=armv7-a -mfpu=neon -mfloat-abi=softfp + +LDFLAGS := -g -Wl,-Map,$(notdir $@).map,--gc-sections -Wl,-O3 -Wl,--sort-common + +CFLAGS := -g -O3 -D RELEASE -c -D TARGET_LINUX_x64 -D USES_HOMEDIR -D TARGET_NO_JIT +CFLAGS += -D SUPPORT_X11 +CFLAGS += -frename-registers -fno-strict-aliasing #-fsingle-precision-constant +CFLAGS += -ffast-math -ftree-vectorize 
+ + +#-fprefetch-loop-arrays +#-std=c++0x +CXXFLAGS += $(CFLAGS) $(MFLAGS) -fexceptions -fno-rtti -fpermissive -std=gnu++11 +CXXFLAGS += -D SUPPORT_X11 +CXXFLAGS += -fno-operator-names + +ifdef PGO_MAKE + CFLAGS += -fprofile-generate -pg + LDFLAGS += -fprofile-generate +else + CFLAGS += -fomit-frame-pointer +endif + +ifdef PGO_USE + CFLAGS += -fprofile-use +endif + + +ifdef LTO_TEST + CFLAGS += -flto -fwhole-program + LDFLAGS +=-flto -fwhole-program +endif + +INCS := -I$(RZDCY_SRC_DIR) -I$(RZDCY_SRC_DIR)/deps -I$(RZDCY_SRC_DIR)/khronos + +LIBS := # use system libs +LIBS += -lm -lrt -ldl +LIBS += -lpthread -lX11 + +ifdef USE_ALSA + CXXFLAGS += -D USE_ALSA + LIBS += -lasound +endif + +ifdef USE_OSS + CXXFLAGS += -D USE_OSS +endif + +ifdef USE_PULSEAUDIO + CXXFLAGS += -D USE_PULSEAUDIO + LIBS += -lpulse-simple +endif + +ifdef USE_GLES + CXXFLAGS += -DGLES + LIBS += -lEGL -lGLESv2 +else + LIBS += -ldl -lGL #for desktop gl +endif + + +OBJECTS=$(RZDCY_FILES:.cpp=.build_obj) +OBJECTS:=$(OBJECTS:.c=.build_obj) +OBJECTS:=$(OBJECTS:.S=.build_obj) +OBJECTS:=$(patsubst $(RZDCY_SRC_DIR)/%,obj/%,$(OBJECTS)) + + +EXECUTABLE_STRIPPED=nosym-reicast.elf +EXECUTABLE=reicast.elf + +PACKAGE_FILES=$(EXECUTABLE_STRIPPED) default.gcw0.desktop icon-32.png + +all: $(CPPFILES) $(EXECUTABLE) $(EXECUTABLE_STRIPPED) + +$(EXECUTABLE): $(OBJECTS) + $(CXX) $(MFLAGS) $(EXTRAFLAGS) $(LDFLAGS) $(OBJECTS) $(LIBS) -o $@ + +$(EXECUTABLE_STRIPPED): $(EXECUTABLE) + cp $< $@ && $(STRIP) $@ + +obj/%.build_obj : $(RZDCY_SRC_DIR)/%.cpp + mkdir -p $(dir $@) + $(CXX) $(EXTRAFLAGS) $(INCS) $(CFLAGS) $(CXXFLAGS) $< -o $@ + +obj/%.build_obj : $(RZDCY_SRC_DIR)/%.c + mkdir -p $(dir $@) + $(CC) $(EXTRAFLAGS) $(INCS) $(CFLAGS) $< -o $@ + +obj/%.build_obj : $(RZDCY_SRC_DIR)/%.S + mkdir -p $(dir $@) + $(AS) $(ASFLAGS) $(INCS) $< -o $@ + + +clean: + rm $(OBJECTS) $(EXECUTABLE) -f From 51a6ed214626bd8636d08a70ba7231c1aebcdbd4 Mon Sep 17 00:00:00 2001 From: Stefanos Kornilios Mitsis Poiitidis Date: Sat, 25 Jul 2015 
08:16:53 +0200 Subject: [PATCH 09/18] rec/all: optional mips counter on recs Also de-unroll rec-cpp for debug builds --- core/hw/pvr/spg.cpp | 9 ++++++--- core/rec-cpp/rec_cpp.cpp | 32 ++++++++++++++++++++++---------- core/rec-x86/rec_x86_driver.cpp | 9 ++++++++- 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/core/hw/pvr/spg.cpp b/core/hw/pvr/spg.cpp index 50a9147de..13aaa3c85 100755 --- a/core/hw/pvr/spg.cpp +++ b/core/hw/pvr/spg.cpp @@ -86,6 +86,8 @@ extern double speed_load_mspdf; double speed_load_mspdf; #endif +int mips_counter; + double full_rps; u32 fskip=0; @@ -184,12 +186,13 @@ int spg_line_sched(int tag, int cycl, int jit) spd_cpu*100/200,spd_vbs, mode,res,fullvbs, spd_fps,fskip/ts); - #else - sprintf(fpsStr,"%s/%c - %4.2f (%4.2f) - %4.2f - V: %4.2f (%.2f, %s%s%4.2f) R: %4.2f+%4.2f VTX: %4.2f%c", + #else + sprintf(fpsStr,"%s/%c - %4.2f (%4.2f) - %4.2f - V: %4.2f (%.2f, %s%s%4.2f) R: %4.2f+%4.2f VTX: %4.2f%c, MIPS: %.2f", VER_SHORTNAME,'n',mspdf,speed_load_mspdf,spd_cpu*100/200,spd_vbs, spd_vbs/full_rps,mode,res,fullvbs, spd_fps,fskip/ts - ,mv,mv_c); + , mv, mv_c, mips_counter/ 1024.0 / 1024.0); + mips_counter = 0; #endif fskip=0; diff --git a/core/rec-cpp/rec_cpp.cpp b/core/rec-cpp/rec_cpp.cpp index 90b3f57ff..20d02c46b 100644 --- a/core/rec-cpp/rec_cpp.cpp +++ b/core/rec-cpp/rec_cpp.cpp @@ -19,6 +19,9 @@ #define SHIL_MODE 2 #include "hw/sh4/dyna/shil_canonical.h" + +#define MIPS_COUNTER 0 + struct DynaRBI : RuntimeBlockInfo { virtual u32 Relink() { @@ -34,6 +37,7 @@ struct DynaRBI : RuntimeBlockInfo int cycle_counter; +extern int mips_counter; void ngen_FailedToFindBlock_internal() { rdv_FailedToFindBlock(Sh4cntx.pc); @@ -677,16 +681,20 @@ struct opcode_writem_offs_imm : public opcodeExec { } }; -#define DREP_1(x, phrase) if (x < cnt) ops[x]->execute(); -#define DREP_2(x, phrase) DREP_1(x, phrase) DREP_1(x+1, phrase) -#define DREP_4(x, phrase) DREP_2(x, phrase) DREP_2(x+2, phrase) -#define DREP_8(x, phrase) DREP_4(x, phrase) DREP_4(x+4, 
phrase) -#define DREP_16(x, phrase) DREP_8(x, phrase) DREP_8(x+8, phrase) -#define DREP_32(x, phrase) DREP_16(x, phrase) DREP_16(x+16, phrase) -#define DREP_64(x, phrase) DREP_32(x, phrase) DREP_32(x+32, phrase) -#define DREP_128(x, phrase) DREP_64(x, phrase) DREP_64(x+64, phrase) -#define DREP_256(x, phrase) DREP_128(x, phrase) DREP_128(x+128, phrase) -#define DREP_512(x, phrase) DREP_256(x, phrase) DREP_256(x+256, phrase) +#if !defined(_DEBUG) + #define DREP_1(x, phrase) if (x < cnt) ops[x]->execute(); + #define DREP_2(x, phrase) DREP_1(x, phrase) DREP_1(x+1, phrase) + #define DREP_4(x, phrase) DREP_2(x, phrase) DREP_2(x+2, phrase) + #define DREP_8(x, phrase) DREP_4(x, phrase) DREP_4(x+4, phrase) + #define DREP_16(x, phrase) DREP_8(x, phrase) DREP_8(x+8, phrase) + #define DREP_32(x, phrase) DREP_16(x, phrase) DREP_16(x+16, phrase) + #define DREP_64(x, phrase) DREP_32(x, phrase) DREP_32(x+32, phrase) + #define DREP_128(x, phrase) DREP_64(x, phrase) DREP_64(x+64, phrase) + #define DREP_256(x, phrase) DREP_128(x, phrase) DREP_128(x+128, phrase) + #define DREP_512(x, phrase) DREP_256(x, phrase) DREP_256(x+256, phrase) +#else + #define DREP_512(x, phrase) for (int i=0; iexecute(); +#endif template class fnblock { @@ -696,6 +704,10 @@ public: void execute() { cycle_counter -= cc; +#if MIPS_COUNTER + mips_counter += cnt; +#endif + DREP_512(0, phrase); } diff --git a/core/rec-x86/rec_x86_driver.cpp b/core/rec-x86/rec_x86_driver.cpp index 027be23af..b11da90ea 100644 --- a/core/rec-x86/rec_x86_driver.cpp +++ b/core/rec-x86/rec_x86_driver.cpp @@ -265,6 +265,7 @@ u32* GetRegPtr(u32 reg) u32 cvld; u32 rdmt[6]; extern u32 memops_t,memops_l; +extern int mips_counter; void CheckBlock(RuntimeBlockInfo* block,x86_ptr_imm place) { @@ -286,6 +287,8 @@ void CheckBlock(RuntimeBlockInfo* block,x86_ptr_imm place) } } + + void ngen_Compile(RuntimeBlockInfo* block,bool force_checks, bool reset, bool staging,bool optimise) { //initialise stuff @@ -305,7 +308,11 @@ void 
ngen_Compile(RuntimeBlockInfo* block,bool force_checks, bool reset, bool st x86e->Emit(op_add32,&memops_t,block->memops); x86e->Emit(op_add32,&memops_l,block->linkedmemops); - + +#ifdef MIPS_COUNTER + x86e->Emit(op_add32, &mips_counter, block->oplist.size()); +#endif + //run register allocator reg.DoAlloc(block,alloc_regs,xmm_alloc_regs); From b1f7015a98f5d3299c9a6095e614d0d5c84bc239 Mon Sep 17 00:00:00 2001 From: Stefanos Kornilios Mitsis Poiitidis Date: Sat, 25 Jul 2015 10:44:15 +0200 Subject: [PATCH 10/18] rec/cpp: Mostly fully direct specialized dispatchers - Nest shil implementations to shilop_##name::form::impl for easier template matching - Add more direct handlers - Add prints if a direct handler isn't found - Remove multiple indirect handlers, leave them for fallback --- core/hw/sh4/dyna/shil_canonical.h | 11 +- core/rec-cpp/rec_cpp.cpp | 195 ++++++++++++++++++------------ 2 files changed, 121 insertions(+), 85 deletions(-) diff --git a/core/hw/sh4/dyna/shil_canonical.h b/core/hw/sh4/dyna/shil_canonical.h index 5a7bbce79..061e42a55 100644 --- a/core/hw/sh4/dyna/shil_canonical.h +++ b/core/hw/sh4/dyna/shil_canonical.h @@ -42,7 +42,7 @@ extern "C" f32 fipr_asm(float* fn, float* fm); #define shil_opc(name) struct shil_opcl_##name { #define shil_opc_end() }; - #define shil_canonical(rv,name,args,code) static rv name args { code } + #define shil_canonical(rv,name,args,code) struct name { static rv impl args { code } }; #define shil_cf_arg_u32(x) ngen_CC_Param(op,&op->x,CPT_u32); #define shil_cf_arg_f32(x) ngen_CC_Param(op,&op->x,CPT_f32); @@ -50,7 +50,8 @@ extern "C" f32 fipr_asm(float* fn, float* fm); #define shil_cf_rv_u32(x) ngen_CC_Param(op,&op->x,CPT_u32rv); #define shil_cf_rv_f32(x) ngen_CC_Param(op,&op->x,CPT_f32rv); #define shil_cf_rv_u64(x) ngen_CC_Param(op,&op->rd,CPT_u64rvL); ngen_CC_Param(op,&op->rd2,CPT_u64rvH); - #define shil_cf(x) ngen_CC_Call(op,(void*)x); + #define shil_cf_ext(x) ngen_CC_Call(op,(void*)&x); + #define shil_cf(x) 
shil_cf_ext(x::impl) #define shil_compile(code) static void compile(shil_opcode* op) { ngen_CC_Start(op); code ngen_CC_Finish(op); } #elif SHIL_MODE==2 @@ -61,7 +62,7 @@ extern "C" f32 fipr_asm(float* fn, float* fm); #define shil_opc(name) struct shil_opcl_##name { #define shil_opc_end() }; - #define shil_canonical(rv,name,args,code) static rv name args; + #define shil_canonical(rv,name,args,code) struct name { static rv impl args; }; #define shil_compile(code) static void compile(shil_opcode* op); #elif SHIL_MODE==3 //generate struct list ... @@ -208,7 +209,7 @@ shil_opc_end() shil_opc(sync_sr) shil_compile ( - shil_cf(UpdateSR); + shil_cf_ext(UpdateSR); //die(); ) shil_opc_end() @@ -216,7 +217,7 @@ shil_opc_end() shil_opc(sync_fpscr) shil_compile ( - shil_cf(UpdateFPSCR); + shil_cf_ext(UpdateFPSCR); //die(); ) shil_opc_end() diff --git a/core/rec-cpp/rec_cpp.cpp b/core/rec-cpp/rec_cpp.cpp index 20d02c46b..d0009a75a 100644 --- a/core/rec-cpp/rec_cpp.cpp +++ b/core/rec-cpp/rec_cpp.cpp @@ -2,6 +2,7 @@ #include "types.h" #include +#include #if FEAT_SHREC == DYNAREC_CPP #include "hw/sh4/sh4_opcode_list.h" @@ -106,35 +107,27 @@ typedef vector CC_pars_t; struct opcode_cc_aBaCbC { - template - struct opex : public opcodeExec { - void* fn; - u32* rs1; + template + struct opex2 : public opcodeExec { + u32 rs2; + u32* rs1; u32* rd; - void execute() { - *rd = ((u32(*)(u32, u32))fn)(*rs1, rs2); - } void setup(const CC_pars_t& prms, void* fun) { - fn = fun; rs2 = prms[0].prm->imm_value(); rs1 = prms[1].prm->reg_ptr(); rd = prms[2].prm->reg_ptr(); verify(prms.size() == 3); } - }; - template - struct opex2 : public opex<64> { void execute() { - *rd = ((u32(*)(u32, u32))&T::f1)(*rs1, rs2); + *rd = ((u32(*)(u32, u32))&T::impl)(*rs1, rs2); } }; }; struct opcode_cc_aCaCbC { - template struct opex : public opcodeExec { void* fn; u32* rs1; @@ -154,15 +147,14 @@ struct opcode_cc_aCaCbC { }; template - struct opex2 : public opex<64> { + struct opex2 : public opex { void execute() { - 
*rd = ((u32(*)(u32, u32))&T::f1)(*rs1, *rs2); + *rd = ((u32(*)(u32, u32))&T::impl)(*rs1, *rs2); } }; }; struct opcode_cc_aCbC { - template struct opex : public opcodeExec { void* fn; u32* rs1; @@ -180,16 +172,14 @@ struct opcode_cc_aCbC { }; template - struct opex2 : public opex<64> { + struct opex2 : public opex { void execute() { - *rd = ((u32(*)(u32))&T::f1)(*rs1); + *rd = ((u32(*)(u32))&T::impl)(*rs1); } }; - }; struct opcode_cc_aC { - template struct opex : public opcodeExec { void* fn; u32* rs1; @@ -203,10 +193,16 @@ struct opcode_cc_aC { verify(prms.size() == 1); } }; + + template + struct opex2 : public opex { + void execute() { + ((void(*)(u32))&T::impl)(*rs1); + } + }; }; struct opcode_cc_aCaCaCbC { - template struct opex : public opcodeExec { void* fn; u32* rs1; @@ -228,16 +224,15 @@ struct opcode_cc_aCaCaCbC { }; template - struct opex2 : public opex<64> { + struct opex2 : public opex { void execute() { - *rd = ((u32(*)(u32, u32, u32))&T::f1)(*rs1, *rs2, *rs3); + *rd = ((u32(*)(u32, u32, u32))&T::impl)(*rs1, *rs2, *rs3); } }; }; struct opcode_cc_aCaCaCcCdC { //split this to two cases, u64 and u64L/u32H - template struct opex : public opcodeExec { void* fn; u32* rs1; @@ -265,9 +260,9 @@ struct opcode_cc_aCaCaCcCdC { } }; template - struct opex2 : public opex<64> { + struct opex2 : public opex { void execute() { - auto rv = ((u64(*)(u32, u32, u32))&T::f1)(*rs1, *rs2, *rs3); + auto rv = ((u64(*)(u32, u32, u32))&T::impl)(*rs1, *rs2, *rs3); *rd = (u32)rv; *rd2 = rv >> 32; @@ -277,7 +272,6 @@ struct opcode_cc_aCaCaCcCdC { }; struct opcode_cc_aCaCcCdC { - template struct opex : public opcodeExec { void* fn; u32* rs1; @@ -302,9 +296,9 @@ struct opcode_cc_aCaCcCdC { }; template - struct opex2 : public opex<64> { + struct opex2 : public opex { void execute() { - auto rv = ((u64(*)(u32, u32))T::f1)(*rs1, *rs2); + auto rv = ((u64(*)(u32, u32))&T::impl)(*rs1, *rs2); *rd = (u32)rv; *rd2 = rv >> 32; } @@ -312,7 +306,6 @@ struct opcode_cc_aCaCcCdC { }; struct 
opcode_cc_eDeDeDfD { - template struct opex : public opcodeExec { void* fn; f32* rs1; @@ -333,16 +326,15 @@ struct opcode_cc_eDeDeDfD { }; template - struct opex2 : public opex<64> { + struct opex2 : public opex { void execute() { - *rd = ((f32(*)(f32, f32, f32))&T::f1)(*rs1, *rs2, *rs3); + *rd = ((f32(*)(f32, f32, f32))&T::impl)(*rs1, *rs2, *rs3); } }; }; struct opcode_cc_eDeDfD { - template struct opex : public opcodeExec { void* fn; f32* rs1; @@ -361,15 +353,14 @@ struct opcode_cc_eDeDfD { }; template - struct opex2 : public opex<64> { + struct opex2 : public opex { void execute() { - *rd = ((f32(*)(f32, f32))&T::f1)(*rs1, *rs2); + *rd = ((f32(*)(f32, f32))&T::impl)(*rs1, *rs2); } }; }; struct opcode_cc_eDeDbC { - template struct opex : public opcodeExec { void* fn; f32* rs1; @@ -388,15 +379,14 @@ struct opcode_cc_eDeDbC { }; template - struct opex2 : public opex<64> { + struct opex2 : public opex { void execute() { - *rd = ((u32(*)(f32, f32))&T::f1)(*rs1, *rs2); + *rd = ((u32(*)(f32, f32))&T::impl)(*rs1, *rs2); } }; }; struct opcode_cc_eDbC { - template struct opex : public opcodeExec { void* fn; f32* rs1; @@ -411,10 +401,16 @@ struct opcode_cc_eDbC { rd = (u32*)prms[1].prm->reg_ptr(); } }; + + template + struct opex2 : public opex { + void execute() { + *rd = ((u32(*)(f32))&T::impl)(*rs1); + } + }; }; struct opcode_cc_aCfD { - template struct opex : public opcodeExec { void* fn; u32* rs1; @@ -429,10 +425,16 @@ struct opcode_cc_aCfD { rd = (f32*)prms[1].prm->reg_ptr(); } }; + + template + struct opex2 : public opex { + void execute() { + *rd = ((f32(*)(u32))&T::impl)(*rs1); + } + }; }; struct opcode_cc_eDfD { - template struct opex : public opcodeExec { void* fn; f32* rs1; @@ -449,15 +451,14 @@ struct opcode_cc_eDfD { }; template - struct opex2 : public opex<64> { + struct opex2 : public opex { void execute() { - *rd = ((f32(*)(f32))&T::f1)(*rs1); + *rd = ((f32(*)(f32))&T::impl)(*rs1); } }; }; struct opcode_cc_aCgE { - template struct opex : public opcodeExec { 
void* fn; u32* rs1; @@ -472,10 +473,16 @@ struct opcode_cc_aCgE { rd = (f32*)prms[1].prm->reg_ptr(); } }; + + template + struct opex2 : public opex { + void execute() { + ((void(*)(f32*, u32))&T::impl)(rd, *rs1); + } + }; }; struct opcode_cc_gJgHgH { - template struct opex : public opcodeExec { void* fn; f32* rs2; @@ -492,10 +499,16 @@ struct opcode_cc_gJgHgH { rd = (f32*)prms[2].prm->reg_ptr(); } }; + + template + struct opex2 : public opex { + void execute() { + ((void(*)(f32*, f32*, f32*))&T::impl)(rd, rs1, rs2); + } + }; }; struct opcode_cc_gHgHfD { - template struct opex : public opcodeExec { void* fn; f32* rs2; @@ -514,9 +527,9 @@ struct opcode_cc_gHgHfD { }; template - struct opex2 : public opex<64> { + struct opex2 : public opex { void execute() { - *rd = ((f32(*)(f32*, f32*))&T::f1)(rs1, rs2); + *rd = ((f32(*)(f32*, f32*))&T::impl)(rs1, rs2); } }; }; @@ -682,7 +695,7 @@ struct opcode_writem_offs_imm : public opcodeExec { }; #if !defined(_DEBUG) - #define DREP_1(x, phrase) if (x < cnt) ops[x]->execute(); + #define DREP_1(x, phrase) if (x < cnt) ops[x]->execute(); else return; #define DREP_2(x, phrase) DREP_1(x, phrase) DREP_1(x+1, phrase) #define DREP_4(x, phrase) DREP_2(x, phrase) DREP_2(x+2, phrase) #define DREP_8(x, phrase) DREP_4(x, phrase) DREP_4(x+4, phrase) @@ -743,24 +756,6 @@ fnrv fnnCtor<0>(int cycles) { return rvb; } - -#define XREP_1(x, phrase) &createType -#define XREP_2(x, phrase) XREP_1(x, phrase), XREP_1(x+1, phrase) -#define XREP_4(x, phrase) XREP_2(x, phrase), XREP_2(x+2, phrase) -#define XREP_8(x, phrase) XREP_4(x, phrase), XREP_4(x+4, phrase) -#define XREP_16(x, phrase) XREP_8(x, phrase), XREP_8(x+8, phrase) -#define XREP_32(x, phrase) XREP_16(x, phrase), XREP_16(x+16, phrase) -#define XREP_64(x, phrase) XREP_32(x, phrase), XREP_32(x+32, phrase) - -template -opcodeExec* createType(const CC_pars_t& prms, void* fun) { - typedef typename CTR::template opex thetype; - auto rv = new thetype(); - - rv->setup(prms, fun); - return rv; -} - 
template opcodeExec* createType2(const CC_pars_t& prms, void* fun) { typedef typename CTR::template opex2 thetype; @@ -777,7 +772,7 @@ map funs; int funs_id_count; template -opcodeExec* createType_fast(const CC_pars_t& prms, void* fun) { +opcodeExec* createType_fast(const CC_pars_t& prms, void* fun, shil_opcode* opcode) { return 0; } @@ -785,7 +780,7 @@ opcodeExec* createType_fast(const CC_pars_t& prms, void* fun) { #define FAST_sig(sig, ...) \ template <> \ -opcodeExec* createType_fast(const CC_pars_t& prms, void* fun) { \ +opcodeExec* createType_fast(const CC_pars_t& prms, void* fun, shil_opcode* opcode) { \ typedef OPCODE_CC(sig) CTR; \ \ static map funsf = {\ @@ -801,7 +796,8 @@ opcodeExec* createType_fast(const CC_pars_t& prms, void* fun) { } \ } -#define FAST_po(n) { (void*)&shil_opcl_##n::f1, &createType2 < shil_opcl_##n, CTR > }, +#define FAST_po2(n,fn) { (void*)&shil_opcl_##n::fn::impl, &createType2 < shil_opcl_##n::fn, CTR > }, +#define FAST_po(n) FAST_po2(n, f1) FAST_sig(aCaCbC) FAST_po(and) @@ -862,6 +858,7 @@ FAST_sig(eDfD) FAST_po(fneg) FAST_po(fabs) FAST_po(fsrra) +FAST_po(fsqrt) FAST_gis @@ -880,6 +877,7 @@ FAST_gis FAST_sig(aCaCcCdC) FAST_po(div32u) +FAST_po(div32s) FAST_gis FAST_sig(aCaCaCcCdC) @@ -894,29 +892,60 @@ FAST_gis FAST_sig(aCbC) FAST_po(neg) FAST_po(not) +FAST_po(ext_s8) FAST_po(ext_s16) +FAST_po(swaplb) FAST_gis -template -opcodeExec* createType(const CC_pars_t& prms, void* fun) { +FAST_sig(aCfD) +FAST_po(cvt_i2f_z) +FAST_po(cvt_i2f_n) +FAST_gis - auto frv = createType_fast(prms, fun); + +FAST_sig(aCgE) +FAST_po2(fsca, fsca_table) +FAST_gis + +FAST_sig(eDbC) +FAST_po(cvt_f2i_t) +FAST_gis + +FAST_sig(gJgHgH) +FAST_po(ftrv) +FAST_gis + +FAST_sig(aC) +FAST_po2(pref, f1) +FAST_po2(pref, f2) +FAST_gis + +typedef opcodeExec*(*foas)(const CC_pars_t& prms, void* fun, shil_opcode* opcode); + +string getCTN(foas code); + +template +opcodeExec* createType(const CC_pars_t& prms, void* fun, shil_opcode* opcode) { + + auto frv = 
createType_fast(prms, fun, opcode); if (frv) return frv; if (!funs.count(fun)) { funs[fun] = funs_id_count++; + + printf("DEFINE %s: FAST_po(%s)\n", getCTN(&createType).c_str(), shil_opcode_name(opcode->op)); } - static opcodeExec* (*ctors[])(const CC_pars_t& prms, void* fun) = { XREP_64(0, __noop) }; + typedef typename CTR::opex thetype; + auto rv = new thetype(); - int id = funs[fun]; - - return ctors[id](prms, fun); + rv->setup(prms, fun); + return rv; } -map< string, opcodeExec*(*)(const CC_pars_t& prms, void* fun)> unmap = { - { "aBaCbC", &createType }, +map< string, foas> unmap = { + { "aBaCbC", &createType_fast }, { "aCaCbC", &createType }, { "aCbC", &createType }, { "aC", &createType }, @@ -939,6 +968,12 @@ map< string, opcodeExec*(*)(const CC_pars_t& prms, void* fun)> unmap = { { "gHgHfD", &createType }, }; +string getCTN(foas f) { + auto it = find_if(unmap.begin(), unmap.end(), [f](const map< string, foas>::value_type& s) { return s.second == f; }); + + return it->first; +} + struct { void* fnb; void(*runner)(void* fnb); @@ -1296,7 +1331,7 @@ public: } if (unmap.count(nm)) { - ptrsg[opcode_index] = unmap[nm](CC_pars, ccfn); + ptrsg[opcode_index] = unmap[nm](CC_pars, ccfn, op); } else { printf("IMPLEMENT CC_CALL CLASS: %s\n", nm.c_str()); From cbda836a96a43c2bc0fd37800e0bfb294470ba93 Mon Sep 17 00:00:00 2001 From: Stefanos Kornilios Mitsis Poiitidis Date: Sat, 25 Jul 2015 20:33:53 +0200 Subject: [PATCH 11/18] she/core: Less stupid register swap functions --- core/hw/sh4/sh4_core_regs.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/core/hw/sh4/sh4_core_regs.cpp b/core/hw/sh4/sh4_core_regs.cpp index b4b00b543..069dcf6ea 100644 --- a/core/hw/sh4/sh4_core_regs.cpp +++ b/core/hw/sh4/sh4_core_regs.cpp @@ -13,23 +13,23 @@ u8* sh4_dyna_rcb; INLINE void ChangeGPR() { - u32 temp[8]; + u32 temp; for (int i=0;i<8;i++) { - temp[i]=r[i]; + temp=r[i]; r[i]=r_bank[i]; - r_bank[i]=temp[i]; + r_bank[i]=temp; } } INLINE void ChangeFP() { - 
u32 temp[16]; + u32 temp; for (int i=0;i<16;i++) { - temp[i]=fr_hex[i]; + temp=fr_hex[i]; fr_hex[i]=xf_hex[i]; - xf_hex[i]=temp[i]; + xf_hex[i]=temp; } } From fb55e7f6f8ce182641c92b77eb72ea722567c7f4 Mon Sep 17 00:00:00 2001 From: Stefanos Kornilios Mitsis Poiitidis Date: Sat, 25 Jul 2015 20:36:06 +0200 Subject: [PATCH 12/18] sh4/canonical: Provide impls for sync_sr and sync_fpscr This shouldn't be a requirement as directly using shil_cf_ext should work, but for some reason if I don't msvc optimizes the functions out. --- core/hw/sh4/dyna/shil_canonical.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/core/hw/sh4/dyna/shil_canonical.h b/core/hw/sh4/dyna/shil_canonical.h index 061e42a55..68c6a5091 100644 --- a/core/hw/sh4/dyna/shil_canonical.h +++ b/core/hw/sh4/dyna/shil_canonical.h @@ -207,18 +207,26 @@ shil_opc_end() //Canonical impl. opcodes ! shil_opc(sync_sr) +shil_canonical +( +void, f1, (), + UpdateSR(); +) shil_compile ( - shil_cf_ext(UpdateSR); - //die(); + shil_cf(f1); ) shil_opc_end() shil_opc(sync_fpscr) +shil_canonical +( +void, f1, (), + UpdateFPSCR(); +) shil_compile ( - shil_cf_ext(UpdateFPSCR); - //die(); + shil_cf(f1); ) shil_opc_end() From 4b97f468b578b07539e1589eae93b0865291ae9b Mon Sep 17 00:00:00 2001 From: Stefanos Kornilios Mitsis Poiitidis Date: Sat, 25 Jul 2015 20:37:47 +0200 Subject: [PATCH 13/18] rec-cpp: Implement block ends and a few more decoders - Full IL mode now works! 
- Dispatch up to 270 mips on 2.2ghz i7, on a complex 3d scene - Will run many games fullspeed on modern pc, thanks to idleskip --- core/rec-cpp/rec_cpp.cpp | 165 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 158 insertions(+), 7 deletions(-) diff --git a/core/rec-cpp/rec_cpp.cpp b/core/rec-cpp/rec_cpp.cpp index d0009a75a..abf1ee88e 100644 --- a/core/rec-cpp/rec_cpp.cpp +++ b/core/rec-cpp/rec_cpp.cpp @@ -73,7 +73,7 @@ void ngen_init() void ngen_GetFeatures(ngen_features* dst) { dst->InterpreterFallback = false; - dst->OnlyDynamicEnds = true; + dst->OnlyDynamicEnds = false; } RuntimeBlockInfo* ngen_AllocateBlock() @@ -534,6 +534,57 @@ struct opcode_cc_gHgHfD { }; }; +struct opcode_cc_vV { + struct opex : public opcodeExec { + void* fn; + + void execute() { + ((void(*)())fn)(); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + } + }; + + template + struct opex2 : public opex { + void execute() { + ((void(*)())&T::impl)(); + } + }; +}; + +//u64* fd1,u64* fd2,u64* fs1,u64* fs2 +//slightly violates the type, as it's FV4PTR but we pass u64* +struct opcode_cc_gJgJgJgJ { + struct opex : public opcodeExec { + void* fn; + u64* rs2; + u64* rs1; + u64* rd; + u64* rd2; + void execute() { + ((void(*)(u64*, u64*, u64*, u64*))fn)(rd, rd2, rs1, rs2); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs2 = (u64*)prms[0].prm->reg_ptr(); + rs1 = (u64*)prms[1].prm->reg_ptr(); + rd2 = (u64*)prms[2].prm->reg_ptr(); + rd = (u64*)prms[3].prm->reg_ptr(); + } + }; + + template + struct opex2 : public opex { + void execute() { + ((void(*)(u64*, u64*, u64*, u64*))&T::impl)(rd, rd2, rs1, rs2); + } + }; +}; + struct opcode_ifb_pc : public opcodeExec { OpCallFP* oph; u32 pc; @@ -557,7 +608,7 @@ struct opcode_ifb : public opcodeExec { struct opcode_jdyn : public opcodeExec { u32* src; void execute() { - next_pc = *src; + Sh4cntx.jdyn = *src; } }; @@ -565,7 +616,7 @@ struct opcode_jdyn_imm : public opcodeExec { u32* src; u32 imm; void execute() { - 
next_pc = *src + imm; + Sh4cntx.jdyn = *src + imm; } }; @@ -694,6 +745,70 @@ struct opcode_writem_offs_imm : public opcodeExec { } }; +template +struct opcode_blockend : public opcodeExec { + int next_pc_value; + int branch_pc_value; + u32* jdyn; + + opcodeExec* setup(RuntimeBlockInfo* block) { + next_pc_value = block->NextBlock; + branch_pc_value = block->BranchBlock; + + jdyn = &Sh4cntx.jdyn; + if (!block->has_jcond && BET_GET_CLS(block->BlockType) == BET_CLS_COND) { + jdyn = &sr.T; + } + return this; + } + + void execute() { + //do whatever + + + switch (end_type) { + + case BET_StaticJump: + case BET_StaticCall: + next_pc = branch_pc_value; + break; + + case BET_Cond_0: + if (*jdyn != 0) + next_pc = next_pc_value; + else + next_pc = branch_pc_value; + break; + + case BET_Cond_1: + if (*jdyn != 1) + next_pc = next_pc_value; + else + next_pc = branch_pc_value; + break; + + case BET_DynamicJump: + case BET_DynamicCall: + case BET_DynamicRet: + next_pc = *jdyn; + break; + + case BET_DynamicIntr: + case BET_StaticIntr: + if (end_type == BET_DynamicIntr) + next_pc = *jdyn; + else + next_pc = next_pc_value; + + UpdateINTC(); + break; + + default: + die("NOT GONNA HAPPEN TODAY, ALRIGHY?"); + } + } +}; + #if !defined(_DEBUG) #define DREP_1(x, phrase) if (x < cnt) ops[x]->execute(); else return; #define DREP_2(x, phrase) DREP_1(x, phrase) DREP_1(x+1, phrase) @@ -920,6 +1035,16 @@ FAST_po2(pref, f1) FAST_po2(pref, f2) FAST_gis +FAST_sig(vV) +FAST_po(sync_sr) +FAST_po(sync_fpscr) +FAST_gis + +FAST_sig(gJgJgJgJ) +FAST_po(frswap) +FAST_gis + + typedef opcodeExec*(*foas)(const CC_pars_t& prms, void* fun, shil_opcode* opcode); string getCTN(foas code); @@ -966,6 +1091,8 @@ map< string, foas> unmap = { { "aCgE", &createType }, { "gJgHgH", &createType }, { "gHgHfD", &createType }, + { "gJgJgJgJ", &createType }, + { "vV", &createType }, }; string getCTN(foas f) { @@ -1029,7 +1156,8 @@ public: opcodeExec** ptrsg; void compile(RuntimeBlockInfo* block, bool force_checks, bool reset, 
bool staging, bool optimise) { - auto ptrs = fnnCtor_forreal(block->oplist.size())(block->guest_cycles); + //we need an extra one for the end opcode + auto ptrs = fnnCtor_forreal(block->oplist.size() + 1)(block->guest_cycles); ptrsg = ptrs.ptrs; @@ -1068,7 +1196,8 @@ public: } } break; - + + case shop_jcond: case shop_jdyn: { if (op.rs2.is_imm()) { @@ -1298,9 +1427,29 @@ public: } } - verify(block->BlockType == BET_DynamicJump); + //Block end opcode + { + opcodeExec* op; + + #define CASEWS(n) case n: op = (new opcode_blockend())->setup(block); break + + switch (block->BlockType) { + CASEWS(BET_StaticJump); + CASEWS(BET_StaticCall); + CASEWS(BET_StaticIntr); + + CASEWS(BET_DynamicJump); + CASEWS(BET_DynamicCall); + CASEWS(BET_DynamicRet); + CASEWS(BET_DynamicIntr); + + CASEWS(BET_Cond_0); + CASEWS(BET_Cond_1); + } + + ptrs.ptrs[block->oplist.size()] = op; + } - //emit_Skip(getSize()); } CC_pars_t CC_pars; @@ -1329,6 +1478,8 @@ public: nm += (char)(m.type + 'a'); nm += (char)(m.prm->type + 'A'); } + if (!nm.size()) + nm = "vV"; if (unmap.count(nm)) { ptrsg[opcode_index] = unmap[nm](CC_pars, ccfn, op); From 1e0224dd1b45cee4f544dbbe48d800f25543a9d2 Mon Sep 17 00:00:00 2001 From: Stefanos Kornilios Mitsis Poiitidis Date: Tue, 28 Jul 2015 17:33:24 +0200 Subject: [PATCH 14/18] rec-cpp: Add sig aCaCcCdC for rocl, rocr --- core/rec-cpp/rec_cpp.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/rec-cpp/rec_cpp.cpp b/core/rec-cpp/rec_cpp.cpp index abf1ee88e..1a11820d9 100644 --- a/core/rec-cpp/rec_cpp.cpp +++ b/core/rec-cpp/rec_cpp.cpp @@ -993,6 +993,8 @@ FAST_gis FAST_sig(aCaCcCdC) FAST_po(div32u) FAST_po(div32s) +FAST_po(rocr) +FAST_po(rocl) FAST_gis FAST_sig(aCaCaCcCdC) From 5b67e3f090034c1b3885a4cdb65a5a1e4bfe1e10 Mon Sep 17 00:00:00 2001 From: ~skmp Date: Tue, 28 Jul 2015 19:10:31 +0300 Subject: [PATCH 15/18] rec-cpp: SH4_TCB doesn't need to be in .text if not executable --- core/hw/sh4/dyna/driver.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 
deletions(-) diff --git a/core/hw/sh4/dyna/driver.cpp b/core/hw/sh4/dyna/driver.cpp index 25aaae103..9bd767919 100644 --- a/core/hw/sh4/dyna/driver.cpp +++ b/core/hw/sh4/dyna/driver.cpp @@ -28,12 +28,13 @@ #if FEAT_SHREC != DYNAREC_NONE //uh uh -#if HOST_OS == OS_WINDOWS - u8 SH4_TCB[2*CODE_SIZE+4096]; +u8 SH4_TCB[2*CODE_SIZE+4096] +#if HOST_OS == OS_WINDOWS || FEAT_SHREC != DYNAREC_JIT + ; #elif HOST_OS == OS_LINUX - u8 SH4_TCB[2*CODE_SIZE+4096] __attribute__((section(".text"))); + __attribute__((section(".text"))); #elif HOST_OS==OS_DARWIN - u8 SH4_TCB[2*CODE_SIZE+4096] __attribute__((section("__TEXT,.text"))); + __attribute__((section("__TEXT,.text"))); #else #error SH4_TCB ALLOC #endif From ec5bb09917ad9e272a1704c69ef0c4014a49526d Mon Sep 17 00:00:00 2001 From: Stefanos Kornilios Mitsis Poiitidis Date: Tue, 28 Jul 2015 18:43:38 +0200 Subject: [PATCH 16/18] rec-cpp: Add sig aCaCcCdC for mul_u64, mul_s64 --- core/rec-cpp/rec_cpp.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/rec-cpp/rec_cpp.cpp b/core/rec-cpp/rec_cpp.cpp index 1a11820d9..030c5309e 100644 --- a/core/rec-cpp/rec_cpp.cpp +++ b/core/rec-cpp/rec_cpp.cpp @@ -995,6 +995,8 @@ FAST_po(div32u) FAST_po(div32s) FAST_po(rocr) FAST_po(rocl) +FAST_po(mul_u64) +FAST_po(mul_s64) FAST_gis FAST_sig(aCaCaCcCdC) From f1a6f04dce06aa5d11271ff50b12c6786d48558e Mon Sep 17 00:00:00 2001 From: ~skmp Date: Tue, 28 Jul 2015 20:15:38 +0300 Subject: [PATCH 17/18] rec-cpp: verify() that we have an executer w/ shop_count --- core/rec-cpp/rec_cpp.cpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/core/rec-cpp/rec_cpp.cpp b/core/rec-cpp/rec_cpp.cpp index 030c5309e..8b2ff91f0 100644 --- a/core/rec-cpp/rec_cpp.cpp +++ b/core/rec-cpp/rec_cpp.cpp @@ -1135,10 +1135,6 @@ int idxnxx = 0; DynarecCodeEntryPtr FNS[] = { REP_8192(0, &disaptchn) }; -typedef fnrv(*FNAFB)(int cycles); - -FNAFB FNA[] = { REP_512(0, &fnnCtor) }; - DynarecCodeEntryPtr getndpn_forreal(int n) { if (n >= 8192) return 
0; @@ -1146,11 +1142,14 @@ DynarecCodeEntryPtr getndpn_forreal(int n) { return FNS[n]; } +typedef fnrv(*FNAFB)(int cycles); + +FNAFB FNA[] = { REP_512(1, &fnnCtor) }; + FNAFB fnnCtor_forreal(size_t n) { - if (n > 512) - return 0; - else - return FNA[n]; + verify(n > 0); + verify(n <= 512); + return FNA[n - 1]; } class BlockCompiler { @@ -1541,4 +1540,4 @@ void ngen_ResetBlocks() delete dispatchb[id].fnb; */ } -#endif \ No newline at end of file +#endif From a82f9704d6abc3867d2bfe0dc5e5944acee970eb Mon Sep 17 00:00:00 2001 From: Stefanos Kornilios Mitsis Poiitidis Date: Tue, 28 Jul 2015 19:02:49 +0200 Subject: [PATCH 18/18] shrec: Limit max blocksize to 511 shops --- core/hw/sh4/dyna/decoder.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/core/hw/sh4/dyna/decoder.cpp b/core/hw/sh4/dyna/decoder.cpp index e6b3ab86c..8e1f8674a 100644 --- a/core/hw/sh4/dyna/decoder.cpp +++ b/core/hw/sh4/dyna/decoder.cpp @@ -15,6 +15,9 @@ #include "hw/sh4/sh4_mem.h" #include "decoder_opcodes.h" +#define BLOCK_MAX_SH_OPS_SOFT 500 +#define BLOCK_MAX_SH_OPS_HARD 511 + RuntimeBlockInfo* blk; @@ -1054,7 +1057,10 @@ void dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles) //there is no break here by design case NDO_NextOp: { - if (blk->guest_cycles>=max_cycles && !state.cpu.is_delayslot) + if ( + ( (blk->oplist.size() >= BLOCK_MAX_SH_OPS_SOFT) || (blk->guest_cycles >= max_cycles) ) + && !state.cpu.is_delayslot + ) { dec_End(state.cpu.rpc,BET_StaticJump,false); } @@ -1142,6 +1148,8 @@ _end: blk->NextBlock=state.NextAddr; blk->BranchBlock=state.JumpAddr; blk->BlockType=state.BlockType; + + verify(blk->oplist.size() <= BLOCK_MAX_SH_OPS_HARD); #if HOST_OS == OS_WINDOWS switch(rbi->addr)