From e4138f9a6083b7398c7483d8002a6ebc26e15ae6 Mon Sep 17 00:00:00 2001 From: Stefanos Kornilios Mitsis Poiitidis Date: Thu, 23 Jul 2015 05:16:41 +0200 Subject: [PATCH] rec-cpp: Wastefully generate more dispatchers for better BTB cache locality The real solution is to have inlining between the (now static) dispatchers and the impls. It's gonna be hard to convince the compiler on that. --- core/rec-cpp/rec_cpp.cpp | 549 ++++++++++++++++++++++----------------- 1 file changed, 311 insertions(+), 238 deletions(-) diff --git a/core/rec-cpp/rec_cpp.cpp b/core/rec-cpp/rec_cpp.cpp index d03989e00..9c765ff5b 100644 --- a/core/rec-cpp/rec_cpp.cpp +++ b/core/rec-cpp/rec_cpp.cpp @@ -98,291 +98,337 @@ struct CC_PS typedef vector CC_pars_t; -struct opcode_cc_aBaCbC : public opcodeExec { - void* fn; - u32* rs1; - u32 rs2; - u32* rd; - void execute() { - *rd = ((u32(*)(u32, u32))fn)(*rs1, rs2); - } - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs2 = prms[0].prm->imm_value(); - rs1 = prms[1].prm->reg_ptr(); - rd = prms[2].prm->reg_ptr(); - verify(prms.size() == 3); - } +struct opcode_cc_aBaCbC { + template + struct opex : public opcodeExec { + void* fn; + u32* rs1; + u32 rs2; + u32* rd; + void execute() { + *rd = ((u32(*)(u32, u32))fn)(*rs1, rs2); + } + + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs2 = prms[0].prm->imm_value(); + rs1 = prms[1].prm->reg_ptr(); + rd = prms[2].prm->reg_ptr(); + verify(prms.size() == 3); + } + }; }; -struct opcode_cc_aCaCbC : public opcodeExec { - void* fn; - u32* rs1; - u32* rs2; - u32* rd; - void execute() { - *rd = ((u32(*)(u32, u32))fn)(*rs1, *rs2); - } +struct opcode_cc_aCaCbC { + template + struct opex : public opcodeExec { + void* fn; + u32* rs1; + u32* rs2; + u32* rd; + void execute() { + *rd = ((u32(*)(u32, u32))fn)(*rs1, *rs2); + } - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs2 = prms[0].prm->reg_ptr(); - rs1 = prms[1].prm->reg_ptr(); - rd = prms[2].prm->reg_ptr(); - verify(prms.size() == 3); - } + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs2 = prms[0].prm->reg_ptr(); + rs1 = prms[1].prm->reg_ptr(); + rd = prms[2].prm->reg_ptr(); + verify(prms.size() == 3); + } + }; }; -struct opcode_cc_aCbC : public opcodeExec { - void* fn; - u32* rs1; - u32* rd; - void execute() { - *rd = ((u32(*)(u32))fn)(*rs1); - } +struct opcode_cc_aCbC { + template + struct opex : public opcodeExec { + void* fn; + u32* rs1; + u32* rd; + void execute() { + *rd = ((u32(*)(u32))fn)(*rs1); + } - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs1 = prms[0].prm->reg_ptr(); - rd = prms[1].prm->reg_ptr(); - verify(prms.size() == 2); - } + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs1 = prms[0].prm->reg_ptr(); + rd = prms[1].prm->reg_ptr(); + verify(prms.size() == 2); + } + }; }; -struct opcode_cc_aC : public opcodeExec { - void* fn; - u32* rs1; - void execute() { - ((void(*)(u32))fn)(*rs1); - } +struct opcode_cc_aC { + template + struct opex : public opcodeExec { + void* fn; + u32* rs1; + void execute() { + ((void(*)(u32))fn)(*rs1); + } - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs1 = prms[0].prm->reg_ptr(); - verify(prms.size() == 1); - } + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs1 = prms[0].prm->reg_ptr(); + verify(prms.size() == 1); + } + }; }; -struct opcode_cc_aCaCaCbC : public opcodeExec { - void* fn; - u32* rs1; - u32* rs2; - u32* rs3; - u32* rd; - void execute() { - *rd = ((u32(*)(u32, u32, u32))fn)(*rs1, *rs2, *rs3); - } +struct opcode_cc_aCaCaCbC { + template + struct opex : public opcodeExec { + void* fn; + u32* rs1; + u32* rs2; + u32* rs3; + u32* rd; + void execute() { + *rd = ((u32(*)(u32, u32, u32))fn)(*rs1, *rs2, *rs3); + } - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs3 = prms[0].prm->reg_ptr(); - rs2 = prms[1].prm->reg_ptr(); - rs1 = prms[2].prm->reg_ptr(); - rd = prms[3].prm->reg_ptr(); - verify(prms.size() == 4); - } + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs3 = prms[0].prm->reg_ptr(); + rs2 = prms[1].prm->reg_ptr(); + rs1 = prms[2].prm->reg_ptr(); + rd = prms[3].prm->reg_ptr(); + verify(prms.size() == 4); + } + }; }; -//split this to two cases, u64 and u64L/u32H -struct opcode_cc_aCaCaCcCdC : public opcodeExec { - void* fn; - u32* rs1; - u32* rs2; - u32* rs3; - u32* rd; - u32* rd2; - void execute() { - auto rv = ((u64(*)(u32, u32, u32))fn)(*rs1, *rs2, *rs3); +struct opcode_cc_aCaCaCcCdC { + //split this to two cases, u64 and u64L/u32H + template + struct opex : public opcodeExec { + void* fn; + u32* rs1; + u32* rs2; + u32* rs3; + u32* rd; + u32* rd2; + void execute() { + auto rv = ((u64(*)(u32, u32, u32))fn)(*rs1, *rs2, *rs3); - *rd = rv; - *rd2 = rv >> 32; - } + *rd = (u32)rv; + *rd2 = rv >> 32; + } - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs3 = prms[0].prm->reg_ptr(); - rs2 = prms[1].prm->reg_ptr(); - rs1 = prms[2].prm->reg_ptr(); - rd = prms[3].prm->reg_ptr(); - rd2 = prms[4].prm->reg_ptr(); + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs3 = prms[0].prm->reg_ptr(); + rs2 = prms[1].prm->reg_ptr(); + rs1 = prms[2].prm->reg_ptr(); + rd = prms[3].prm->reg_ptr(); + rd2 = prms[4].prm->reg_ptr(); - //verify((u64*)(rd2 - 1) == rd); - verify(prms.size() == 5); - } + //verify((u64*)(rd2 - 1) == rd); + verify(prms.size() == 5); + } + }; }; +struct opcode_cc_aCaCcCdC { + template + struct opex : public opcodeExec { + void* fn; + u32* rs1; + u32* rs2; + u32* rd; + u32* rd2; + void execute() { + auto rv = ((u64(*)(u32, u32))fn)(*rs1, *rs2); + *rd = (u32)rv; + *rd2 = rv >> 32; + } -struct opcode_cc_aCaCcCdC : public opcodeExec { - void* fn; - u32* rs1; - u32* rs2; - u32* rd; - u32* rd2; - void execute() { - auto rv = ((u64(*)(u32, u32))fn)(*rs1, *rs2); - *rd = rv; - *rd2 = rv >> 32; - } + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs2 = prms[0].prm->reg_ptr(); + rs1 = prms[1].prm->reg_ptr(); + rd = prms[2].prm->reg_ptr(); + rd2 = prms[3].prm->reg_ptr(); - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs2 = prms[0].prm->reg_ptr(); - rs1 = prms[1].prm->reg_ptr(); - rd = prms[2].prm->reg_ptr(); - rd2 = prms[3].prm->reg_ptr(); - - verify(prms.size() == 4); - } + verify(prms.size() == 4); + } + }; }; +struct opcode_cc_eDeDeDfD { + template + struct opex : public opcodeExec { + void* fn; + f32* rs1; + f32* rs2; + f32* rs3; + f32* rd; + void execute() { + *rd = ((f32(*)(f32, f32, f32))fn)(*rs1, *rs2, *rs3); + } -struct opcode_cc_eDeDeDfD : public opcodeExec { - void* fn; - f32* rs1; - f32* rs2; - f32* rs3; - f32* rd; - void execute() { - *rd = ((f32(*)(f32, f32, f32))fn)(*rs1, *rs2, *rs3); - } - - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs3 = (f32*)prms[0].prm->reg_ptr(); - rs2 = (f32*)prms[1].prm->reg_ptr(); - rs1 = (f32*)prms[2].prm->reg_ptr(); - rd = (f32*)prms[3].prm->reg_ptr(); - } + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs3 = (f32*)prms[0].prm->reg_ptr(); + rs2 = (f32*)prms[1].prm->reg_ptr(); + rs1 = (f32*)prms[2].prm->reg_ptr(); + rd = (f32*)prms[3].prm->reg_ptr(); + } + }; }; +struct opcode_cc_eDeDfD { + template + struct opex : public opcodeExec { + void* fn; + f32* rs1; + f32* rs2; + f32* rd; + void execute() { + *rd = ((f32(*)(f32, f32))fn)(*rs1, *rs2); + } -struct opcode_cc_eDeDfD : public opcodeExec { - void* fn; - f32* rs1; - f32* rs2; - f32* rd; - void execute() { - *rd = ((f32(*)(f32, f32))fn)(*rs1, *rs2); - } - - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs2 = (f32*)prms[0].prm->reg_ptr(); - rs1 = (f32*)prms[1].prm->reg_ptr(); - rd = (f32*)prms[2].prm->reg_ptr(); - } + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs2 = (f32*)prms[0].prm->reg_ptr(); + rs1 = (f32*)prms[1].prm->reg_ptr(); + rd = (f32*)prms[2].prm->reg_ptr(); + } + }; }; -struct opcode_cc_eDeDbC : public opcodeExec { - void* fn; - f32* rs1; - f32* rs2; - u32* rd; - void execute() { - *rd = ((u32(*)(f32, f32))fn)(*rs1, *rs2); - } +struct opcode_cc_eDeDbC { + template + struct opex : public opcodeExec { + void* fn; + f32* rs1; + f32* rs2; + u32* rd; + void execute() { + *rd = ((u32(*)(f32, f32))fn)(*rs1, *rs2); + } - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs2 = (f32*)prms[0].prm->reg_ptr(); - rs1 = (f32*)prms[1].prm->reg_ptr(); - rd = (u32*)prms[2].prm->reg_ptr(); - } + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs2 = (f32*)prms[0].prm->reg_ptr(); + rs1 = (f32*)prms[1].prm->reg_ptr(); + rd = (u32*)prms[2].prm->reg_ptr(); + } + }; }; -struct opcode_cc_eDbC : public opcodeExec { - void* fn; - f32* rs1; - u32* rd; - void execute() { - *rd = ((u32(*)(f32))fn)(*rs1); - } +struct opcode_cc_eDbC { + template + struct opex : public opcodeExec { + void* fn; + f32* rs1; + u32* rd; + void execute() { + *rd = ((u32(*)(f32))fn)(*rs1); + } - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs1 = (f32*)prms[0].prm->reg_ptr(); - rd = (u32*)prms[1].prm->reg_ptr(); - } + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs1 = (f32*)prms[0].prm->reg_ptr(); + rd = (u32*)prms[1].prm->reg_ptr(); + } + }; }; -struct opcode_cc_aCfD : public opcodeExec { - void* fn; - u32* rs1; - f32* rd; - void execute() { - *rd = ((f32(*)(u32))fn)(*rs1); - } +struct opcode_cc_aCfD { + template + struct opex : public opcodeExec { + void* fn; + u32* rs1; + f32* rd; + void execute() { + *rd = ((f32(*)(u32))fn)(*rs1); + } - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs1 = (u32*)prms[0].prm->reg_ptr(); - rd = (f32*)prms[1].prm->reg_ptr(); - } + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs1 = (u32*)prms[0].prm->reg_ptr(); + rd = (f32*)prms[1].prm->reg_ptr(); + } + }; }; -struct opcode_cc_eDfD : public opcodeExec { - void* fn; - f32* rs1; - f32* rd; - void execute() { - *rd = ((f32(*)(f32))fn)(*rs1); - } +struct opcode_cc_eDfD { + template + struct opex : public opcodeExec { + void* fn; + f32* rs1; + f32* rd; + void execute() { + *rd = ((f32(*)(f32))fn)(*rs1); + } - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs1 = (f32*)prms[0].prm->reg_ptr(); - rd = (f32*)prms[1].prm->reg_ptr(); - } + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs1 = (f32*)prms[0].prm->reg_ptr(); + rd = (f32*)prms[1].prm->reg_ptr(); + } + }; }; -struct opcode_cc_aCgE : public opcodeExec { - void* fn; - u32* rs1; - f32* rd; - void execute() { - ((void(*)(f32*, u32))fn)(rd, *rs1); - } +struct opcode_cc_aCgE { + template + struct opex : public opcodeExec { + void* fn; + u32* rs1; + f32* rd; + void execute() { + ((void(*)(f32*, u32))fn)(rd, *rs1); + } - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs1 = (u32*)prms[0].prm->reg_ptr(); - rd = (f32*)prms[1].prm->reg_ptr(); - } + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs1 = (u32*)prms[0].prm->reg_ptr(); + rd = (f32*)prms[1].prm->reg_ptr(); + } + }; }; -struct opcode_cc_gJgHgH : public opcodeExec { - void* fn; - f32* rs2; - f32* rs1; - f32* rd; - void execute() { - ((void(*)(f32*, f32*, f32*))fn)(rd, rs1, rs2); - } +struct opcode_cc_gJgHgH { + template + struct opex : public opcodeExec { + void* fn; + f32* rs2; + f32* rs1; + f32* rd; + void execute() { + ((void(*)(f32*, f32*, f32*))fn)(rd, rs1, rs2); + } - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs2 = (f32*)prms[0].prm->reg_ptr(); - rs1 = (f32*)prms[1].prm->reg_ptr(); - rd = (f32*)prms[2].prm->reg_ptr(); - } + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs2 = (f32*)prms[0].prm->reg_ptr(); + rs1 = (f32*)prms[1].prm->reg_ptr(); + rd = (f32*)prms[2].prm->reg_ptr(); + } + }; }; -struct opcode_cc_gHgHfD : public opcodeExec { - void* fn; - f32* rs2; - f32* rs1; - f32* rd; - void execute() { - *rd = ((f32(*)(f32*, f32*))fn)(rs1, rs2); - } +struct opcode_cc_gHgHfD { + template + struct opex : public opcodeExec { + void* fn; + f32* rs2; + f32* rs1; + f32* rd; + void execute() { + *rd = ((f32(*)(f32*, f32*))fn)(rs1, rs2); + } - void setup(const CC_pars_t& prms, void* fun) { - fn = fun; - rs2 = (f32*)prms[0].prm->reg_ptr(); - rs1 = (f32*)prms[1].prm->reg_ptr(); - rd = (f32*)prms[2].prm->reg_ptr(); - } + void setup(const CC_pars_t& prms, void* fun) { + fn = fun; + rs2 = (f32*)prms[0].prm->reg_ptr(); + rs1 = (f32*)prms[1].prm->reg_ptr(); + rd = (f32*)prms[2].prm->reg_ptr(); + } + }; }; struct opcode_ifb_pc : public opcodeExec { @@ -589,14 +635,41 @@ fnrv fnnCtor<0>(int cycles) { return rvb; } -template + +#define XREP_1(x, phrase) &createType +#define XREP_2(x, phrase) XREP_1(x, phrase), XREP_1(x+1, phrase) +#define XREP_4(x, phrase) XREP_2(x, phrase), XREP_2(x+2, phrase) +#define XREP_8(x, phrase) XREP_4(x, phrase), XREP_4(x+4, phrase) +#define XREP_16(x, phrase) XREP_8(x, phrase), XREP_8(x+8, phrase) +#define XREP_32(x, phrase) XREP_16(x, phrase), XREP_16(x+16, phrase) +#define XREP_64(x, phrase) XREP_32(x, phrase), XREP_32(x+32, phrase) + +template opcodeExec* createType(const CC_pars_t& prms, void* fun) { - auto rv = new CTR(); + auto rv = new CTR::opex(); rv->setup(prms, fun); return rv; } + +map funs; +int funs_id_count; + +template +opcodeExec* createType(const CC_pars_t& prms, void* fun) { + + if (!funs.count(fun)) { + funs[fun] = funs_id_count++; + } + + static opcodeExec* (*ctors[])(const CC_pars_t& prms, void* fun) = { XREP_64(0, phrase) }; + + int id = funs[fun]; + + return ctors[id](prms, fun); +} + map< string, opcodeExec*(*)(const CC_pars_t& prms, void* fun)> unmap = { { "aBaCbC", &createType }, { "aCaCbC", &createType },