From 62ce1c7653a58ea38042f2c485127e8ade37a989 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 20 Dec 2020 13:23:17 +0100 Subject: [PATCH] Jits: Discard registers which we know will be overwritten This commit adds a new "discarded" state for registers. Discarding a register is like flushing it, but without actually writing its value back to memory. We can discard a register only when it is guaranteed that no instruction will read from the register before it is next written to. Discarding reduces the register pressure a little, and can also let us skip a few flushes on interpreter fallbacks. --- .../Interpreter/Interpreter_Tables.cpp | 94 +++++++++---------- Source/Core/Core/PowerPC/Jit64/Jit.cpp | 11 ++- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 4 +- .../Core/PowerPC/Jit64/RegCache/CachedReg.h | 26 +++-- .../PowerPC/Jit64/RegCache/FPURegCache.cpp | 6 +- .../PowerPC/Jit64/RegCache/GPRRegCache.cpp | 8 +- .../PowerPC/Jit64/RegCache/JitRegCache.cpp | 45 +++++++-- .../Core/PowerPC/Jit64/RegCache/JitRegCache.h | 1 + Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 7 +- .../PowerPC/JitArm64/JitArm64_RegCache.cpp | 45 +++++++-- .../Core/PowerPC/JitArm64/JitArm64_RegCache.h | 12 +++ Source/Core/Core/PowerPC/PPCAnalyst.cpp | 31 +++--- Source/Core/Core/PowerPC/PPCAnalyst.h | 6 +- Source/Core/Core/PowerPC/PPCTables.h | 1 + 14 files changed, 203 insertions(+), 94 deletions(-) diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp index 9ba1235dfd..dcb36a1d17 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp @@ -85,51 +85,51 @@ static std::array primarytable = {54, Interpreter::stfd, {"stfd", OpType::StoreFP, FL_IN_FLOAT_S | FL_IN_A0 | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, {55, Interpreter::stfdu, {"stfdu", OpType::StoreFP, FL_IN_FLOAT_S | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {56, Interpreter::psq_l, {"psq_l", OpType::LoadPS, FL_OUT_FLOAT_D | FL_IN_A0 | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {57, Interpreter::psq_lu, {"psq_lu", OpType::LoadPS, FL_OUT_FLOAT_D | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {60, Interpreter::psq_st, {"psq_st", OpType::StorePS, FL_IN_FLOAT_S | FL_IN_A0 | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, - {61, Interpreter::psq_stu, {"psq_stu", OpType::StorePS, FL_IN_FLOAT_S | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, + {56, Interpreter::psq_l, {"psq_l", OpType::LoadPS, FL_OUT_FLOAT_D | FL_IN_A0 | FL_USE_FPU | FL_LOADSTORE | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {57, Interpreter::psq_lu, {"psq_lu", OpType::LoadPS, FL_OUT_FLOAT_D | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {60, Interpreter::psq_st, {"psq_st", OpType::StorePS, FL_IN_FLOAT_S | FL_IN_A0 | FL_USE_FPU | FL_LOADSTORE | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {61, Interpreter::psq_stu, {"psq_stu", OpType::StorePS, FL_IN_FLOAT_S | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, //missing: 0, 1, 2, 5, 6, 9, 22, 30, 62, 58 }}; static std::array table4 = {{ //SUBOP10 - {0, Interpreter::ps_cmpu0, {"ps_cmpu0", OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 1, 0, 0, 0}}, - {32, Interpreter::ps_cmpo0, {"ps_cmpo0", OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 1, 0, 0, 0}}, - {40, Interpreter::ps_neg, {"ps_neg", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, - {136, Interpreter::ps_nabs, {"ps_nabs", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, - {264, Interpreter::ps_abs, {"ps_abs", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, - {64, Interpreter::ps_cmpu1, {"ps_cmpu1", OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 1, 0, 0, 0}}, - {72, Interpreter::ps_mr, {"ps_mr", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, - {96, Interpreter::ps_cmpo1, {"ps_cmpo1", OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 1, 0, 0, 0}}, - {528, Interpreter::ps_merge00, {"ps_merge00", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_IN_FLOAT_AB_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, - {560, Interpreter::ps_merge01, {"ps_merge01", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_IN_FLOAT_AB_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, - {592, Interpreter::ps_merge10, {"ps_merge10", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_IN_FLOAT_AB_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, - {624, Interpreter::ps_merge11, {"ps_merge11", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_IN_FLOAT_AB_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, + {0, Interpreter::ps_cmpu0, {"ps_cmpu0", OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {32, Interpreter::ps_cmpo0, {"ps_cmpo0", OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {40, Interpreter::ps_neg, {"ps_neg", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {136, Interpreter::ps_nabs, {"ps_nabs", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {264, Interpreter::ps_abs, {"ps_abs", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {64, Interpreter::ps_cmpu1, {"ps_cmpu1", OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {72, Interpreter::ps_mr, {"ps_mr", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {96, Interpreter::ps_cmpo1, {"ps_cmpo1", OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {528, Interpreter::ps_merge00, {"ps_merge00", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_IN_FLOAT_AB_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {560, Interpreter::ps_merge01, {"ps_merge01", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_IN_FLOAT_AB_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {592, Interpreter::ps_merge10, {"ps_merge10", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_IN_FLOAT_AB_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {624, Interpreter::ps_merge11, {"ps_merge11", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_IN_FLOAT_AB_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, - {1014, Interpreter::dcbz_l, {"dcbz_l", OpType::System, FL_IN_A0B | FL_LOADSTORE, 1, 0, 0, 0}}, + {1014, Interpreter::dcbz_l, {"dcbz_l", OpType::System, FL_IN_A0B | FL_LOADSTORE | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, }}; static std::array table4_2 = {{ - {10, Interpreter::ps_sum0, {"ps_sum0", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {11, Interpreter::ps_sum1, {"ps_sum1", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {12, Interpreter::ps_muls0, {"ps_muls0", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {13, Interpreter::ps_muls1, {"ps_muls1", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {14, Interpreter::ps_madds0, {"ps_madds0", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {15, Interpreter::ps_madds1, {"ps_madds1", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {18, Interpreter::ps_div, {"ps_div", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 17, 0, 0, 0}}, - {20, Interpreter::ps_sub, {"ps_sub", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {21, Interpreter::ps_add, {"ps_add", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {23, Interpreter::ps_sel, {"ps_sel", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_IN_FLOAT_BC_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, - {24, Interpreter::ps_res, {"ps_res", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {25, Interpreter::ps_mul, {"ps_mul", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {26, Interpreter::ps_rsqrte, {"ps_rsqrte", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 2, 0, 0, 0}}, - {28, Interpreter::ps_msub, {"ps_msub", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {29, Interpreter::ps_madd, {"ps_madd", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {30, Interpreter::ps_nmsub, {"ps_nmsub", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, - {31, Interpreter::ps_nmadd, {"ps_nmadd", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, + {10, Interpreter::ps_sum0, {"ps_sum0", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {11, Interpreter::ps_sum1, {"ps_sum1", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {12, Interpreter::ps_muls0, {"ps_muls0", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {13, Interpreter::ps_muls1, {"ps_muls1", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {14, Interpreter::ps_madds0, {"ps_madds0", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {15, Interpreter::ps_madds1, {"ps_madds1", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {18, Interpreter::ps_div, {"ps_div", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 17, 0, 0, 0}}, + {20, Interpreter::ps_sub, {"ps_sub", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {21, Interpreter::ps_add, {"ps_add", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {23, Interpreter::ps_sel, {"ps_sel", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_IN_FLOAT_BC_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {24, Interpreter::ps_res, {"ps_res", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {25, Interpreter::ps_mul, {"ps_mul", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {26, Interpreter::ps_rsqrte, {"ps_rsqrte", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 2, 0, 0, 0}}, + {28, Interpreter::ps_msub, {"ps_msub", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {29, Interpreter::ps_madd, {"ps_madd", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {30, Interpreter::ps_nmsub, {"ps_nmsub", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {31, Interpreter::ps_nmadd, {"ps_nmadd", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, }}; @@ -157,7 +157,7 @@ static std::array table19 = {150, Interpreter::isync, {"isync", OpType::InstructionCache, FL_EVIL, 1, 0, 0, 0}}, {0, Interpreter::mcrf, {"mcrf", OpType::System, FL_EVIL | FL_SET_CRn, 1, 0, 0, 0}}, - {50, Interpreter::rfi, {"rfi", OpType::System, FL_ENDBLOCK | FL_CHECKEXCEPTIONS, 2, 0, 0, 0}}, + {50, Interpreter::rfi, {"rfi", OpType::System, FL_ENDBLOCK | FL_CHECKEXCEPTIONS | FL_PROGRAMEXCEPTION, 2, 0, 0, 0}}, }}; static std::array table31 = @@ -215,7 +215,7 @@ static std::array table31 = {86, Interpreter::dcbf, {"dcbf", OpType::DataCache, FL_IN_A0B | FL_LOADSTORE, 5, 0, 0, 0}}, {246, Interpreter::dcbtst, {"dcbtst", OpType::DataCache, 0, 2, 0, 0, 0}}, {278, Interpreter::dcbt, {"dcbt", OpType::DataCache, 0, 2, 0, 0, 0}}, - {470, Interpreter::dcbi, {"dcbi", OpType::DataCache, FL_IN_A0B | FL_LOADSTORE, 5, 0, 0, 0}}, + {470, Interpreter::dcbi, {"dcbi", OpType::DataCache, FL_IN_A0B | FL_LOADSTORE | FL_PROGRAMEXCEPTION, 5, 0, 0, 0}}, {758, Interpreter::dcba, {"dcba", OpType::DataCache, 0, 5, 0, 0, 0}}, {1014, Interpreter::dcbz, {"dcbz", OpType::DataCache, FL_IN_A0B | FL_LOADSTORE, 5, 0, 0, 0}}, @@ -279,17 +279,17 @@ static std::array table31 = {983, Interpreter::stfiwx, {"stfiwx", OpType::StoreFP, FL_IN_FLOAT_S | FL_IN_A0B | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}}, {19, Interpreter::mfcr, {"mfcr", OpType::System, FL_OUT_D, 1, 0, 0, 0}}, - {83, Interpreter::mfmsr, {"mfmsr", OpType::System, FL_OUT_D, 1, 0, 0, 0}}, + {83, Interpreter::mfmsr, {"mfmsr", OpType::System, FL_OUT_D | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, {144, Interpreter::mtcrf, {"mtcrf", OpType::System, FL_IN_S | FL_SET_CRn, 1, 0, 0, 0}}, - {146, Interpreter::mtmsr, {"mtmsr", OpType::System, FL_IN_S | FL_ENDBLOCK, 1, 0, 0, 0}}, - {210, Interpreter::mtsr, {"mtsr", OpType::System, FL_IN_S, 1, 0, 0, 0}}, - {242, Interpreter::mtsrin, {"mtsrin", OpType::System, FL_IN_SB, 1, 0, 0, 0}}, - {339, Interpreter::mfspr, {"mfspr", OpType::SPR, FL_OUT_D, 1, 0, 0, 0}}, - {467, Interpreter::mtspr, {"mtspr", OpType::SPR, FL_IN_S, 2, 0, 0, 0}}, - {371, Interpreter::mftb, {"mftb", OpType::System, FL_OUT_D | FL_TIMER, 1, 0, 0, 0}}, + {146, Interpreter::mtmsr, {"mtmsr", OpType::System, FL_IN_S | FL_ENDBLOCK | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {210, Interpreter::mtsr, {"mtsr", OpType::System, FL_IN_S | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {242, Interpreter::mtsrin, {"mtsrin", OpType::System, FL_IN_SB | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {339, Interpreter::mfspr, {"mfspr", OpType::SPR, FL_OUT_D | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {467, Interpreter::mtspr, {"mtspr", OpType::SPR, FL_IN_S | FL_PROGRAMEXCEPTION, 2, 0, 0, 0}}, + {371, Interpreter::mftb, {"mftb", OpType::System, FL_OUT_D | FL_TIMER | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, {512, Interpreter::mcrxr, {"mcrxr", OpType::System, FL_SET_CRn | FL_READ_CA | FL_SET_CA, 1, 0, 0, 0}}, - {595, Interpreter::mfsr, {"mfsr", OpType::System, FL_OUT_D, 3, 0, 0, 0}}, - {659, Interpreter::mfsrin, {"mfsrin", OpType::System, FL_OUT_D | FL_IN_B, 3, 0, 0, 0}}, + {595, Interpreter::mfsr, {"mfsr", OpType::System, FL_OUT_D | FL_PROGRAMEXCEPTION, 3, 0, 0, 0}}, + {659, Interpreter::mfsrin, {"mfsrin", OpType::System, FL_OUT_D | FL_IN_B | FL_PROGRAMEXCEPTION, 3, 0, 0, 0}}, {4, Interpreter::tw, {"tw", OpType::System, FL_IN_AB | FL_ENDBLOCK, 2, 0, 0, 0}}, {598, Interpreter::sync, {"sync", OpType::System, 0, 3, 0, 0, 0}}, @@ -299,8 +299,8 @@ static std::array table31 = {310, Interpreter::eciwx, {"eciwx", OpType::System, FL_IN_A0B | FL_OUT_D | FL_LOADSTORE, 1, 0, 0, 0}}, {438, Interpreter::ecowx, {"ecowx", OpType::System, FL_IN_A0B | FL_IN_S | FL_LOADSTORE, 1, 0, 0, 0}}, {854, Interpreter::eieio, {"eieio", OpType::System, 0, 1, 0, 0, 0}}, - {306, Interpreter::tlbie, {"tlbie", OpType::System, FL_IN_B, 1, 0, 0, 0}}, - {566, Interpreter::tlbsync, {"tlbsync", OpType::System, 0, 1, 0, 0, 0}}, + {306, Interpreter::tlbie, {"tlbie", OpType::System, FL_IN_B | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, + {566, Interpreter::tlbsync, {"tlbsync", OpType::System, FL_PROGRAMEXCEPTION, 1, 0, 0, 0}}, }}; static std::array table59 = diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 05c1c76486..5e436cb13e 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -1104,8 +1104,8 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) // output, which needs to be bound in the actual instruction compilation. // TODO: make this smarter in the case that we're actually register-starved, i.e. // prioritize the more important registers. - gpr.PreloadRegisters(op.regsIn & op.gprInReg); - fpr.PreloadRegisters(op.fregsIn & op.fprInXmm); + gpr.PreloadRegisters(op.regsIn & op.gprInUse & ~op.gprDiscardable); + fpr.PreloadRegisters(op.fregsIn & op.fprInXmm & ~op.fprDiscardable); } CompileInstruction(op); @@ -1151,7 +1151,12 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) gpr.Commit(); fpr.Commit(); - // If we have a register that will never be used again, flush it. + // If we have a register that will never be used again, discard or flush it. + if (!SConfig::GetInstance().bJITRegisterCacheOff) + { + gpr.Discard(op.gprDiscardable); + fpr.Discard(op.fprDiscardable); + } gpr.Flush(~op.gprInUse); fpr.Flush(~op.fprInUse); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 184b3c82ef..d2294ba313 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -298,11 +298,11 @@ void Jit64::reg_imm(UGeckoInstruction inst) { case 14: // addi // occasionally used as MOV - emulate, with immediate propagation - if (gpr.IsImm(a) && d != a && a != 0) + if (a != 0 && d != a && gpr.IsImm(a)) { gpr.SetImmediate32(d, gpr.Imm32(a) + (u32)(s32)inst.SIMM_16); } - else if (inst.SIMM_16 == 0 && d != a && a != 0) + else if (a != 0 && d != a && inst.SIMM_16 == 0) { RCOpArg Ra = gpr.Use(a, RCMode::Read); RCX64Reg Rd = gpr.Bind(d, RCMode::Write); diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/CachedReg.h b/Source/Core/Core/PowerPC/Jit64/RegCache/CachedReg.h index 3c7d5b4912..da851ea887 100644 --- a/Source/Core/Core/PowerPC/Jit64/RegCache/CachedReg.h +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/CachedReg.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include "Common/Assert.h" #include "Common/CommonTypes.h" @@ -20,6 +21,8 @@ public: { /// Value is currently at its default location Default, + /// Value is not stored anywhere because we know it won't be read before the next write + Discarded, /// Value is currently bound to a x64 register Bound, /// Value is known as an immediate and has not been written back to its default location @@ -35,26 +38,30 @@ public: { } - const Gen::OpArg& Location() const { return location; } + const std::optional& Location() const { return location; } LocationType GetLocationType() const { + if (!location.has_value()) + return LocationType::Discarded; + if (!away) { ASSERT(!revertable); - if (location.IsImm()) + if (location->IsImm()) return LocationType::SpeculativeImmediate; ASSERT(location == default_location); return LocationType::Default; } - ASSERT(location.IsImm() || location.IsSimpleReg()); - return location.IsImm() ? LocationType::Immediate : LocationType::Bound; + ASSERT(location->IsImm() || location->IsSimpleReg()); + return location->IsImm() ? LocationType::Immediate : LocationType::Bound; } bool IsAway() const { return away; } + bool IsDiscarded() const { return !location.has_value(); } bool IsBound() const { return GetLocationType() == LocationType::Bound; } void SetBoundTo(Gen::X64Reg xreg) @@ -63,6 +70,13 @@ public: location = Gen::R(xreg); } + void SetDiscarded() + { + ASSERT(!revertable); + away = false; + location = std::nullopt; + } + void SetFlushed() { ASSERT(!revertable); @@ -104,7 +118,7 @@ public: private: Gen::OpArg default_location{}; - Gen::OpArg location{}; + std::optional location{}; bool away = false; // value not in source register bool revertable = false; size_t locked = 0; @@ -122,7 +136,7 @@ public: dirty = dirty_; } - void SetFlushed() + void Unbind() { ppcReg = static_cast(Gen::INVALID_REG); free = true; diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp index ffa1c3aff2..cb919f5b3f 100644 --- a/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp @@ -16,12 +16,14 @@ FPURegCache::FPURegCache(Jit64& jit) : RegCache{jit} void FPURegCache::StoreRegister(preg_t preg, const OpArg& new_loc) { - m_emitter->MOVAPD(new_loc, m_regs[preg].Location().GetSimpleReg()); + ASSERT_MSG(DYNA_REC, m_regs[preg].IsBound(), "Unbound register - %zu", preg); + m_emitter->MOVAPD(new_loc, m_regs[preg].Location()->GetSimpleReg()); } void FPURegCache::LoadRegister(preg_t preg, X64Reg new_loc) { - m_emitter->MOVAPD(new_loc, m_regs[preg].Location()); + ASSERT_MSG(DYNA_REC, !m_regs[preg].IsDiscarded(), "Discarded register - %zu", preg); + m_emitter->MOVAPD(new_loc, m_regs[preg].Location().value()); } const X64Reg* FPURegCache::GetAllocationOrder(size_t* count) const diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.cpp index 1ee5a7f47e..4363c60153 100644 --- a/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.cpp @@ -16,12 +16,14 @@ GPRRegCache::GPRRegCache(Jit64& jit) : RegCache{jit} void GPRRegCache::StoreRegister(preg_t preg, const OpArg& new_loc) { - m_emitter->MOV(32, new_loc, m_regs[preg].Location()); + ASSERT_MSG(DYNA_REC, !m_regs[preg].IsDiscarded(), "Discarded register - %zu", preg); + m_emitter->MOV(32, new_loc, m_regs[preg].Location().value()); } void GPRRegCache::LoadRegister(preg_t preg, X64Reg new_loc) { - m_emitter->MOV(32, ::Gen::R(new_loc), m_regs[preg].Location()); + ASSERT_MSG(DYNA_REC, !m_regs[preg].IsDiscarded(), "Discarded register - %zu", preg); + m_emitter->MOV(32, ::Gen::R(new_loc), m_regs[preg].Location().value()); } OpArg GPRRegCache::GetDefaultLocation(preg_t preg) const @@ -56,7 +58,7 @@ void GPRRegCache::SetImmediate32(preg_t preg, u32 imm_value, bool dirty) BitSet32 GPRRegCache::GetRegUtilization() const { - return m_jit.js.op->gprInReg; + return m_jit.js.op->gprInUse; } BitSet32 GPRRegCache::CountRegsIn(preg_t preg, u32 lookahead) const diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.cpp index 52f9ae6802..b9ac55f210 100644 --- a/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.cpp @@ -314,6 +314,7 @@ bool RegCache::SanityCheck() const switch (m_regs[i].GetLocationType()) { case PPCCachedReg::LocationType::Default: + case PPCCachedReg::LocationType::Discarded: case PPCCachedReg::LocationType::SpeculativeImmediate: case PPCCachedReg::LocationType::Immediate: break; @@ -322,7 +323,7 @@ bool RegCache::SanityCheck() const if (m_regs[i].IsLocked() || m_regs[i].IsRevertable()) return false; - Gen::X64Reg xr = m_regs[i].Location().GetSimpleReg(); + Gen::X64Reg xr = m_regs[i].Location()->GetSimpleReg(); if (m_xregs[xr].IsLocked()) return false; if (m_xregs[xr].Contents() != i) @@ -380,6 +381,29 @@ RCForkGuard RegCache::Fork() return RCForkGuard{*this}; } +void RegCache::Discard(BitSet32 pregs) +{ + ASSERT_MSG( + DYNA_REC, + std::none_of(m_xregs.begin(), m_xregs.end(), [](const auto& x) { return x.IsLocked(); }), + "Someone forgot to unlock a X64 reg"); + + for (preg_t i : pregs) + { + ASSERT_MSG(DYNA_REC, !m_regs[i].IsLocked(), + "Someone forgot to unlock PPC reg %zu (X64 reg %i).", i, RX(i)); + ASSERT_MSG(DYNA_REC, !m_regs[i].IsRevertable(), "Register transaction is in progress!"); + + if (m_regs[i].IsBound()) + { + X64Reg xr = RX(i); + m_xregs[xr].Unbind(); + } + + m_regs[i].SetDiscarded(); + } +} + void RegCache::Flush(BitSet32 pregs) { ASSERT_MSG( @@ -396,6 +420,7 @@ void RegCache::Flush(BitSet32 pregs) switch (m_regs[i].GetLocationType()) { case PPCCachedReg::LocationType::Default: + case PPCCachedReg::LocationType::Discarded: break; case PPCCachedReg::LocationType::SpeculativeImmediate: // We can have a cached value without a host register through speculative constants. @@ -474,8 +499,8 @@ void RegCache::DiscardRegContentsIfCached(preg_t preg) { if (m_regs[preg].IsBound()) { - X64Reg xr = m_regs[preg].Location().GetSimpleReg(); - m_xregs[xr].SetFlushed(); + X64Reg xr = m_regs[preg].Location()->GetSimpleReg(); + m_xregs[xr].Unbind(); m_regs[preg].SetFlushed(); } } @@ -494,12 +519,15 @@ void RegCache::BindToRegister(preg_t i, bool doLoad, bool makeDirty) if (doLoad) { + ASSERT_MSG(DYNA_REC, !m_regs[i].IsDiscarded(), "Attempted to load a discarded value"); LoadRegister(i, xr); } ASSERT_MSG(DYNA_REC, std::none_of(m_regs.begin(), m_regs.end(), - [xr](const auto& r) { return r.Location().IsSimpleReg(xr); }), + [xr](const auto& r) { + return r.Location().has_value() && r.Location()->IsSimpleReg(xr); + }), "Xreg %i already bound", xr); m_regs[i].SetBoundTo(xr); @@ -525,6 +553,7 @@ void RegCache::StoreFromRegister(preg_t i, FlushMode mode) switch (m_regs[i].GetLocationType()) { case PPCCachedReg::LocationType::Default: + case PPCCachedReg::LocationType::Discarded: case PPCCachedReg::LocationType::SpeculativeImmediate: return; case PPCCachedReg::LocationType::Bound: @@ -532,7 +561,7 @@ void RegCache::StoreFromRegister(preg_t i, FlushMode mode) X64Reg xr = RX(i); doStore = m_xregs[xr].IsDirty(); if (mode == FlushMode::Full) - m_xregs[xr].SetFlushed(); + m_xregs[xr].Unbind(); break; } case PPCCachedReg::LocationType::Immediate: @@ -635,13 +664,14 @@ float RegCache::ScoreRegister(X64Reg xreg) const const OpArg& RegCache::R(preg_t preg) const { - return m_regs[preg].Location(); + ASSERT_MSG(DYNA_REC, !m_regs[preg].IsDiscarded(), "Discarded register - %zu", preg); + return m_regs[preg].Location().value(); } X64Reg RegCache::RX(preg_t preg) const { ASSERT_MSG(DYNA_REC, m_regs[preg].IsBound(), "Unbound register - %zu", preg); - return m_regs[preg].Location().GetSimpleReg(); + return m_regs[preg].Location()->GetSimpleReg(); } void RegCache::Lock(preg_t preg) @@ -707,6 +737,7 @@ void RegCache::Realize(preg_t preg) } m_constraints[preg].Realized(RCConstraint::RealizedLoc::Mem); return; + case PPCCachedReg::LocationType::Discarded: case PPCCachedReg::LocationType::Bound: do_bind(); return; diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.h b/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.h index dae0e8d046..291e18018f 100644 --- a/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.h +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.h @@ -169,6 +169,7 @@ public: RCX64Reg Scratch(Gen::X64Reg xr); RCForkGuard Fork(); + void Discard(BitSet32 pregs); void Flush(BitSet32 pregs = BitSet32::AllTrue(32)); void Revert(); void Commit(); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 2d9758b248..5bc25d4be5 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -834,7 +834,12 @@ void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) if (!CanMergeNextInstructions(1) || js.op[1].opinfo->type != ::OpType::Integer) FlushCarry(); - // If we have a register that will never be used again, flush it. + // If we have a register that will never be used again, discard or flush it. + if (!SConfig::GetInstance().bJITRegisterCacheOff) + { + gpr.DiscardRegisters(op.gprDiscardable); + fpr.DiscardRegisters(op.fprDiscardable); + } gpr.StoreRegisters(~op.gprInUse); fpr.StoreRegisters(~op.fprInUse); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index 1b26efe31b..5ce941da21 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -24,6 +24,12 @@ void Arm64RegCache::Init(ARM64XEmitter* emitter) GetAllocationOrder(); } +void Arm64RegCache::DiscardRegisters(BitSet32 regs) +{ + for (int j : regs) + DiscardRegister(j); +} + ARM64Reg Arm64RegCache::GetReg() { // If we have no registers left, dump the most stale register first @@ -96,8 +102,8 @@ void Arm64RegCache::FlushMostStaleRegister() const auto& reg = m_guest_registers[i]; const u32 last_used = reg.GetLastUsed(); - if (last_used > most_stale_amount && - (reg.GetType() != RegType::NotLoaded && reg.GetType() != RegType::Immediate)) + if (last_used > most_stale_amount && reg.GetType() != RegType::NotLoaded && + reg.GetType() != RegType::Discarded && reg.GetType() != RegType::Immediate) { most_stale_preg = i; most_stale_amount = last_used; @@ -107,6 +113,16 @@ void Arm64RegCache::FlushMostStaleRegister() FlushRegister(most_stale_preg, false); } +void Arm64RegCache::DiscardRegister(size_t preg) +{ + OpArg& reg = m_guest_registers[preg]; + ARM64Reg host_reg = reg.GetReg(); + + reg.Discard(); + if (host_reg != ARM64Reg::INVALID_REG) + UnlockRegister(host_reg); +} + // GPR Cache constexpr size_t GUEST_GPR_COUNT = 32; constexpr size_t GUEST_CR_COUNT = 8; @@ -284,6 +300,9 @@ ARM64Reg Arm64GPRCache::R(const GuestRegInfo& guest_reg) return host_reg; } break; + case RegType::Discarded: + ASSERT_MSG(DYNA_REC, false, "Attempted to read discarded register"); + break; case RegType::NotLoaded: // Register isn't loaded at /all/ { // This is a bit annoying. We try to keep these preloaded as much as possible @@ -318,14 +337,18 @@ void Arm64GPRCache::BindToRegister(const GuestRegInfo& guest_reg, bool do_load) const size_t bitsize = guest_reg.bitsize; reg.ResetLastUsed(); - reg.SetDirty(true); - if (reg.GetType() == RegType::NotLoaded) + + const RegType reg_type = reg.GetType(); + if (reg_type == RegType::NotLoaded || reg_type == RegType::Discarded) { const ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg()); reg.Load(host_reg); if (do_load) + { + ASSERT_MSG(DYNA_REC, reg_type != RegType::Discarded, "Attempted to load a discarded value"); m_emit->LDR(IndexType::Unsigned, host_reg, PPC_REG, u32(guest_reg.ppc_offset)); + } } } @@ -407,10 +430,9 @@ void Arm64FPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op) { const RegType reg_type = m_guest_registers[i].GetType(); - if (reg_type != RegType::NotLoaded && reg_type != RegType::Immediate) + if (reg_type != RegType::NotLoaded && reg_type != RegType::Discarded && + reg_type != RegType::Immediate) { - // XXX: Determine if we can keep a register in the lower 64bits - // Which will allow it to be callee saved. FlushRegister(i, mode == FlushMode::MaintainState); } } @@ -497,6 +519,9 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type) } return host_reg; } + case RegType::Discarded: + ASSERT_MSG(DYNA_REC, false, "Attempted to read discarded register"); + break; case RegType::NotLoaded: // Register isn't loaded at /all/ { host_reg = GetReg(); @@ -536,7 +561,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type) reg.SetDirty(true); // If not loaded at all, just alloc a new one. - if (reg.GetType() == RegType::NotLoaded) + if (reg.GetType() == RegType::NotLoaded || reg.GetType() == RegType::Discarded) { reg.Load(GetReg(), type); return reg.GetReg(); @@ -637,8 +662,8 @@ void Arm64FPRCache::FlushByHost(ARM64Reg host_reg) const OpArg& reg = m_guest_registers[i]; const RegType reg_type = reg.GetType(); - if ((reg_type != RegType::NotLoaded && reg_type != RegType::Immediate) && - reg.GetReg() == host_reg) + if (reg_type != RegType::NotLoaded && reg_type != RegType::Discarded && + reg_type != RegType::Immediate && reg.GetReg() == host_reg) { FlushRegister(i, false); return; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index aba4213a43..ca7917e197 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -47,6 +47,7 @@ static_assert(PPCSTATE_OFF(xer_so_ov) < 4096, "STRB can't store xer_so_ov!"); enum class RegType { NotLoaded, + Discarded, // Reg is not loaded because we know it won't be read before the next write Register, // Reg type is register Immediate, // Reg is really a IMM LowerPair, // Only the lower pair of a paired register @@ -86,6 +87,15 @@ public: m_reg = Arm64Gen::ARM64Reg::INVALID_REG; } + void Discard() + { + // Invalidate any previous information + m_type = RegType::Discarded; + m_reg = Arm64Gen::ARM64Reg::INVALID_REG; + + // Arbitrarily large value that won't roll over on a lot of increments + m_last_used = 0xFFFF; + } void Flush() { // Invalidate any previous information @@ -143,6 +153,7 @@ public: void Init(Arm64Gen::ARM64XEmitter* emitter); virtual void Start(PPCAnalyst::BlockRegStats& stats) {} + void DiscardRegisters(BitSet32 regs); // Flushes the register cache in different ways depending on the mode virtual void Flush(FlushMode mode, PPCAnalyst::CodeOp* op) = 0; @@ -194,6 +205,7 @@ protected: // Flushes a guest register by host provided virtual void FlushByHost(Arm64Gen::ARM64Reg host_reg) = 0; + void DiscardRegister(size_t preg); virtual void FlushRegister(size_t preg, bool maintain_state) = 0; // Get available host registers diff --git a/Source/Core/Core/PowerPC/PPCAnalyst.cpp b/Source/Core/Core/PowerPC/PPCAnalyst.cpp index aa715f8052..833f5f476a 100644 --- a/Source/Core/Core/PowerPC/PPCAnalyst.cpp +++ b/Source/Core/Core/PowerPC/PPCAnalyst.cpp @@ -551,6 +551,10 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock* block, CodeOp* code, const Gekk code->outputFPRF = (opinfo->flags & FL_SET_FPRF) != 0; code->canEndBlock = (opinfo->flags & FL_ENDBLOCK) != 0; + // TODO: Is it possible to determine that some FPU instructions never cause exceptions? + code->canCauseException = + (opinfo->flags & (FL_LOADSTORE | FL_USE_FPU | FL_PROGRAMEXCEPTION)) != 0; + code->wantsCA = (opinfo->flags & FL_READ_CA) != 0; code->outputCA = (opinfo->flags & FL_SET_CA) != 0; @@ -916,7 +920,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, std: // Scan for flag dependencies; assume the next block (or any branch that can leave the block) // wants flags, to be safe. bool wantsCR0 = true, wantsCR1 = true, wantsFPRF = true, wantsCA = true; - BitSet32 fprInUse, gprInUse, gprInReg, fprInXmm; + BitSet32 fprInUse, gprInUse, gprDiscardable, fprDiscardable, fprInXmm; for (int i = block->m_num_instructions - 1; i >= 0; i--) { CodeOp& op = code[i]; @@ -939,21 +943,26 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, std: wantsCA &= !op.outputCA || opWantsCA; op.gprInUse = gprInUse; op.fprInUse = fprInUse; - op.gprInReg = gprInReg; + op.gprDiscardable = gprDiscardable; + op.fprDiscardable = fprDiscardable; op.fprInXmm = fprInXmm; - // TODO: if there's no possible endblocks or exceptions in between, tell the regcache - // we can throw away a register if it's going to be overwritten later. gprInUse |= op.regsIn; - gprInReg |= op.regsIn; fprInUse |= op.fregsIn; + if (op.canEndBlock || op.canCauseException) + { + gprDiscardable = BitSet32{}; + fprDiscardable = BitSet32{}; + } + else + { + gprDiscardable |= op.regsOut; + gprDiscardable &= ~op.regsIn; + if (op.fregOut >= 0) + fprDiscardable[op.fregOut] = true; + fprDiscardable &= ~op.fregsIn; + } if (strncmp(op.opinfo->opname, "stfd", 4)) fprInXmm |= op.fregsIn; - // For now, we need to count output registers as "used" though; otherwise the flush - // will result in a redundant store (e.g. store to regcache, then store again to - // the same location later). - gprInUse |= op.regsOut; - if (op.fregOut >= 0) - fprInUse[op.fregOut] = true; } // Forward scan, for flags that need the other direction for calculation. diff --git a/Source/Core/Core/PowerPC/PPCAnalyst.h b/Source/Core/Core/PowerPC/PPCAnalyst.h index 6aad1f8430..3b82618978 100644 --- a/Source/Core/Core/PowerPC/PPCAnalyst.h +++ b/Source/Core/Core/PowerPC/PPCAnalyst.h @@ -45,13 +45,15 @@ struct CodeOp // 16B bool outputFPRF; bool outputCA; bool canEndBlock; + bool canCauseException; bool skipLRStack; bool skip; // followed BL-s for example // which registers are still needed after this instruction in this block BitSet32 fprInUse; BitSet32 gprInUse; - // just because a register is in use doesn't mean we actually need or want it in an x86 register. - BitSet32 gprInReg; + // which registers have values which are known to be unused after this instruction + BitSet32 gprDiscardable; + BitSet32 fprDiscardable; // we do double stores from GPRs, so we don't want to load a PowerPC floating point register into // an XMM only to move it again to a GPR afterwards. BitSet32 fprInXmm; diff --git a/Source/Core/Core/PowerPC/PPCTables.h b/Source/Core/Core/PowerPC/PPCTables.h index 6acb98cd36..4c21d27506 100644 --- a/Source/Core/Core/PowerPC/PPCTables.h +++ b/Source/Core/Core/PowerPC/PPCTables.h @@ -65,6 +65,7 @@ enum InstructionFlags : u64 FL_IN_FLOAT_C_BITEXACT = (1ull << 31), // The output is based on the exact bits in frC. FL_IN_FLOAT_AB_BITEXACT = FL_IN_FLOAT_A_BITEXACT | FL_IN_FLOAT_B_BITEXACT, FL_IN_FLOAT_BC_BITEXACT = FL_IN_FLOAT_B_BITEXACT | FL_IN_FLOAT_C_BITEXACT, + FL_PROGRAMEXCEPTION = (1ull << 32), // May generate a system exception. }; enum class OpType