Merge pull request #9351 from JosJuice/discard-registers

Jits: Discard registers which we know will be overwritten
2021-04-06 04:40:26 -04:00 · 2021-04-06 04:40:26 -04:00 · 99d43362e6
parent ccc99ebfe3 62ce1c7653
commit 99d43362e6
14 changed files with 241 additions and 131 deletions
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Tables.cpp
@ -85,51 +85,51 @@ static std::array<GekkoOPTemplate, 54> primarytable =
 	{54, Interpreter::stfd,         {"stfd",  OpType::StoreFP, FL_IN_FLOAT_S | FL_IN_A0 | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}},
 	{55, Interpreter::stfdu,        {"stfdu", OpType::StoreFP, FL_IN_FLOAT_S | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}},

-	{56, Interpreter::psq_l,        {"psq_l",   OpType::LoadPS, FL_OUT_FLOAT_D | FL_IN_A0 | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}},
-	{57, Interpreter::psq_lu,       {"psq_lu",  OpType::LoadPS, FL_OUT_FLOAT_D | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}},
-	{60, Interpreter::psq_st,       {"psq_st",  OpType::StorePS, FL_IN_FLOAT_S | FL_IN_A0 | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}},
-	{61, Interpreter::psq_stu,      {"psq_stu", OpType::StorePS, FL_IN_FLOAT_S | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}},
+	{56, Interpreter::psq_l,        {"psq_l",   OpType::LoadPS, FL_OUT_FLOAT_D | FL_IN_A0 | FL_USE_FPU | FL_LOADSTORE | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{57, Interpreter::psq_lu,       {"psq_lu",  OpType::LoadPS, FL_OUT_FLOAT_D | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{60, Interpreter::psq_st,       {"psq_st",  OpType::StorePS, FL_IN_FLOAT_S | FL_IN_A0 | FL_USE_FPU | FL_LOADSTORE | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{61, Interpreter::psq_stu,      {"psq_stu", OpType::StorePS, FL_IN_FLOAT_S | FL_OUT_A | FL_IN_A | FL_USE_FPU | FL_LOADSTORE | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},

 	//missing: 0, 1, 2, 5, 6, 9, 22, 30, 62, 58
 }};

 static std::array<GekkoOPTemplate, 13> table4 =
 {{    //SUBOP10
-	{0,    Interpreter::ps_cmpu0,   {"ps_cmpu0",   OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 1, 0, 0, 0}},
-	{32,   Interpreter::ps_cmpo0,   {"ps_cmpo0",   OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 1, 0, 0, 0}},
-	{40,   Interpreter::ps_neg,     {"ps_neg",     OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
-	{136,  Interpreter::ps_nabs,    {"ps_nabs",    OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
-	{264,  Interpreter::ps_abs,     {"ps_abs",     OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
-	{64,   Interpreter::ps_cmpu1,   {"ps_cmpu1",   OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 1, 0, 0, 0}},
-	{72,   Interpreter::ps_mr,      {"ps_mr",      OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
-	{96,   Interpreter::ps_cmpo1,   {"ps_cmpo1",   OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 1, 0, 0, 0}},
-	{528,  Interpreter::ps_merge00, {"ps_merge00", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_IN_FLOAT_AB_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
-	{560,  Interpreter::ps_merge01, {"ps_merge01", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_IN_FLOAT_AB_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
-	{592,  Interpreter::ps_merge10, {"ps_merge10", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_IN_FLOAT_AB_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
-	{624,  Interpreter::ps_merge11, {"ps_merge11", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_IN_FLOAT_AB_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
+	{0,    Interpreter::ps_cmpu0,   {"ps_cmpu0",   OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{32,   Interpreter::ps_cmpo0,   {"ps_cmpo0",   OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{40,   Interpreter::ps_neg,     {"ps_neg",     OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{136,  Interpreter::ps_nabs,    {"ps_nabs",    OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{264,  Interpreter::ps_abs,     {"ps_abs",     OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{64,   Interpreter::ps_cmpu1,   {"ps_cmpu1",   OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{72,   Interpreter::ps_mr,      {"ps_mr",      OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{96,   Interpreter::ps_cmpo1,   {"ps_cmpo1",   OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{528,  Interpreter::ps_merge00, {"ps_merge00", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_IN_FLOAT_AB_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{560,  Interpreter::ps_merge01, {"ps_merge01", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_IN_FLOAT_AB_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{592,  Interpreter::ps_merge10, {"ps_merge10", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_IN_FLOAT_AB_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{624,  Interpreter::ps_merge11, {"ps_merge11", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_IN_FLOAT_AB_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},

-	{1014, Interpreter::dcbz_l,     {"dcbz_l",     OpType::System, FL_IN_A0B | FL_LOADSTORE, 1, 0, 0, 0}},
+	{1014, Interpreter::dcbz_l,     {"dcbz_l",     OpType::System, FL_IN_A0B | FL_LOADSTORE | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
 }};

 static std::array<GekkoOPTemplate, 17> table4_2 =
 {{
-	{10, Interpreter::ps_sum0,      {"ps_sum0",   OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
-	{11, Interpreter::ps_sum1,      {"ps_sum1",   OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
-	{12, Interpreter::ps_muls0,     {"ps_muls0",  OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
-	{13, Interpreter::ps_muls1,     {"ps_muls1",  OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
-	{14, Interpreter::ps_madds0,    {"ps_madds0", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
-	{15, Interpreter::ps_madds1,    {"ps_madds1", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
-	{18, Interpreter::ps_div,       {"ps_div",    OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 17, 0, 0, 0}},
-	{20, Interpreter::ps_sub,       {"ps_sub",    OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
-	{21, Interpreter::ps_add,       {"ps_add",    OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
-	{23, Interpreter::ps_sel,       {"ps_sel",    OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_IN_FLOAT_BC_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
-	{24, Interpreter::ps_res,       {"ps_res",    OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
-	{25, Interpreter::ps_mul,       {"ps_mul",    OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
-	{26, Interpreter::ps_rsqrte,    {"ps_rsqrte", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 2, 0, 0, 0}},
-	{28, Interpreter::ps_msub,      {"ps_msub",   OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
-	{29, Interpreter::ps_madd,      {"ps_madd",   OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
-	{30, Interpreter::ps_nmsub,     {"ps_nmsub",  OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
-	{31, Interpreter::ps_nmadd,     {"ps_nmadd",  OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
+	{10, Interpreter::ps_sum0,      {"ps_sum0",   OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{11, Interpreter::ps_sum1,      {"ps_sum1",   OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{12, Interpreter::ps_muls0,     {"ps_muls0",  OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{13, Interpreter::ps_muls1,     {"ps_muls1",  OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{14, Interpreter::ps_madds0,    {"ps_madds0", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{15, Interpreter::ps_madds1,    {"ps_madds1", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{18, Interpreter::ps_div,       {"ps_div",    OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 17, 0, 0, 0}},
+	{20, Interpreter::ps_sub,       {"ps_sub",    OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{21, Interpreter::ps_add,       {"ps_add",    OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{23, Interpreter::ps_sel,       {"ps_sel",    OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_IN_FLOAT_BC_BITEXACT | FL_RC_BIT_F | FL_USE_FPU | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{24, Interpreter::ps_res,       {"ps_res",    OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{25, Interpreter::ps_mul,       {"ps_mul",    OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{26, Interpreter::ps_rsqrte,    {"ps_rsqrte", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 2, 0, 0, 0}},
+	{28, Interpreter::ps_msub,      {"ps_msub",   OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{29, Interpreter::ps_madd,      {"ps_madd",   OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{30, Interpreter::ps_nmsub,     {"ps_nmsub",  OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{31, Interpreter::ps_nmadd,     {"ps_nmadd",  OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
 }};


@ -157,7 +157,7 @@ static std::array<GekkoOPTemplate, 13> table19 =
 	{150, Interpreter::isync,       {"isync",  OpType::InstructionCache, FL_EVIL, 1, 0, 0, 0}},
 	{0,   Interpreter::mcrf,        {"mcrf",   OpType::System, FL_EVIL | FL_SET_CRn, 1, 0, 0, 0}},

-	{50,  Interpreter::rfi,         {"rfi",    OpType::System, FL_ENDBLOCK | FL_CHECKEXCEPTIONS, 2, 0, 0, 0}},
+	{50,  Interpreter::rfi,         {"rfi",    OpType::System, FL_ENDBLOCK | FL_CHECKEXCEPTIONS | FL_PROGRAMEXCEPTION, 2, 0, 0, 0}},
 }};

 static std::array<GekkoOPTemplate, 107> table31 =
@ -215,7 +215,7 @@ static std::array<GekkoOPTemplate, 107> table31 =
 	{86,   Interpreter::dcbf,       {"dcbf",   OpType::DataCache, FL_IN_A0B | FL_LOADSTORE, 5, 0, 0, 0}},
 	{246,  Interpreter::dcbtst,     {"dcbtst", OpType::DataCache, 0, 2, 0, 0, 0}},
 	{278,  Interpreter::dcbt,       {"dcbt",   OpType::DataCache, 0, 2, 0, 0, 0}},
-	{470,  Interpreter::dcbi,       {"dcbi",   OpType::DataCache, FL_IN_A0B | FL_LOADSTORE, 5, 0, 0, 0}},
+	{470,  Interpreter::dcbi,       {"dcbi",   OpType::DataCache, FL_IN_A0B | FL_LOADSTORE | FL_PROGRAMEXCEPTION, 5, 0, 0, 0}},
 	{758,  Interpreter::dcba,       {"dcba",   OpType::DataCache, 0, 5, 0, 0, 0}},
 	{1014, Interpreter::dcbz,       {"dcbz",   OpType::DataCache, FL_IN_A0B | FL_LOADSTORE, 5, 0, 0, 0}},

@ -279,17 +279,17 @@ static std::array<GekkoOPTemplate, 107> table31 =
 	{983, Interpreter::stfiwx,      {"stfiwx", OpType::StoreFP, FL_IN_FLOAT_S | FL_IN_A0B | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}},

 	{19,  Interpreter::mfcr,        {"mfcr",   OpType::System, FL_OUT_D, 1, 0, 0, 0}},
-	{83,  Interpreter::mfmsr,       {"mfmsr",  OpType::System, FL_OUT_D, 1, 0, 0, 0}},
+	{83,  Interpreter::mfmsr,       {"mfmsr",  OpType::System, FL_OUT_D | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
 	{144, Interpreter::mtcrf,       {"mtcrf",  OpType::System, FL_IN_S | FL_SET_CRn, 1, 0, 0, 0}},
-	{146, Interpreter::mtmsr,       {"mtmsr",  OpType::System, FL_IN_S | FL_ENDBLOCK, 1, 0, 0, 0}},
-	{210, Interpreter::mtsr,        {"mtsr",   OpType::System, FL_IN_S, 1, 0, 0, 0}},
-	{242, Interpreter::mtsrin,      {"mtsrin", OpType::System, FL_IN_SB, 1, 0, 0, 0}},
-	{339, Interpreter::mfspr,       {"mfspr",  OpType::SPR, FL_OUT_D, 1, 0, 0, 0}},
-	{467, Interpreter::mtspr,       {"mtspr",  OpType::SPR, FL_IN_S, 2, 0, 0, 0}},
-	{371, Interpreter::mftb,        {"mftb",   OpType::System, FL_OUT_D | FL_TIMER, 1, 0, 0, 0}},
+	{146, Interpreter::mtmsr,       {"mtmsr",  OpType::System, FL_IN_S | FL_ENDBLOCK | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{210, Interpreter::mtsr,        {"mtsr",   OpType::System, FL_IN_S | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{242, Interpreter::mtsrin,      {"mtsrin", OpType::System, FL_IN_SB | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{339, Interpreter::mfspr,       {"mfspr",  OpType::SPR, FL_OUT_D | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{467, Interpreter::mtspr,       {"mtspr",  OpType::SPR, FL_IN_S | FL_PROGRAMEXCEPTION, 2, 0, 0, 0}},
+	{371, Interpreter::mftb,        {"mftb",   OpType::System, FL_OUT_D | FL_TIMER | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
 	{512, Interpreter::mcrxr,       {"mcrxr",  OpType::System, FL_SET_CRn | FL_READ_CA | FL_SET_CA, 1, 0, 0, 0}},
-	{595, Interpreter::mfsr,        {"mfsr",   OpType::System, FL_OUT_D, 3, 0, 0, 0}},
-	{659, Interpreter::mfsrin,      {"mfsrin", OpType::System, FL_OUT_D | FL_IN_B, 3, 0, 0, 0}},
+	{595, Interpreter::mfsr,        {"mfsr",   OpType::System, FL_OUT_D | FL_PROGRAMEXCEPTION, 3, 0, 0, 0}},
+	{659, Interpreter::mfsrin,      {"mfsrin", OpType::System, FL_OUT_D | FL_IN_B | FL_PROGRAMEXCEPTION, 3, 0, 0, 0}},

 	{4,   Interpreter::tw,          {"tw",     OpType::System, FL_IN_AB | FL_ENDBLOCK, 2, 0, 0, 0}},
 	{598, Interpreter::sync,        {"sync",   OpType::System, 0, 3, 0, 0, 0}},
@ -299,8 +299,8 @@ static std::array<GekkoOPTemplate, 107> table31 =
 	{310, Interpreter::eciwx,       {"eciwx",   OpType::System, FL_IN_A0B | FL_OUT_D | FL_LOADSTORE, 1, 0, 0, 0}},
 	{438, Interpreter::ecowx,       {"ecowx",   OpType::System, FL_IN_A0B | FL_IN_S | FL_LOADSTORE, 1, 0, 0, 0}},
 	{854, Interpreter::eieio,       {"eieio",   OpType::System, 0, 1, 0, 0, 0}},
-	{306, Interpreter::tlbie,       {"tlbie",   OpType::System, FL_IN_B, 1, 0, 0, 0}},
-	{566, Interpreter::tlbsync,     {"tlbsync", OpType::System, 0, 1, 0, 0, 0}},
+	{306, Interpreter::tlbie,       {"tlbie",   OpType::System, FL_IN_B | FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
+	{566, Interpreter::tlbsync,     {"tlbsync", OpType::System, FL_PROGRAMEXCEPTION, 1, 0, 0, 0}},
 }};

 static std::array<GekkoOPTemplate, 9> table59 =
--- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
@ -1104,8 +1104,8 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
        // output, which needs to be bound in the actual instruction compilation.
        // TODO: make this smarter in the case that we're actually register-starved, i.e.
        // prioritize the more important registers.
-        gpr.PreloadRegisters(op.regsIn & op.gprInReg);
-        fpr.PreloadRegisters(op.fregsIn & op.fprInXmm);
+        gpr.PreloadRegisters(op.regsIn & op.gprInUse & ~op.gprDiscardable);
+        fpr.PreloadRegisters(op.fregsIn & op.fprInXmm & ~op.fprDiscardable);
      }

      CompileInstruction(op);
@ -1151,7 +1151,12 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
      gpr.Commit();
      fpr.Commit();

-      // If we have a register that will never be used again, flush it.
+      // If we have a register that will never be used again, discard or flush it.
+      if (!SConfig::GetInstance().bJITRegisterCacheOff)
+      {
+        gpr.Discard(op.gprDiscardable);
+        fpr.Discard(op.fprDiscardable);
+      }
      gpr.Flush(~op.gprInUse);
      fpr.Flush(~op.fprInUse);

--- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
@ -298,11 +298,11 @@ void Jit64::reg_imm(UGeckoInstruction inst)
  {
  case 14:  // addi
    // occasionally used as MOV - emulate, with immediate propagation
-    if (gpr.IsImm(a) && d != a && a != 0)
+    if (a != 0 && d != a && gpr.IsImm(a))
    {
      gpr.SetImmediate32(d, gpr.Imm32(a) + (u32)(s32)inst.SIMM_16);
    }
-    else if (inst.SIMM_16 == 0 && d != a && a != 0)
+    else if (a != 0 && d != a && inst.SIMM_16 == 0)
    {
      RCOpArg Ra = gpr.Use(a, RCMode::Read);
      RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
--- a/Source/Core/Core/PowerPC/Jit64/RegCache/CachedReg.h
+++ b/Source/Core/Core/PowerPC/Jit64/RegCache/CachedReg.h
@ -5,6 +5,7 @@
 #pragma once

 #include <cstddef>
+#include <optional>

 #include "Common/Assert.h"
 #include "Common/CommonTypes.h"
@ -20,6 +21,8 @@ public:
  {
    /// Value is currently at its default location
    Default,
+    /// Value is not stored anywhere because we know it won't be read before the next write
+    Discarded,
    /// Value is currently bound to a x64 register
    Bound,
    /// Value is known as an immediate and has not been written back to its default location
@ -35,26 +38,30 @@ public:
  {
  }

-  const Gen::OpArg& Location() const { return location; }
+  const std::optional<Gen::OpArg>& Location() const { return location; }

  LocationType GetLocationType() const
  {
+    if (!location.has_value())
+      return LocationType::Discarded;
+
    if (!away)
    {
      ASSERT(!revertable);

-      if (location.IsImm())
+      if (location->IsImm())
        return LocationType::SpeculativeImmediate;

      ASSERT(location == default_location);
      return LocationType::Default;
    }

-    ASSERT(location.IsImm() || location.IsSimpleReg());
-    return location.IsImm() ? LocationType::Immediate : LocationType::Bound;
+    ASSERT(location->IsImm() || location->IsSimpleReg());
+    return location->IsImm() ? LocationType::Immediate : LocationType::Bound;
  }

  bool IsAway() const { return away; }
+  bool IsDiscarded() const { return !location.has_value(); }
  bool IsBound() const { return GetLocationType() == LocationType::Bound; }

  void SetBoundTo(Gen::X64Reg xreg)
@ -63,6 +70,13 @@ public:
    location = Gen::R(xreg);
  }

+  void SetDiscarded()  // Puts this register into the Discarded state: its value is stored nowhere
+  {
+    ASSERT(!revertable);  // must not be called while the register is marked revertable
+    away = false;
+    location = std::nullopt;  // an empty location is what IsDiscarded()/GetLocationType() test for
+  }
+
  void SetFlushed()
  {
    ASSERT(!revertable);
@ -104,7 +118,7 @@ public:

 private:
  Gen::OpArg default_location{};
-  Gen::OpArg location{};
+  std::optional<Gen::OpArg> location{};
  bool away = false;  // value not in source register
  bool revertable = false;
  size_t locked = 0;
@ -122,7 +136,7 @@ public:
    dirty = dirty_;
  }

-  void SetFlushed()
+  void Unbind()
  {
    ppcReg = static_cast<preg_t>(Gen::INVALID_REG);
    free = true;
--- a/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp
@ -16,12 +16,14 @@ FPURegCache::FPURegCache(Jit64& jit) : RegCache{jit}

 void FPURegCache::StoreRegister(preg_t preg, const OpArg& new_loc)
 {
-  m_emitter->MOVAPD(new_loc, m_regs[preg].Location().GetSimpleReg());
+  ASSERT_MSG(DYNA_REC, m_regs[preg].IsBound(), "Unbound register - %zu", preg);
+  m_emitter->MOVAPD(new_loc, m_regs[preg].Location()->GetSimpleReg());
 }

 void FPURegCache::LoadRegister(preg_t preg, X64Reg new_loc)
 {
-  m_emitter->MOVAPD(new_loc, m_regs[preg].Location());
+  ASSERT_MSG(DYNA_REC, !m_regs[preg].IsDiscarded(), "Discarded register - %zu", preg);
+  m_emitter->MOVAPD(new_loc, m_regs[preg].Location().value());
 }

 const X64Reg* FPURegCache::GetAllocationOrder(size_t* count) const
--- a/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.cpp
@ -16,12 +16,14 @@ GPRRegCache::GPRRegCache(Jit64& jit) : RegCache{jit}

 void GPRRegCache::StoreRegister(preg_t preg, const OpArg& new_loc)
 {
-  m_emitter->MOV(32, new_loc, m_regs[preg].Location());
+  ASSERT_MSG(DYNA_REC, !m_regs[preg].IsDiscarded(), "Discarded register - %zu", preg);
+  m_emitter->MOV(32, new_loc, m_regs[preg].Location().value());
 }

 void GPRRegCache::LoadRegister(preg_t preg, X64Reg new_loc)
 {
-  m_emitter->MOV(32, ::Gen::R(new_loc), m_regs[preg].Location());
+  ASSERT_MSG(DYNA_REC, !m_regs[preg].IsDiscarded(), "Discarded register - %zu", preg);
+  m_emitter->MOV(32, ::Gen::R(new_loc), m_regs[preg].Location().value());
 }

 OpArg GPRRegCache::GetDefaultLocation(preg_t preg) const
@ -56,7 +58,7 @@ void GPRRegCache::SetImmediate32(preg_t preg, u32 imm_value, bool dirty)

 BitSet32 GPRRegCache::GetRegUtilization() const
 {
-  return m_jit.js.op->gprInReg;
+  return m_jit.js.op->gprInUse;
 }

 BitSet32 GPRRegCache::CountRegsIn(preg_t preg, u32 lookahead) const
--- a/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.cpp
@ -314,6 +314,7 @@ bool RegCache::SanityCheck() const
    switch (m_regs[i].GetLocationType())
    {
    case PPCCachedReg::LocationType::Default:
+    case PPCCachedReg::LocationType::Discarded:
    case PPCCachedReg::LocationType::SpeculativeImmediate:
    case PPCCachedReg::LocationType::Immediate:
      break;
@ -322,7 +323,7 @@ bool RegCache::SanityCheck() const
      if (m_regs[i].IsLocked() || m_regs[i].IsRevertable())
        return false;

-      Gen::X64Reg xr = m_regs[i].Location().GetSimpleReg();
+      Gen::X64Reg xr = m_regs[i].Location()->GetSimpleReg();
      if (m_xregs[xr].IsLocked())
        return false;
      if (m_xregs[xr].Contents() != i)
@ -380,6 +381,29 @@ RCForkGuard RegCache::Fork()
  return RCForkGuard{*this};
 }

+void RegCache::Discard(BitSet32 pregs)  // Marks every PPC register in pregs as discarded; emits no store
+{
+  ASSERT_MSG(
+      DYNA_REC,
+      std::none_of(m_xregs.begin(), m_xregs.end(), [](const auto& x) { return x.IsLocked(); }),
+      "Someone forgot to unlock a X64 reg");
+
+  for (preg_t i : pregs)
+  {
+    ASSERT_MSG(DYNA_REC, !m_regs[i].IsLocked(),
+               "Someone forgot to unlock PPC reg %zu (X64 reg %i).", i, RX(i));
+    ASSERT_MSG(DYNA_REC, !m_regs[i].IsRevertable(), "Register transaction is in progress!");
+
+    if (m_regs[i].IsBound())
+    {
+      X64Reg xr = RX(i);
+      m_xregs[xr].Unbind();  // release the host register bound to this PPC register
+    }
+
+    m_regs[i].SetDiscarded();  // applied to every listed register, bound or not
+  }
+}
+
 void RegCache::Flush(BitSet32 pregs)
 {
  ASSERT_MSG(
@ -396,6 +420,7 @@ void RegCache::Flush(BitSet32 pregs)
    switch (m_regs[i].GetLocationType())
    {
    case PPCCachedReg::LocationType::Default:
+    case PPCCachedReg::LocationType::Discarded:
      break;
    case PPCCachedReg::LocationType::SpeculativeImmediate:
      // We can have a cached value without a host register through speculative constants.
@ -474,8 +499,8 @@ void RegCache::DiscardRegContentsIfCached(preg_t preg)
 {
  if (m_regs[preg].IsBound())
  {
-    X64Reg xr = m_regs[preg].Location().GetSimpleReg();
-    m_xregs[xr].SetFlushed();
+    X64Reg xr = m_regs[preg].Location()->GetSimpleReg();
+    m_xregs[xr].Unbind();
    m_regs[preg].SetFlushed();
  }
 }
@ -494,12 +519,15 @@ void RegCache::BindToRegister(preg_t i, bool doLoad, bool makeDirty)

    if (doLoad)
    {
+      ASSERT_MSG(DYNA_REC, !m_regs[i].IsDiscarded(), "Attempted to load a discarded value");
      LoadRegister(i, xr);
    }

    ASSERT_MSG(DYNA_REC,
               std::none_of(m_regs.begin(), m_regs.end(),
-                            [xr](const auto& r) { return r.Location().IsSimpleReg(xr); }),
+                            [xr](const auto& r) {
+                              return r.Location().has_value() && r.Location()->IsSimpleReg(xr);
+                            }),
               "Xreg %i already bound", xr);

    m_regs[i].SetBoundTo(xr);
@ -525,6 +553,7 @@ void RegCache::StoreFromRegister(preg_t i, FlushMode mode)
  switch (m_regs[i].GetLocationType())
  {
  case PPCCachedReg::LocationType::Default:
+  case PPCCachedReg::LocationType::Discarded:
  case PPCCachedReg::LocationType::SpeculativeImmediate:
    return;
  case PPCCachedReg::LocationType::Bound:
@ -532,7 +561,7 @@ void RegCache::StoreFromRegister(preg_t i, FlushMode mode)
    X64Reg xr = RX(i);
    doStore = m_xregs[xr].IsDirty();
    if (mode == FlushMode::Full)
-      m_xregs[xr].SetFlushed();
+      m_xregs[xr].Unbind();
    break;
  }
  case PPCCachedReg::LocationType::Immediate:
@ -635,13 +664,14 @@ float RegCache::ScoreRegister(X64Reg xreg) const

 const OpArg& RegCache::R(preg_t preg) const
 {
-  return m_regs[preg].Location();
+  ASSERT_MSG(DYNA_REC, !m_regs[preg].IsDiscarded(), "Discarded register - %zu", preg);
+  return m_regs[preg].Location().value();
 }

 X64Reg RegCache::RX(preg_t preg) const
 {
  ASSERT_MSG(DYNA_REC, m_regs[preg].IsBound(), "Unbound register - %zu", preg);
-  return m_regs[preg].Location().GetSimpleReg();
+  return m_regs[preg].Location()->GetSimpleReg();
 }

 void RegCache::Lock(preg_t preg)
@ -707,6 +737,7 @@ void RegCache::Realize(preg_t preg)
    }
    m_constraints[preg].Realized(RCConstraint::RealizedLoc::Mem);
    return;
+  case PPCCachedReg::LocationType::Discarded:
  case PPCCachedReg::LocationType::Bound:
    do_bind();
    return;
--- a/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.h
+++ b/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.h
@ -169,6 +169,7 @@ public:
  RCX64Reg Scratch(Gen::X64Reg xr);

  RCForkGuard Fork();
+  void Discard(BitSet32 pregs);
  void Flush(BitSet32 pregs = BitSet32::AllTrue(32));
  void Revert();
  void Commit();
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
@ -828,7 +828,12 @@ void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
      if (!CanMergeNextInstructions(1) || js.op[1].opinfo->type != ::OpType::Integer)
        FlushCarry();

-      // If we have a register that will never be used again, flush it.
+      // If we have a register that will never be used again, discard or flush it.
+      if (!SConfig::GetInstance().bJITRegisterCacheOff)
+      {
+        gpr.DiscardRegisters(op.gprDiscardable);
+        fpr.DiscardRegisters(op.fprDiscardable);
+      }
      gpr.StoreRegisters(~op.gprInUse);
      fpr.StoreRegisters(~op.fprInUse);

--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
@ -24,6 +24,12 @@ void Arm64RegCache::Init(ARM64XEmitter* emitter)
  GetAllocationOrder();
 }

+void Arm64RegCache::DiscardRegisters(BitSet32 regs)  // Calls DiscardRegister for each index set in regs
+{
+  for (int j : regs)
+    DiscardRegister(j);
+}
+
 ARM64Reg Arm64RegCache::GetReg()
 {
  // If we have no registers left, dump the most stale register first
@ -96,8 +102,8 @@ void Arm64RegCache::FlushMostStaleRegister()
    const auto& reg = m_guest_registers[i];
    const u32 last_used = reg.GetLastUsed();

-    if (last_used > most_stale_amount &&
-        (reg.GetType() != RegType::NotLoaded && reg.GetType() != RegType::Immediate))
+    if (last_used > most_stale_amount && reg.GetType() != RegType::NotLoaded &&
+        reg.GetType() != RegType::Discarded && reg.GetType() != RegType::Immediate)
    {
      most_stale_preg = i;
      most_stale_amount = last_used;
@ -107,6 +113,16 @@ void Arm64RegCache::FlushMostStaleRegister()
  FlushRegister(most_stale_preg, false);
 }

+void Arm64RegCache::DiscardRegister(size_t preg)  // Discards one guest register and unlocks its host register
+{
+  OpArg& reg = m_guest_registers[preg];
+  ARM64Reg host_reg = reg.GetReg();  // read before Discard(), which resets the stored register to INVALID_REG
+
+  reg.Discard();
+  if (host_reg != ARM64Reg::INVALID_REG)
+    UnlockRegister(host_reg);  // skipped when the guest register had no host register
+}
+
 // GPR Cache
 constexpr size_t GUEST_GPR_COUNT = 32;
 constexpr size_t GUEST_CR_COUNT = 8;
@ -284,6 +300,9 @@ ARM64Reg Arm64GPRCache::R(const GuestRegInfo& guest_reg)
    return host_reg;
  }
  break;
+  case RegType::Discarded:
+    ASSERT_MSG(DYNA_REC, false, "Attempted to read discarded register");
+    break;
  case RegType::NotLoaded:  // Register isn't loaded at /all/
  {
    // This is a bit annoying. We try to keep these preloaded as much as possible
@ -318,14 +337,18 @@ void Arm64GPRCache::BindToRegister(const GuestRegInfo& guest_reg, bool do_load)
  const size_t bitsize = guest_reg.bitsize;

  reg.ResetLastUsed();
-
  reg.SetDirty(true);
-  if (reg.GetType() == RegType::NotLoaded)
+
+  const RegType reg_type = reg.GetType();
+  if (reg_type == RegType::NotLoaded || reg_type == RegType::Discarded)
  {
    const ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg());
    reg.Load(host_reg);
    if (do_load)
+    {
+      ASSERT_MSG(DYNA_REC, reg_type != RegType::Discarded, "Attempted to load a discarded value");
      m_emit->LDR(IndexType::Unsigned, host_reg, PPC_REG, u32(guest_reg.ppc_offset));
+    }
  }
 }

@ -407,10 +430,9 @@ void Arm64FPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op)
  {
    const RegType reg_type = m_guest_registers[i].GetType();

-    if (reg_type != RegType::NotLoaded && reg_type != RegType::Immediate)
+    if (reg_type != RegType::NotLoaded && reg_type != RegType::Discarded &&
+        reg_type != RegType::Immediate)
    {
-      // XXX: Determine if we can keep a register in the lower 64bits
-      // Which will allow it to be callee saved.
      FlushRegister(i, mode == FlushMode::MaintainState);
    }
  }
@ -497,6 +519,9 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type)
    }
    return host_reg;
  }
+  case RegType::Discarded:
+    ASSERT_MSG(DYNA_REC, false, "Attempted to read discarded register");
+    break;
  case RegType::NotLoaded:  // Register isn't loaded at /all/
  {
    host_reg = GetReg();
@ -536,7 +561,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type)
  reg.SetDirty(true);

  // If not loaded at all, just alloc a new one.
-  if (reg.GetType() == RegType::NotLoaded)
+  if (reg.GetType() == RegType::NotLoaded || reg.GetType() == RegType::Discarded)
  {
    reg.Load(GetReg(), type);
    return reg.GetReg();
@ -637,8 +662,8 @@ void Arm64FPRCache::FlushByHost(ARM64Reg host_reg)
    const OpArg& reg = m_guest_registers[i];
    const RegType reg_type = reg.GetType();

-    if ((reg_type != RegType::NotLoaded && reg_type != RegType::Immediate) &&
-        reg.GetReg() == host_reg)
+    if (reg_type != RegType::NotLoaded && reg_type != RegType::Discarded &&
+        reg_type != RegType::Immediate && reg.GetReg() == host_reg)
    {
      FlushRegister(i, false);
      return;
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
@ -47,6 +47,7 @@ static_assert(PPCSTATE_OFF(xer_so_ov) < 4096, "STRB can't store xer_so_ov!");
 enum class RegType
 {
  NotLoaded,
+  Discarded,   // Reg is not loaded because we know it won't be read before the next write
  Register,    // Reg type is register
  Immediate,   // Reg is really a IMM
  LowerPair,   // Only the lower pair of a paired register
@ -86,6 +87,15 @@ public:

    m_reg = Arm64Gen::ARM64Reg::INVALID_REG;
  }
+  void Discard()
+  {
+    // Forget the cached value: mark as Discarded and drop the host register without storing it
+    m_type = RegType::Discarded;
+    m_reg = Arm64Gen::ARM64Reg::INVALID_REG;
+
+    // Arbitrarily large value that won't roll over on a lot of increments
+    m_last_used = 0xFFFF;
+  }
  void Flush()
  {
    // Invalidate any previous information
@ -143,6 +153,7 @@ public:
  void Init(Arm64Gen::ARM64XEmitter* emitter);

  virtual void Start(PPCAnalyst::BlockRegStats& stats) {}
+  void DiscardRegisters(BitSet32 regs);
  // Flushes the register cache in different ways depending on the mode
  virtual void Flush(FlushMode mode, PPCAnalyst::CodeOp* op) = 0;

@ -194,6 +205,7 @@ protected:
  // Flushes a guest register by host provided
  virtual void FlushByHost(Arm64Gen::ARM64Reg host_reg) = 0;

+  void DiscardRegister(size_t preg);
  virtual void FlushRegister(size_t preg, bool maintain_state) = 0;

  // Get available host registers
--- a/Source/Core/Core/PowerPC/PPCAnalyst.cpp
+++ b/Source/Core/Core/PowerPC/PPCAnalyst.cpp
@ -196,8 +196,8 @@ static bool CanSwapAdjacentOps(const CodeOp& a, const CodeOp& b)
 {
  const GekkoOPInfo* a_info = a.opinfo;
  const GekkoOPInfo* b_info = b.opinfo;
-  int a_flags = a_info->flags;
-  int b_flags = b_info->flags;
+  u64 a_flags = a_info->flags;
+  u64 b_flags = b_info->flags;

  // can't reorder around breakpoints
  if (SConfig::GetInstance().bEnableDebugging &&
@ -551,6 +551,10 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock* block, CodeOp* code, const Gekk
  code->outputFPRF = (opinfo->flags & FL_SET_FPRF) != 0;
  code->canEndBlock = (opinfo->flags & FL_ENDBLOCK) != 0;

+  // TODO: Is it possible to determine that some FPU instructions never cause exceptions?
+  code->canCauseException =
+      (opinfo->flags & (FL_LOADSTORE | FL_USE_FPU | FL_PROGRAMEXCEPTION)) != 0;
+
  code->wantsCA = (opinfo->flags & FL_READ_CA) != 0;
  code->outputCA = (opinfo->flags & FL_SET_CA) != 0;

@ -916,7 +920,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, std:
  // Scan for flag dependencies; assume the next block (or any branch that can leave the block)
  // wants flags, to be safe.
  bool wantsCR0 = true, wantsCR1 = true, wantsFPRF = true, wantsCA = true;
-  BitSet32 fprInUse, gprInUse, gprInReg, fprInXmm;
+  BitSet32 fprInUse, gprInUse, gprDiscardable, fprDiscardable, fprInXmm;
  for (int i = block->m_num_instructions - 1; i >= 0; i--)
  {
    CodeOp& op = code[i];
@ -939,21 +943,26 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, std:
    wantsCA &= !op.outputCA || opWantsCA;
    op.gprInUse = gprInUse;
    op.fprInUse = fprInUse;
-    op.gprInReg = gprInReg;
+    op.gprDiscardable = gprDiscardable;
+    op.fprDiscardable = fprDiscardable;
    op.fprInXmm = fprInXmm;
-    // TODO: if there's no possible endblocks or exceptions in between, tell the regcache
-    // we can throw away a register if it's going to be overwritten later.
    gprInUse |= op.regsIn;
-    gprInReg |= op.regsIn;
    fprInUse |= op.fregsIn;
+    if (op.canEndBlock || op.canCauseException)
+    {
+      gprDiscardable = BitSet32{};
+      fprDiscardable = BitSet32{};
+    }
+    else
+    {
+      gprDiscardable |= op.regsOut;
+      gprDiscardable &= ~op.regsIn;
+      if (op.fregOut >= 0)
+        fprDiscardable[op.fregOut] = true;
+      fprDiscardable &= ~op.fregsIn;
+    }
    if (strncmp(op.opinfo->opname, "stfd", 4))
      fprInXmm |= op.fregsIn;
-    // For now, we need to count output registers as "used" though; otherwise the flush
-    // will result in a redundant store (e.g. store to regcache, then store again to
-    // the same location later).
-    gprInUse |= op.regsOut;
-    if (op.fregOut >= 0)
-      fprInUse[op.fregOut] = true;
  }

  // Forward scan, for flags that need the other direction for calculation.
--- a/Source/Core/Core/PowerPC/PPCAnalyst.h
+++ b/Source/Core/Core/PowerPC/PPCAnalyst.h
@ -45,13 +45,15 @@ struct CodeOp  // 16B
  bool outputFPRF;
  bool outputCA;
  bool canEndBlock;
+  bool canCauseException;
  bool skipLRStack;
  bool skip;  // followed BL-s for example
  // which registers are still needed after this instruction in this block
  BitSet32 fprInUse;
  BitSet32 gprInUse;
-  // just because a register is in use doesn't mean we actually need or want it in an x86 register.
-  BitSet32 gprInReg;
+  // which registers have values which are known to be unused after this instruction
+  BitSet32 gprDiscardable;
+  BitSet32 fprDiscardable;
  // we do double stores from GPRs, so we don't want to load a PowerPC floating point register into
  // an XMM only to move it again to a GPR afterwards.
  BitSet32 fprInXmm;
--- a/Source/Core/Core/PowerPC/PPCTables.h
+++ b/Source/Core/Core/PowerPC/PPCTables.h
@ -7,63 +7,65 @@
 #include <array>
 #include <cstddef>

+#include "Common/CommonTypes.h"
 #include "Core/PowerPC/Gekko.h"
 #include "Core/PowerPC/Interpreter/Interpreter.h"

 // Flags that indicate what an instruction can do.
-enum
+enum InstructionFlags : u64
 {
-  FL_SET_CR0 = (1 << 0),  // Sets CR0.
-  FL_SET_CR1 = (1 << 1),  // Sets CR1.
-  FL_SET_CRn = (1 << 2),  // Encoding decides which CR can be set.
+  FL_SET_CR0 = (1ull << 0),  // Sets CR0.
+  FL_SET_CR1 = (1ull << 1),  // Sets CR1.
+  FL_SET_CRn = (1ull << 2),  // Encoding decides which CR can be set.
  FL_SET_CRx = FL_SET_CR0 | FL_SET_CR1 | FL_SET_CRn,
-  FL_SET_CA = (1 << 3),   // Sets the carry flag.
-  FL_READ_CA = (1 << 4),  // Reads the carry flag.
-  FL_RC_BIT = (1 << 5),   // Sets the record bit.
+  FL_SET_CA = (1ull << 3),   // Sets the carry flag.
+  FL_READ_CA = (1ull << 4),  // Reads the carry flag.
+  FL_RC_BIT = (1ull << 5),   // Sets the record bit.
  FL_RC_BIT_F =
-      (1 << 6),  // Sets the record bit. Used for floating point instructions that do this.
+      (1ull << 6),  // Sets the record bit. Used for floating point instructions that do this.
  FL_ENDBLOCK =
-      (1 << 7),  // Specifies that the instruction can be used as an exit point for a JIT block.
-  FL_IN_A = (1 << 8),   // Uses rA as an input.
-  FL_IN_A0 = (1 << 9),  // Uses rA as an input. Indicates that if rA is zero, the value zero is
-                        // used, not the contents of r0.
-  FL_IN_B = (1 << 10),  // Uses rB as an input.
-  FL_IN_C = (1 << 11),  // Uses rC as an input.
-  FL_IN_S = (1 << 12),  // Uses rS as an input.
+      (1ull << 7),  // Specifies that the instruction can be used as an exit point for a JIT block.
+  FL_IN_A = (1ull << 8),   // Uses rA as an input.
+  FL_IN_A0 = (1ull << 9),  // Uses rA as an input. Indicates that if rA is zero, the value zero is
+                           // used, not the contents of r0.
+  FL_IN_B = (1ull << 10),  // Uses rB as an input.
+  FL_IN_C = (1ull << 11),  // Uses rC as an input.
+  FL_IN_S = (1ull << 12),  // Uses rS as an input.
  FL_IN_AB = FL_IN_A | FL_IN_B,
  FL_IN_AC = FL_IN_A | FL_IN_C,
  FL_IN_ABC = FL_IN_A | FL_IN_B | FL_IN_C,
  FL_IN_SB = FL_IN_S | FL_IN_B,
  FL_IN_A0B = FL_IN_A0 | FL_IN_B,
  FL_IN_A0BC = FL_IN_A0 | FL_IN_B | FL_IN_C,
-  FL_OUT_D = (1 << 13),  // rD is used as a destination.
-  FL_OUT_A = (1 << 14),  // rA is used as a destination.
+  FL_OUT_D = (1ull << 13),  // rD is used as a destination.
+  FL_OUT_A = (1ull << 14),  // rA is used as a destination.
  FL_OUT_AD = FL_OUT_A | FL_OUT_D,
-  FL_TIMER = (1 << 15),            // Used only for mftb.
-  FL_CHECKEXCEPTIONS = (1 << 16),  // Used with rfi/rfid.
+  FL_TIMER = (1ull << 15),            // Used only for mftb.
+  FL_CHECKEXCEPTIONS = (1ull << 16),  // Used with rfi/rfid.
  FL_EVIL =
-      (1 << 17),  // Historically used to refer to instructions that messed up Super Monkey Ball.
-  FL_USE_FPU = (1 << 18),     // Used to indicate a floating point instruction.
-  FL_LOADSTORE = (1 << 19),   // Used to indicate a load/store instruction.
-  FL_SET_FPRF = (1 << 20),    // Sets bits in the FPRF.
-  FL_READ_FPRF = (1 << 21),   // Reads bits from the FPRF.
-  FL_SET_OE = (1 << 22),      // Sets the overflow flag.
-  FL_IN_FLOAT_A = (1 << 23),  // frA is used as an input.
-  FL_IN_FLOAT_B = (1 << 24),  // frB is used as an input.
-  FL_IN_FLOAT_C = (1 << 25),  // frC is used as an input.
-  FL_IN_FLOAT_S = (1 << 26),  // frS is used as an input.
-  FL_IN_FLOAT_D = (1 << 27),  // frD is used as an input.
+      (1ull << 17),  // Historically used to refer to instructions that messed up Super Monkey Ball.
+  FL_USE_FPU = (1ull << 18),     // Used to indicate a floating point instruction.
+  FL_LOADSTORE = (1ull << 19),   // Used to indicate a load/store instruction.
+  FL_SET_FPRF = (1ull << 20),    // Sets bits in the FPRF.
+  FL_READ_FPRF = (1ull << 21),   // Reads bits from the FPRF.
+  FL_SET_OE = (1ull << 22),      // Sets the overflow flag.
+  FL_IN_FLOAT_A = (1ull << 23),  // frA is used as an input.
+  FL_IN_FLOAT_B = (1ull << 24),  // frB is used as an input.
+  FL_IN_FLOAT_C = (1ull << 25),  // frC is used as an input.
+  FL_IN_FLOAT_S = (1ull << 26),  // frS is used as an input.
+  FL_IN_FLOAT_D = (1ull << 27),  // frD is used as an input.
  FL_IN_FLOAT_AB = FL_IN_FLOAT_A | FL_IN_FLOAT_B,
  FL_IN_FLOAT_AC = FL_IN_FLOAT_A | FL_IN_FLOAT_C,
  FL_IN_FLOAT_ABC = FL_IN_FLOAT_A | FL_IN_FLOAT_B | FL_IN_FLOAT_C,
-  FL_OUT_FLOAT_D = (1 << 28),  // frD is used as a destination.
+  FL_OUT_FLOAT_D = (1ull << 28),  // frD is used as a destination.
  // Used in the case of double ops (they don't modify the top half of the output)
  FL_INOUT_FLOAT_D = FL_IN_FLOAT_D | FL_OUT_FLOAT_D,
-  FL_IN_FLOAT_A_BITEXACT = (1 << 29),  // The output is based on the exact bits in frA.
-  FL_IN_FLOAT_B_BITEXACT = (1 << 30),  // The output is based on the exact bits in frB.
-  FL_IN_FLOAT_C_BITEXACT = (1 << 31),  // The output is based on the exact bits in frC.
+  FL_IN_FLOAT_A_BITEXACT = (1ull << 29),  // The output is based on the exact bits in frA.
+  FL_IN_FLOAT_B_BITEXACT = (1ull << 30),  // The output is based on the exact bits in frB.
+  FL_IN_FLOAT_C_BITEXACT = (1ull << 31),  // The output is based on the exact bits in frC.
  FL_IN_FLOAT_AB_BITEXACT = FL_IN_FLOAT_A_BITEXACT | FL_IN_FLOAT_B_BITEXACT,
  FL_IN_FLOAT_BC_BITEXACT = FL_IN_FLOAT_B_BITEXACT | FL_IN_FLOAT_C_BITEXACT,
+  FL_PROGRAMEXCEPTION = (1ull << 32),  // May generate a program exception.
 };

 enum class OpType
@ -94,7 +96,7 @@ struct GekkoOPInfo
 {
  const char* opname;
  OpType type;
-  int flags;
+  u64 flags;
  int numCycles;
  u64 runCount;
  int compileCount;