Merge pull request #843 from FioraAeterna/fprf
JIT: Initial FPRF support
This commit is contained in:
commit
0217fb2008
|
@ -383,6 +383,9 @@ union UReg_MSR
|
||||||
UReg_MSR() { Hex = 0; }
|
UReg_MSR() { Hex = 0; }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define FPRF_SHIFT 12
|
||||||
|
#define FPRF_MASK (0x1F << FPRF_SHIFT)
|
||||||
|
|
||||||
// Floating Point Status and Control Register
|
// Floating Point Status and Control Register
|
||||||
union UReg_FPSCR
|
union UReg_FPSCR
|
||||||
{
|
{
|
||||||
|
|
|
@ -97,14 +97,14 @@ static GekkoOPTemplate primarytable[] =
|
||||||
|
|
||||||
static GekkoOPTemplate table4[] =
|
static GekkoOPTemplate table4[] =
|
||||||
{ //SUBOP10
|
{ //SUBOP10
|
||||||
{0, Interpreter::ps_cmpu0, {"ps_cmpu0", OPTYPE_PS, FL_SET_CRn | FL_USE_FPU, 1, 0, 0, 0}},
|
{0, Interpreter::ps_cmpu0, {"ps_cmpu0", OPTYPE_PS, FL_SET_CRn | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{32, Interpreter::ps_cmpo0, {"ps_cmpo0", OPTYPE_PS, FL_SET_CRn | FL_USE_FPU, 1, 0, 0, 0}},
|
{32, Interpreter::ps_cmpo0, {"ps_cmpo0", OPTYPE_PS, FL_SET_CRn | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{40, Interpreter::ps_neg, {"ps_neg", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
|
{40, Interpreter::ps_neg, {"ps_neg", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{136, Interpreter::ps_nabs, {"ps_nabs", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
|
{136, Interpreter::ps_nabs, {"ps_nabs", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{264, Interpreter::ps_abs, {"ps_abs", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
|
{264, Interpreter::ps_abs, {"ps_abs", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{64, Interpreter::ps_cmpu1, {"ps_cmpu1", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
|
{64, Interpreter::ps_cmpu1, {"ps_cmpu1", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{72, Interpreter::ps_mr, {"ps_mr", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
|
{72, Interpreter::ps_mr, {"ps_mr", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{96, Interpreter::ps_cmpo1, {"ps_cmpo1", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
|
{96, Interpreter::ps_cmpo1, {"ps_cmpo1", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{528, Interpreter::ps_merge00, {"ps_merge00", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
|
{528, Interpreter::ps_merge00, {"ps_merge00", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{560, Interpreter::ps_merge01, {"ps_merge01", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
|
{560, Interpreter::ps_merge01, {"ps_merge01", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{592, Interpreter::ps_merge10, {"ps_merge10", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
|
{592, Interpreter::ps_merge10, {"ps_merge10", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
|
@ -115,23 +115,23 @@ static GekkoOPTemplate table4[] =
|
||||||
|
|
||||||
static GekkoOPTemplate table4_2[] =
|
static GekkoOPTemplate table4_2[] =
|
||||||
{
|
{
|
||||||
{10, Interpreter::ps_sum0, {"ps_sum0", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
{10, Interpreter::ps_sum0, {"ps_sum0", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{11, Interpreter::ps_sum1, {"ps_sum1", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
{11, Interpreter::ps_sum1, {"ps_sum1", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{12, Interpreter::ps_muls0, {"ps_muls0", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
{12, Interpreter::ps_muls0, {"ps_muls0", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{13, Interpreter::ps_muls1, {"ps_muls1", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
{13, Interpreter::ps_muls1, {"ps_muls1", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{14, Interpreter::ps_madds0, {"ps_madds0", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
{14, Interpreter::ps_madds0, {"ps_madds0", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{15, Interpreter::ps_madds1, {"ps_madds1", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
{15, Interpreter::ps_madds1, {"ps_madds1", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{18, Interpreter::ps_div, {"ps_div", OPTYPE_PS, FL_USE_FPU, 17, 0, 0, 0}},
|
{18, Interpreter::ps_div, {"ps_div", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 17, 0, 0, 0}},
|
||||||
{20, Interpreter::ps_sub, {"ps_sub", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
{20, Interpreter::ps_sub, {"ps_sub", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{21, Interpreter::ps_add, {"ps_add", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
{21, Interpreter::ps_add, {"ps_add", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{23, Interpreter::ps_sel, {"ps_sel", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
{23, Interpreter::ps_sel, {"ps_sel", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{24, Interpreter::ps_res, {"ps_res", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
{24, Interpreter::ps_res, {"ps_res", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{25, Interpreter::ps_mul, {"ps_mul", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
{25, Interpreter::ps_mul, {"ps_mul", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{26, Interpreter::ps_rsqrte, {"ps_rsqrte", OPTYPE_PS, FL_USE_FPU, 2, 0, 0, 0}},
|
{26, Interpreter::ps_rsqrte, {"ps_rsqrte", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 2, 0, 0, 0}},
|
||||||
{28, Interpreter::ps_msub, {"ps_msub", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
{28, Interpreter::ps_msub, {"ps_msub", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{29, Interpreter::ps_madd, {"ps_madd", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
{29, Interpreter::ps_madd, {"ps_madd", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{30, Interpreter::ps_nmsub, {"ps_nmsub", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
{30, Interpreter::ps_nmsub, {"ps_nmsub", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{31, Interpreter::ps_nmadd, {"ps_nmadd", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
{31, Interpreter::ps_nmadd, {"ps_nmadd", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -307,51 +307,51 @@ static GekkoOPTemplate table31_2[] =
|
||||||
|
|
||||||
static GekkoOPTemplate table59[] =
|
static GekkoOPTemplate table59[] =
|
||||||
{
|
{
|
||||||
{18, Interpreter::fdivsx, {"fdivsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 17, 0, 0, 0}}, // TODO
|
{18, Interpreter::fdivsx, {"fdivsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 17, 0, 0, 0}}, // TODO
|
||||||
{20, Interpreter::fsubsx, {"fsubsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{20, Interpreter::fsubsx, {"fsubsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{21, Interpreter::faddsx, {"faddsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{21, Interpreter::faddsx, {"faddsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
//{22, Interpreter::fsqrtsx, {"fsqrtsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, // Not implemented on gekko
|
//{22, Interpreter::fsqrtsx, {"fsqrtsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, // Not implemented on gekko
|
||||||
{24, Interpreter::fresx, {"fresx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{24, Interpreter::fresx, {"fresx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{25, Interpreter::fmulsx, {"fmulsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{25, Interpreter::fmulsx, {"fmulsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{28, Interpreter::fmsubsx, {"fmsubsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{28, Interpreter::fmsubsx, {"fmsubsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{29, Interpreter::fmaddsx, {"fmaddsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{29, Interpreter::fmaddsx, {"fmaddsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{30, Interpreter::fnmsubsx, {"fnmsubsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{30, Interpreter::fnmsubsx, {"fnmsubsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{31, Interpreter::fnmaddsx, {"fnmaddsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{31, Interpreter::fnmaddsx, {"fnmaddsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
};
|
};
|
||||||
|
|
||||||
static GekkoOPTemplate table63[] =
|
static GekkoOPTemplate table63[] =
|
||||||
{
|
{
|
||||||
{264, Interpreter::fabsx, {"fabsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{264, Interpreter::fabsx, {"fabsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{32, Interpreter::fcmpo, {"fcmpo", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{32, Interpreter::fcmpo, {"fcmpo", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{0, Interpreter::fcmpu, {"fcmpu", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{0, Interpreter::fcmpu, {"fcmpu", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{14, Interpreter::fctiwx, {"fctiwx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{14, Interpreter::fctiwx, {"fctiwx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{15, Interpreter::fctiwzx, {"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{15, Interpreter::fctiwzx, {"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{72, Interpreter::fmrx, {"fmrx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{72, Interpreter::fmrx, {"fmrx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{136, Interpreter::fnabsx, {"fnabsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{136, Interpreter::fnabsx, {"fnabsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{40, Interpreter::fnegx, {"fnegx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{40, Interpreter::fnegx, {"fnegx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{12, Interpreter::frspx, {"frspx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{12, Interpreter::frspx, {"frspx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
|
|
||||||
{64, Interpreter::mcrfs, {"mcrfs", OPTYPE_SYSTEMFP, FL_USE_FPU, 1, 0, 0, 0}},
|
{64, Interpreter::mcrfs, {"mcrfs", OPTYPE_SYSTEMFP, FL_USE_FPU | FL_READ_FPRF, 1, 0, 0, 0}},
|
||||||
{583, Interpreter::mffsx, {"mffsx", OPTYPE_SYSTEMFP, FL_USE_FPU, 1, 0, 0, 0}},
|
{583, Interpreter::mffsx, {"mffsx", OPTYPE_SYSTEMFP, FL_USE_FPU | FL_READ_FPRF, 1, 0, 0, 0}},
|
||||||
{70, Interpreter::mtfsb0x, {"mtfsb0x", OPTYPE_SYSTEMFP, FL_USE_FPU, 3, 0, 0, 0}},
|
{70, Interpreter::mtfsb0x, {"mtfsb0x", OPTYPE_SYSTEMFP, FL_USE_FPU | FL_READ_FPRF, 3, 0, 0, 0}},
|
||||||
{38, Interpreter::mtfsb1x, {"mtfsb1x", OPTYPE_SYSTEMFP, FL_USE_FPU, 3, 0, 0, 0}},
|
{38, Interpreter::mtfsb1x, {"mtfsb1x", OPTYPE_SYSTEMFP, FL_USE_FPU | FL_READ_FPRF, 3, 0, 0, 0}},
|
||||||
{134, Interpreter::mtfsfix, {"mtfsfix", OPTYPE_SYSTEMFP, FL_USE_FPU, 3, 0, 0, 0}},
|
{134, Interpreter::mtfsfix, {"mtfsfix", OPTYPE_SYSTEMFP, FL_USE_FPU | FL_READ_FPRF, 3, 0, 0, 0}},
|
||||||
{711, Interpreter::mtfsfx, {"mtfsfx", OPTYPE_SYSTEMFP, FL_USE_FPU, 3, 0, 0, 0}},
|
{711, Interpreter::mtfsfx, {"mtfsfx", OPTYPE_SYSTEMFP, FL_USE_FPU | FL_READ_FPRF, 3, 0, 0, 0}},
|
||||||
};
|
};
|
||||||
|
|
||||||
static GekkoOPTemplate table63_2[] =
|
static GekkoOPTemplate table63_2[] =
|
||||||
{
|
{
|
||||||
{18, Interpreter::fdivx, {"fdivx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 31, 0, 0, 0}},
|
{18, Interpreter::fdivx, {"fdivx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 31, 0, 0, 0}},
|
||||||
{20, Interpreter::fsubx, {"fsubx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{20, Interpreter::fsubx, {"fsubx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{21, Interpreter::faddx, {"faddx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{21, Interpreter::faddx, {"faddx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{22, Interpreter::fsqrtx, {"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{22, Interpreter::fsqrtx, {"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{23, Interpreter::fselx, {"fselx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{23, Interpreter::fselx, {"fselx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{25, Interpreter::fmulx, {"fmulx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{25, Interpreter::fmulx, {"fmulx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{26, Interpreter::frsqrtex, {"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{26, Interpreter::frsqrtex, {"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{28, Interpreter::fmsubx, {"fmsubx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{28, Interpreter::fmsubx, {"fmsubx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{29, Interpreter::fmaddx, {"fmaddx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{29, Interpreter::fmaddx, {"fmaddx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{30, Interpreter::fnmsubx, {"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{30, Interpreter::fnmsubx, {"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{31, Interpreter::fnmaddx, {"fnmaddx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{31, Interpreter::fnmaddx, {"fnmaddx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
};
|
};
|
||||||
namespace InterpreterTables
|
namespace InterpreterTables
|
||||||
{
|
{
|
||||||
|
|
|
@ -116,11 +116,12 @@ public:
|
||||||
// Generates a branch that will check if a given bit of a CR register part
|
// Generates a branch that will check if a given bit of a CR register part
|
||||||
// is set or not.
|
// is set or not.
|
||||||
Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set = true);
|
Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set = true);
|
||||||
|
void SetFPRFIfNeeded(UGeckoInstruction inst, Gen::X64Reg xmm);
|
||||||
|
|
||||||
void tri_op(int d, int a, int b, bool reversible, void (Gen::XEmitter::*op)(Gen::X64Reg, Gen::OpArg), bool roundRHS = false);
|
void tri_op(int d, int a, int b, bool reversible, void (Gen::XEmitter::*op)(Gen::X64Reg, Gen::OpArg), UGeckoInstruction inst, bool roundRHS = false);
|
||||||
typedef u32 (*Operation)(u32 a, u32 b);
|
typedef u32 (*Operation)(u32 a, u32 b);
|
||||||
void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (Gen::XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false);
|
void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (Gen::XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false);
|
||||||
void fp_tri_op(int d, int a, int b, bool reversible, bool single, void (Gen::XEmitter::*op)(Gen::X64Reg, Gen::OpArg), bool roundRHS = false);
|
void fp_tri_op(int d, int a, int b, bool reversible, bool single, void (Gen::XEmitter::*op)(Gen::X64Reg, Gen::OpArg), UGeckoInstruction inst, bool roundRHS = false);
|
||||||
|
|
||||||
// OPCODES
|
// OPCODES
|
||||||
void unknown_instruction(UGeckoInstruction _inst);
|
void unknown_instruction(UGeckoInstruction _inst);
|
||||||
|
|
|
@ -14,7 +14,7 @@ static const u64 GC_ALIGNED16(psSignBits2[2]) = {0x8000000000000000ULL, 0x800000
|
||||||
static const u64 GC_ALIGNED16(psAbsMask2[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
|
static const u64 GC_ALIGNED16(psAbsMask2[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
|
||||||
static const double GC_ALIGNED16(half_qnan_and_s32_max[2]) = {0x7FFFFFFF, -0x80000};
|
static const double GC_ALIGNED16(half_qnan_and_s32_max[2]) = {0x7FFFFFFF, -0x80000};
|
||||||
|
|
||||||
void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg), bool roundRHS)
|
void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg), UGeckoInstruction inst, bool roundRHS)
|
||||||
{
|
{
|
||||||
fpr.Lock(d, a, b);
|
fpr.Lock(d, a, b);
|
||||||
if (roundRHS)
|
if (roundRHS)
|
||||||
|
@ -88,25 +88,35 @@ void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single, void (X
|
||||||
UNPCKLPD(fpr.RX(d), R(fpr.RX(d)));
|
UNPCKLPD(fpr.RX(d), R(fpr.RX(d)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
SetFPRFIfNeeded(inst, fpr.RX(d));
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We can avoid calculating FPRF if it's not needed; every float operation resets it, so
|
||||||
|
// if it's going to be clobbered in a future instruction before being read, we can just
|
||||||
|
// not calculate it.
|
||||||
|
void Jit64::SetFPRFIfNeeded(UGeckoInstruction inst, X64Reg xmm)
|
||||||
|
{
|
||||||
|
// As far as we know, the games that use this flag only need FPRF for fmul and fmadd, but
|
||||||
|
// FPRF is fast enough in JIT that we might as well just enable it for every float instruction
|
||||||
|
// if the enableFPRF flag is set.
|
||||||
|
if (Core::g_CoreStartupParameter.bEnableFPRF && js.op->wantsFPRF)
|
||||||
|
SetFPRF(xmm);
|
||||||
|
}
|
||||||
|
|
||||||
void Jit64::fp_arith(UGeckoInstruction inst)
|
void Jit64::fp_arith(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(bJITFloatingPointOff);
|
JITDISABLE(bJITFloatingPointOff);
|
||||||
FALLBACK_IF(inst.Rc);
|
FALLBACK_IF(inst.Rc);
|
||||||
|
|
||||||
// Only the interpreter has "proper" support for (some) FP flags
|
|
||||||
FALLBACK_IF(inst.SUBOP5 == 25 && Core::g_CoreStartupParameter.bEnableFPRF);
|
|
||||||
|
|
||||||
bool single = inst.OPCD == 59;
|
bool single = inst.OPCD == 59;
|
||||||
switch (inst.SUBOP5)
|
switch (inst.SUBOP5)
|
||||||
{
|
{
|
||||||
case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, single, &XEmitter::DIVSD); break; //div
|
case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, single, &XEmitter::DIVSD, inst); break; //div
|
||||||
case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, single, &XEmitter::SUBSD); break; //sub
|
case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, single, &XEmitter::SUBSD, inst); break; //sub
|
||||||
case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, single, &XEmitter::ADDSD); break; //add
|
case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, single, &XEmitter::ADDSD, inst); break; //add
|
||||||
case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, single, &XEmitter::MULSD, single); break; //mul
|
case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, single, &XEmitter::MULSD, inst, single); break; //mul
|
||||||
default:
|
default:
|
||||||
_assert_msg_(DYNA_REC, 0, "fp_arith WTF!!!");
|
_assert_msg_(DYNA_REC, 0, "fp_arith WTF!!!");
|
||||||
}
|
}
|
||||||
|
@ -118,9 +128,6 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
|
||||||
JITDISABLE(bJITFloatingPointOff);
|
JITDISABLE(bJITFloatingPointOff);
|
||||||
FALLBACK_IF(inst.Rc);
|
FALLBACK_IF(inst.Rc);
|
||||||
|
|
||||||
// Only the interpreter has "proper" support for (some) FP flags
|
|
||||||
FALLBACK_IF(inst.SUBOP5 == 29 && Core::g_CoreStartupParameter.bEnableFPRF);
|
|
||||||
|
|
||||||
bool single_precision = inst.OPCD == 59;
|
bool single_precision = inst.OPCD == 59;
|
||||||
|
|
||||||
int a = inst.FA;
|
int a = inst.FA;
|
||||||
|
@ -165,9 +172,7 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
MOVSD(fpr.RX(d), R(XMM0));
|
MOVSD(fpr.RX(d), R(XMM0));
|
||||||
}
|
}
|
||||||
// SMB checks flags after this op. Let's lie.
|
SetFPRFIfNeeded(inst, fpr.RX(d));
|
||||||
//AND(32, M(&PowerPC::ppcState.fpscr), Imm32(~((0x80000000 >> 19) | (0x80000000 >> 15))));
|
|
||||||
//OR(32, M(&PowerPC::ppcState.fpscr), Imm32((0x80000000 >> 16)));
|
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -241,6 +246,7 @@ void Jit64::fcmpx(UGeckoInstruction inst)
|
||||||
fpr.Lock(a,b);
|
fpr.Lock(a,b);
|
||||||
fpr.BindToRegister(b, true);
|
fpr.BindToRegister(b, true);
|
||||||
|
|
||||||
|
AND(32, M(&FPSCR), Imm32(~FPRF_MASK));
|
||||||
// Are we masking sNaN invalid floating point exceptions? If not this could crash if we don't handle the exception?
|
// Are we masking sNaN invalid floating point exceptions? If not this could crash if we don't handle the exception?
|
||||||
UCOMISD(fpr.R(b).GetSimpleReg(), fpr.R(a));
|
UCOMISD(fpr.R(b).GetSimpleReg(), fpr.R(a));
|
||||||
|
|
||||||
|
@ -264,10 +270,13 @@ void Jit64::fcmpx(UGeckoInstruction inst)
|
||||||
}
|
}
|
||||||
|
|
||||||
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_EQ)));
|
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_EQ)));
|
||||||
|
OR(32, M(&FPSCR), Imm32(CR_EQ << FPRF_SHIFT));
|
||||||
|
|
||||||
continue1 = J();
|
continue1 = J();
|
||||||
|
|
||||||
SetJumpTarget(pNaN);
|
SetJumpTarget(pNaN);
|
||||||
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_SO)));
|
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_SO)));
|
||||||
|
OR(32, M(&FPSCR), Imm32(CR_SO << FPRF_SHIFT));
|
||||||
|
|
||||||
if (a != b)
|
if (a != b)
|
||||||
{
|
{
|
||||||
|
@ -275,10 +284,12 @@ void Jit64::fcmpx(UGeckoInstruction inst)
|
||||||
|
|
||||||
SetJumpTarget(pGreater);
|
SetJumpTarget(pGreater);
|
||||||
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_GT)));
|
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_GT)));
|
||||||
|
OR(32, M(&FPSCR), Imm32(CR_GT << FPRF_SHIFT));
|
||||||
continue3 = J();
|
continue3 = J();
|
||||||
|
|
||||||
SetJumpTarget(pLesser);
|
SetJumpTarget(pLesser);
|
||||||
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_LT)));
|
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_LT)));
|
||||||
|
OR(32, M(&FPSCR), Imm32(CR_LT << FPRF_SHIFT));
|
||||||
}
|
}
|
||||||
|
|
||||||
SetJumpTarget(continue1);
|
SetJumpTarget(continue1);
|
||||||
|
|
|
@ -113,7 +113,7 @@ add a,b,a
|
||||||
*/
|
*/
|
||||||
|
|
||||||
//There's still a little bit more optimization that can be squeezed out of this
|
//There's still a little bit more optimization that can be squeezed out of this
|
||||||
void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(X64Reg, OpArg), bool roundRHS)
|
void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(X64Reg, OpArg), UGeckoInstruction inst, bool roundRHS)
|
||||||
{
|
{
|
||||||
fpr.Lock(d, a, b);
|
fpr.Lock(d, a, b);
|
||||||
|
|
||||||
|
@ -163,6 +163,7 @@ void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(X6
|
||||||
(this->*op)(fpr.RX(d), fpr.R(b));
|
(this->*op)(fpr.RX(d), fpr.R(b));
|
||||||
}
|
}
|
||||||
ForceSinglePrecisionP(fpr.RX(d));
|
ForceSinglePrecisionP(fpr.RX(d));
|
||||||
|
SetFPRFIfNeeded(inst, fpr.RX(d));
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -175,16 +176,16 @@ void Jit64::ps_arith(UGeckoInstruction inst)
|
||||||
switch (inst.SUBOP5)
|
switch (inst.SUBOP5)
|
||||||
{
|
{
|
||||||
case 18: // div
|
case 18: // div
|
||||||
tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::DIVPD);
|
tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::DIVPD, inst);
|
||||||
break;
|
break;
|
||||||
case 20: // sub
|
case 20: // sub
|
||||||
tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::SUBPD);
|
tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::SUBPD, inst);
|
||||||
break;
|
break;
|
||||||
case 21: // add
|
case 21: // add
|
||||||
tri_op(inst.FD, inst.FA, inst.FB, true, &XEmitter::ADDPD);
|
tri_op(inst.FD, inst.FA, inst.FB, true, &XEmitter::ADDPD, inst);
|
||||||
break;
|
break;
|
||||||
case 25: // mul
|
case 25: // mul
|
||||||
tri_op(inst.FD, inst.FA, inst.FC, true, &XEmitter::MULPD, true);
|
tri_op(inst.FD, inst.FA, inst.FC, true, &XEmitter::MULPD, inst, true);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
_assert_msg_(DYNA_REC, 0, "ps_arith WTF!!!");
|
_assert_msg_(DYNA_REC, 0, "ps_arith WTF!!!");
|
||||||
|
@ -228,6 +229,7 @@ void Jit64::ps_sum(UGeckoInstruction inst)
|
||||||
PanicAlert("ps_sum WTF!!!");
|
PanicAlert("ps_sum WTF!!!");
|
||||||
}
|
}
|
||||||
ForceSinglePrecisionP(fpr.RX(d));
|
ForceSinglePrecisionP(fpr.RX(d));
|
||||||
|
SetFPRFIfNeeded(inst, fpr.RX(d));
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -267,6 +269,7 @@ void Jit64::ps_muls(UGeckoInstruction inst)
|
||||||
PanicAlert("ps_muls WTF!!!");
|
PanicAlert("ps_muls WTF!!!");
|
||||||
}
|
}
|
||||||
ForceSinglePrecisionP(fpr.RX(d));
|
ForceSinglePrecisionP(fpr.RX(d));
|
||||||
|
SetFPRFIfNeeded(inst, fpr.RX(d));
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -372,5 +375,6 @@ void Jit64::ps_maddXX(UGeckoInstruction inst)
|
||||||
fpr.BindToRegister(d, false);
|
fpr.BindToRegister(d, false);
|
||||||
MOVAPD(fpr.RX(d), Gen::R(XMM0));
|
MOVAPD(fpr.RX(d), Gen::R(XMM0));
|
||||||
ForceSinglePrecisionP(fpr.RX(d));
|
ForceSinglePrecisionP(fpr.RX(d));
|
||||||
|
SetFPRFIfNeeded(inst, fpr.RX(d));
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
|
@ -284,15 +284,6 @@ void Jit64::mfcr(UGeckoInstruction inst)
|
||||||
gpr.UnlockAllX();
|
gpr.UnlockAllX();
|
||||||
}
|
}
|
||||||
|
|
||||||
// convert flags into 64-bit CR values with a lookup table
|
|
||||||
static const u64 m_crTable[16] =
|
|
||||||
{
|
|
||||||
PPCCRToInternal(0x0), PPCCRToInternal(0x1), PPCCRToInternal(0x2), PPCCRToInternal(0x3),
|
|
||||||
PPCCRToInternal(0x4), PPCCRToInternal(0x5), PPCCRToInternal(0x6), PPCCRToInternal(0x7),
|
|
||||||
PPCCRToInternal(0x8), PPCCRToInternal(0x9), PPCCRToInternal(0xA), PPCCRToInternal(0xB),
|
|
||||||
PPCCRToInternal(0xC), PPCCRToInternal(0xD), PPCCRToInternal(0xE), PPCCRToInternal(0xF),
|
|
||||||
};
|
|
||||||
|
|
||||||
void Jit64::mtcrf(UGeckoInstruction inst)
|
void Jit64::mtcrf(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
|
|
||||||
#include "Common/Common.h"
|
#include "Common/Common.h"
|
||||||
#include "Common/CPUDetect.h"
|
#include "Common/CPUDetect.h"
|
||||||
|
#include "Common/MathUtil.h"
|
||||||
|
|
||||||
#include "Core/HW/MMIO.h"
|
#include "Core/HW/MMIO.h"
|
||||||
#include "Core/PowerPC/JitCommon/Jit_Util.h"
|
#include "Core/PowerPC/JitCommon/Jit_Util.h"
|
||||||
|
@ -705,6 +706,103 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr
|
||||||
MOVDDUP(dst, R(dst));
|
MOVDDUP(dst, R(dst));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const u64 GC_ALIGNED16(psDoubleExp[2]) = {0x7FF0000000000000ULL, 0};
|
||||||
|
static const u64 GC_ALIGNED16(psDoubleFrac[2]) = {0x000FFFFFFFFFFFFFULL, 0};
|
||||||
|
static const u64 GC_ALIGNED16(psDoubleNoSign[2]) = {0x7FFFFFFFFFFFFFFFULL, 0};
|
||||||
|
|
||||||
|
// TODO: it might be faster to handle FPRF in the same way as CR is currently handled for integer, storing
|
||||||
|
// the result of each floating point op and calculating it when needed. This is trickier than for integers
|
||||||
|
// though, because there's 32 possible FPRF bit combinations but only 9 categories of floating point values,
|
||||||
|
// which makes the whole thing rather trickier.
|
||||||
|
// Fortunately, PPCAnalyzer can optimize out a large portion of FPRF calculations, so maybe this isn't
|
||||||
|
// quite that necessary.
|
||||||
|
void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
|
||||||
|
{
|
||||||
|
AND(32, M(&FPSCR), Imm32(~FPRF_MASK));
|
||||||
|
|
||||||
|
FixupBranch continue1, continue2, continue3, continue4;
|
||||||
|
if (cpu_info.bSSE4_1)
|
||||||
|
{
|
||||||
|
MOVQ_xmm(R(RAX), xmm);
|
||||||
|
SHR(64, R(RAX), Imm8(63)); // Get the sign bit; almost all the branches need it.
|
||||||
|
PTEST(xmm, M((void*)psDoubleExp));
|
||||||
|
FixupBranch maxExponent = J_CC(CC_C);
|
||||||
|
FixupBranch zeroExponent = J_CC(CC_Z);
|
||||||
|
|
||||||
|
// Nice normalized number: sign ? PPC_FPCLASS_NN : PPC_FPCLASS_PN;
|
||||||
|
LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_NN - MathUtil::PPC_FPCLASS_PN, MathUtil::PPC_FPCLASS_PN));
|
||||||
|
continue1 = J();
|
||||||
|
|
||||||
|
SetJumpTarget(maxExponent);
|
||||||
|
PTEST(xmm, M((void*)psDoubleFrac));
|
||||||
|
FixupBranch notNAN = J_CC(CC_Z);
|
||||||
|
|
||||||
|
// Max exponent + mantissa: PPC_FPCLASS_QNAN
|
||||||
|
MOV(32, R(EAX), Imm32(MathUtil::PPC_FPCLASS_QNAN));
|
||||||
|
continue2 = J();
|
||||||
|
|
||||||
|
// Max exponent + no mantissa: sign ? PPC_FPCLASS_NINF : PPC_FPCLASS_PINF;
|
||||||
|
SetJumpTarget(notNAN);
|
||||||
|
LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_NINF - MathUtil::PPC_FPCLASS_PINF, MathUtil::PPC_FPCLASS_NINF));
|
||||||
|
continue3 = J();
|
||||||
|
|
||||||
|
SetJumpTarget(zeroExponent);
|
||||||
|
PTEST(xmm, R(xmm));
|
||||||
|
FixupBranch zero = J_CC(CC_Z);
|
||||||
|
|
||||||
|
// No exponent + mantissa: sign ? PPC_FPCLASS_ND : PPC_FPCLASS_PD;
|
||||||
|
LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_ND - MathUtil::PPC_FPCLASS_PD, MathUtil::PPC_FPCLASS_ND));
|
||||||
|
continue4 = J();
|
||||||
|
|
||||||
|
// Zero: sign ? PPC_FPCLASS_NZ : PPC_FPCLASS_PZ;
|
||||||
|
SetJumpTarget(zero);
|
||||||
|
SHL(32, R(EAX), Imm8(4));
|
||||||
|
ADD(32, R(EAX), Imm8(MathUtil::PPC_FPCLASS_PZ));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
MOVQ_xmm(R(RAX), xmm);
|
||||||
|
TEST(64, R(RAX), M((void*)psDoubleExp));
|
||||||
|
FixupBranch zeroExponent = J_CC(CC_Z);
|
||||||
|
AND(64, R(RAX), M((void*)psDoubleNoSign));
|
||||||
|
CMP(64, R(RAX), M((void*)psDoubleExp));
|
||||||
|
FixupBranch nan = J_CC(CC_G); // This works because if the sign bit is set, RAX is negative
|
||||||
|
FixupBranch infinity = J_CC(CC_E);
|
||||||
|
MOVQ_xmm(R(RAX), xmm);
|
||||||
|
SHR(64, R(RAX), Imm8(63));
|
||||||
|
LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_NN - MathUtil::PPC_FPCLASS_PN, MathUtil::PPC_FPCLASS_PN));
|
||||||
|
continue1 = J();
|
||||||
|
SetJumpTarget(nan);
|
||||||
|
MOVQ_xmm(R(RAX), xmm);
|
||||||
|
SHR(64, R(RAX), Imm8(63));
|
||||||
|
MOV(32, R(EAX), Imm32(MathUtil::PPC_FPCLASS_QNAN));
|
||||||
|
continue2 = J();
|
||||||
|
SetJumpTarget(infinity);
|
||||||
|
MOVQ_xmm(R(RAX), xmm);
|
||||||
|
SHR(64, R(RAX), Imm8(63));
|
||||||
|
LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_NINF - MathUtil::PPC_FPCLASS_PINF, MathUtil::PPC_FPCLASS_NINF));
|
||||||
|
continue3 = J();
|
||||||
|
SetJumpTarget(zeroExponent);
|
||||||
|
TEST(64, R(RAX), R(RAX));
|
||||||
|
FixupBranch zero = J_CC(CC_Z);
|
||||||
|
SHR(64, R(RAX), Imm8(63));
|
||||||
|
LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_ND - MathUtil::PPC_FPCLASS_PD, MathUtil::PPC_FPCLASS_ND));
|
||||||
|
continue4 = J();
|
||||||
|
SetJumpTarget(zero);
|
||||||
|
SHR(64, R(RAX), Imm8(63));
|
||||||
|
SHL(32, R(EAX), Imm8(4));
|
||||||
|
ADD(32, R(EAX), Imm8(MathUtil::PPC_FPCLASS_PZ));
|
||||||
|
}
|
||||||
|
|
||||||
|
SetJumpTarget(continue1);
|
||||||
|
SetJumpTarget(continue2);
|
||||||
|
SetJumpTarget(continue3);
|
||||||
|
SetJumpTarget(continue4);
|
||||||
|
SHL(32, R(EAX), Imm8(FPRF_SHIFT));
|
||||||
|
OR(32, M(&FPSCR), R(EAX));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void EmuCodeBlock::JitClearCA()
|
void EmuCodeBlock::JitClearCA()
|
||||||
{
|
{
|
||||||
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0
|
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0
|
||||||
|
|
|
@ -62,6 +62,7 @@ public:
|
||||||
void ConvertSingleToDouble(Gen::X64Reg dst, Gen::X64Reg src, bool src_is_gpr = false);
|
void ConvertSingleToDouble(Gen::X64Reg dst, Gen::X64Reg src, bool src_is_gpr = false);
|
||||||
// EAX might get trashed
|
// EAX might get trashed
|
||||||
void ConvertDoubleToSingle(Gen::X64Reg dst, Gen::X64Reg src);
|
void ConvertDoubleToSingle(Gen::X64Reg dst, Gen::X64Reg src);
|
||||||
|
// Emits code that derives a PPC_FPCLASS_* result class from the value in xmm,
// shifts it left by FPRF_SHIFT and ORs it into FPSCR.
// EAX/RAX gets trashed
void SetFPRF(Gen::X64Reg xmm);
|
||||||
protected:
|
protected:
|
||||||
std::unordered_map<u8 *, u32> registersInUseAtLoc;
|
std::unordered_map<u8 *, u32> registersInUseAtLoc;
|
||||||
std::unordered_map<u8 *, u32> pcAtLoc;
|
std::unordered_map<u8 *, u32> pcAtLoc;
|
||||||
|
|
|
@ -453,6 +453,10 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInf
|
||||||
else
|
else
|
||||||
code->outputCR1 = (opinfo->flags & FL_SET_CR1) ? true : false;
|
code->outputCR1 = (opinfo->flags & FL_SET_CR1) ? true : false;
|
||||||
|
|
||||||
|
code->wantsFPRF = (opinfo->flags & FL_READ_FPRF) ? true : false;
|
||||||
|
code->outputFPRF = (opinfo->flags & FL_SET_FPRF) ? true : false;
|
||||||
|
code->canEndBlock = (opinfo->flags & FL_ENDBLOCK) ? true : false;
|
||||||
|
|
||||||
int numOut = 0;
|
int numOut = 0;
|
||||||
int numIn = 0;
|
int numIn = 0;
|
||||||
if (opinfo->flags & FL_OUT_A)
|
if (opinfo->flags & FL_OUT_A)
|
||||||
|
@ -710,24 +714,25 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scan for CR0 dependency
|
// Scan for CR0 dependency
|
||||||
// assume next block wants CR0 to be safe
|
// assume next block wants flags to be safe
|
||||||
bool wantsCR0 = true;
|
bool wantsCR0 = true;
|
||||||
bool wantsCR1 = true;
|
bool wantsCR1 = true;
|
||||||
bool wantsPS1 = true;
|
bool wantsPS1 = true;
|
||||||
|
bool wantsFPRF = true;
|
||||||
for (int i = block->m_num_instructions - 1; i >= 0; i--)
|
for (int i = block->m_num_instructions - 1; i >= 0; i--)
|
||||||
{
|
{
|
||||||
if (code[i].outputCR0)
|
wantsCR0 |= code[i].wantsCR0 || code[i].canEndBlock;
|
||||||
wantsCR0 = false;
|
wantsCR1 |= code[i].wantsCR1 || code[i].canEndBlock;
|
||||||
if (code[i].outputCR1)
|
wantsPS1 |= code[i].wantsPS1 || code[i].canEndBlock;
|
||||||
wantsCR1 = false;
|
wantsFPRF |= code[i].wantsFPRF || code[i].canEndBlock;
|
||||||
if (code[i].outputPS1)
|
|
||||||
wantsPS1 = false;
|
|
||||||
wantsCR0 |= code[i].wantsCR0;
|
|
||||||
wantsCR1 |= code[i].wantsCR1;
|
|
||||||
wantsPS1 |= code[i].wantsPS1;
|
|
||||||
code[i].wantsCR0 = wantsCR0;
|
code[i].wantsCR0 = wantsCR0;
|
||||||
code[i].wantsCR1 = wantsCR1;
|
code[i].wantsCR1 = wantsCR1;
|
||||||
code[i].wantsPS1 = wantsPS1;
|
code[i].wantsPS1 = wantsPS1;
|
||||||
|
code[i].wantsFPRF = wantsFPRF;
|
||||||
|
wantsCR0 &= !code[i].outputCR0;
|
||||||
|
wantsCR1 &= !code[i].outputCR1;
|
||||||
|
wantsPS1 &= !code[i].outputPS1;
|
||||||
|
wantsFPRF &= !code[i].outputFPRF;
|
||||||
}
|
}
|
||||||
return address;
|
return address;
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,9 +34,12 @@ struct CodeOp //16B
|
||||||
bool wantsCR0;
|
bool wantsCR0;
|
||||||
bool wantsCR1;
|
bool wantsCR1;
|
||||||
bool wantsPS1;
|
bool wantsPS1;
|
||||||
|
bool wantsFPRF;
|
||||||
bool outputCR0;
|
bool outputCR0;
|
||||||
bool outputCR1;
|
bool outputCR1;
|
||||||
bool outputPS1;
|
bool outputPS1;
|
||||||
|
bool outputFPRF;
|
||||||
|
bool canEndBlock;
|
||||||
bool skip; // followed BL-s for example
|
bool skip; // followed BL-s for example
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -25,6 +25,14 @@ GekkoOPInfo *m_infoTable63[1024];
|
||||||
GekkoOPInfo *m_allInstructions[512];
|
GekkoOPInfo *m_allInstructions[512];
|
||||||
int m_numInstructions;
|
int m_numInstructions;
|
||||||
|
|
||||||
|
// Lookup table for CR field conversion: entry N holds PPCCRToInternal(N) for
// every 4-bit PowerPC CR field value N (0x0..0xF), so callers can index the
// table instead of recomputing the internal 64-bit form.
const u64 m_crTable[16] =
{
	PPCCRToInternal(0x0),
	PPCCRToInternal(0x1),
	PPCCRToInternal(0x2),
	PPCCRToInternal(0x3),
	PPCCRToInternal(0x4),
	PPCCRToInternal(0x5),
	PPCCRToInternal(0x6),
	PPCCRToInternal(0x7),
	PPCCRToInternal(0x8),
	PPCCRToInternal(0x9),
	PPCCRToInternal(0xA),
	PPCCRToInternal(0xB),
	PPCCRToInternal(0xC),
	PPCCRToInternal(0xD),
	PPCCRToInternal(0xE),
	PPCCRToInternal(0xF),
};
|
||||||
|
|
||||||
GekkoOPInfo *GetOpInfo(UGeckoInstruction _inst)
|
GekkoOPInfo *GetOpInfo(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
GekkoOPInfo *info = m_infoTable[_inst.OPCD];
|
GekkoOPInfo *info = m_infoTable[_inst.OPCD];
|
||||||
|
|
|
@ -36,6 +36,8 @@ enum
|
||||||
FL_EVIL = (1<<17),
|
FL_EVIL = (1<<17),
|
||||||
FL_USE_FPU = (1<<18),
|
FL_USE_FPU = (1<<18),
|
||||||
FL_LOADSTORE = (1<<19),
|
FL_LOADSTORE = (1<<19),
|
||||||
|
FL_SET_FPRF = (1<<20),
|
||||||
|
FL_READ_FPRF = (1<<21),
|
||||||
};
|
};
|
||||||
|
|
||||||
enum
|
enum
|
||||||
|
|
|
@ -187,11 +187,15 @@ inline u64 PPCCRToInternal(u8 value)
|
||||||
return cr_val;
|
return cr_val;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// convert flags into 64-bit CR values with a lookup table
|
||||||
|
extern const u64 m_crTable[16];
|
||||||
|
|
||||||
// Warning: these CR operations are fairly slow since they need to convert from
|
// Warning: these CR operations are fairly slow since they need to convert from
|
||||||
// PowerPC format (4 bit) to our internal 64 bit format. See the definition of
|
// PowerPC format (4 bit) to our internal 64 bit format. See the definition of
|
||||||
// ppcState.cr_val for more explanations.
|
// ppcState.cr_val for more explanations.
|
||||||
inline void SetCRField(int cr_field, int value) {
|
inline void SetCRField(int cr_field, int value)
|
||||||
PowerPC::ppcState.cr_val[cr_field] = PPCCRToInternal(value);
|
{
|
||||||
|
PowerPC::ppcState.cr_val[cr_field] = m_crTable[value];
|
||||||
}
|
}
|
||||||
|
|
||||||
inline u32 GetCRField(int cr_field) {
|
inline u32 GetCRField(int cr_field) {
|
||||||
|
|
Loading…
Reference in New Issue