Merge pull request #843 from FioraAeterna/fprf
JIT: Initial FPRF support
This commit is contained in:
commit
0217fb2008
|
@ -383,6 +383,9 @@ union UReg_MSR
|
|||
UReg_MSR() { Hex = 0; }
|
||||
};
|
||||
|
||||
#define FPRF_SHIFT 12
|
||||
#define FPRF_MASK (0x1F << FPRF_SHIFT)
|
||||
|
||||
// Floating Point Status and Control Register
|
||||
union UReg_FPSCR
|
||||
{
|
||||
|
|
|
@ -97,14 +97,14 @@ static GekkoOPTemplate primarytable[] =
|
|||
|
||||
static GekkoOPTemplate table4[] =
|
||||
{ //SUBOP10
|
||||
{0, Interpreter::ps_cmpu0, {"ps_cmpu0", OPTYPE_PS, FL_SET_CRn | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{32, Interpreter::ps_cmpo0, {"ps_cmpo0", OPTYPE_PS, FL_SET_CRn | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{0, Interpreter::ps_cmpu0, {"ps_cmpu0", OPTYPE_PS, FL_SET_CRn | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{32, Interpreter::ps_cmpo0, {"ps_cmpo0", OPTYPE_PS, FL_SET_CRn | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{40, Interpreter::ps_neg, {"ps_neg", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{136, Interpreter::ps_nabs, {"ps_nabs", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{264, Interpreter::ps_abs, {"ps_abs", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{64, Interpreter::ps_cmpu1, {"ps_cmpu1", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{64, Interpreter::ps_cmpu1, {"ps_cmpu1", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{72, Interpreter::ps_mr, {"ps_mr", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{96, Interpreter::ps_cmpo1, {"ps_cmpo1", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{96, Interpreter::ps_cmpo1, {"ps_cmpo1", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{528, Interpreter::ps_merge00, {"ps_merge00", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{560, Interpreter::ps_merge01, {"ps_merge01", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{592, Interpreter::ps_merge10, {"ps_merge10", OPTYPE_PS, FL_RC_BIT | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
|
@ -115,23 +115,23 @@ static GekkoOPTemplate table4[] =
|
|||
|
||||
static GekkoOPTemplate table4_2[] =
|
||||
{
|
||||
{10, Interpreter::ps_sum0, {"ps_sum0", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{11, Interpreter::ps_sum1, {"ps_sum1", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{12, Interpreter::ps_muls0, {"ps_muls0", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{13, Interpreter::ps_muls1, {"ps_muls1", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{14, Interpreter::ps_madds0, {"ps_madds0", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{15, Interpreter::ps_madds1, {"ps_madds1", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{18, Interpreter::ps_div, {"ps_div", OPTYPE_PS, FL_USE_FPU, 17, 0, 0, 0}},
|
||||
{20, Interpreter::ps_sub, {"ps_sub", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{21, Interpreter::ps_add, {"ps_add", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{10, Interpreter::ps_sum0, {"ps_sum0", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{11, Interpreter::ps_sum1, {"ps_sum1", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{12, Interpreter::ps_muls0, {"ps_muls0", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{13, Interpreter::ps_muls1, {"ps_muls1", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{14, Interpreter::ps_madds0, {"ps_madds0", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{15, Interpreter::ps_madds1, {"ps_madds1", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{18, Interpreter::ps_div, {"ps_div", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 17, 0, 0, 0}},
|
||||
{20, Interpreter::ps_sub, {"ps_sub", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{21, Interpreter::ps_add, {"ps_add", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{23, Interpreter::ps_sel, {"ps_sel", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{24, Interpreter::ps_res, {"ps_res", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{25, Interpreter::ps_mul, {"ps_mul", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{26, Interpreter::ps_rsqrte, {"ps_rsqrte", OPTYPE_PS, FL_USE_FPU, 2, 0, 0, 0}},
|
||||
{28, Interpreter::ps_msub, {"ps_msub", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{29, Interpreter::ps_madd, {"ps_madd", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{30, Interpreter::ps_nmsub, {"ps_nmsub", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{31, Interpreter::ps_nmadd, {"ps_nmadd", OPTYPE_PS, FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{24, Interpreter::ps_res, {"ps_res", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{25, Interpreter::ps_mul, {"ps_mul", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{26, Interpreter::ps_rsqrte, {"ps_rsqrte", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 2, 0, 0, 0}},
|
||||
{28, Interpreter::ps_msub, {"ps_msub", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{29, Interpreter::ps_madd, {"ps_madd", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{30, Interpreter::ps_nmsub, {"ps_nmsub", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{31, Interpreter::ps_nmadd, {"ps_nmadd", OPTYPE_PS, FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
};
|
||||
|
||||
|
||||
|
@ -307,51 +307,51 @@ static GekkoOPTemplate table31_2[] =
|
|||
|
||||
static GekkoOPTemplate table59[] =
|
||||
{
|
||||
{18, Interpreter::fdivsx, {"fdivsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 17, 0, 0, 0}}, // TODO
|
||||
{20, Interpreter::fsubsx, {"fsubsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{21, Interpreter::faddsx, {"faddsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
//{22, Interpreter::fsqrtsx, {"fsqrtsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}}, // Not implemented on gekko
|
||||
{24, Interpreter::fresx, {"fresx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{25, Interpreter::fmulsx, {"fmulsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{28, Interpreter::fmsubsx, {"fmsubsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{29, Interpreter::fmaddsx, {"fmaddsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{30, Interpreter::fnmsubsx, {"fnmsubsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{31, Interpreter::fnmaddsx, {"fnmaddsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{18, Interpreter::fdivsx, {"fdivsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 17, 0, 0, 0}}, // TODO
|
||||
{20, Interpreter::fsubsx, {"fsubsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{21, Interpreter::faddsx, {"faddsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
//{22, Interpreter::fsqrtsx, {"fsqrtsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}}, // Not implemented on gekko
|
||||
{24, Interpreter::fresx, {"fresx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{25, Interpreter::fmulsx, {"fmulsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{28, Interpreter::fmsubsx, {"fmsubsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{29, Interpreter::fmaddsx, {"fmaddsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{30, Interpreter::fnmsubsx, {"fnmsubsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{31, Interpreter::fnmaddsx, {"fnmaddsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
};
|
||||
|
||||
static GekkoOPTemplate table63[] =
|
||||
{
|
||||
{264, Interpreter::fabsx, {"fabsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{32, Interpreter::fcmpo, {"fcmpo", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{0, Interpreter::fcmpu, {"fcmpu", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{32, Interpreter::fcmpo, {"fcmpo", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{0, Interpreter::fcmpu, {"fcmpu", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{14, Interpreter::fctiwx, {"fctiwx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{15, Interpreter::fctiwzx, {"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{72, Interpreter::fmrx, {"fmrx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{136, Interpreter::fnabsx, {"fnabsx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{40, Interpreter::fnegx, {"fnegx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{12, Interpreter::frspx, {"frspx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{12, Interpreter::frspx, {"frspx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
|
||||
{64, Interpreter::mcrfs, {"mcrfs", OPTYPE_SYSTEMFP, FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{583, Interpreter::mffsx, {"mffsx", OPTYPE_SYSTEMFP, FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{70, Interpreter::mtfsb0x, {"mtfsb0x", OPTYPE_SYSTEMFP, FL_USE_FPU, 3, 0, 0, 0}},
|
||||
{38, Interpreter::mtfsb1x, {"mtfsb1x", OPTYPE_SYSTEMFP, FL_USE_FPU, 3, 0, 0, 0}},
|
||||
{134, Interpreter::mtfsfix, {"mtfsfix", OPTYPE_SYSTEMFP, FL_USE_FPU, 3, 0, 0, 0}},
|
||||
{711, Interpreter::mtfsfx, {"mtfsfx", OPTYPE_SYSTEMFP, FL_USE_FPU, 3, 0, 0, 0}},
|
||||
{64, Interpreter::mcrfs, {"mcrfs", OPTYPE_SYSTEMFP, FL_USE_FPU | FL_READ_FPRF, 1, 0, 0, 0}},
|
||||
{583, Interpreter::mffsx, {"mffsx", OPTYPE_SYSTEMFP, FL_USE_FPU | FL_READ_FPRF, 1, 0, 0, 0}},
|
||||
{70, Interpreter::mtfsb0x, {"mtfsb0x", OPTYPE_SYSTEMFP, FL_USE_FPU | FL_READ_FPRF, 3, 0, 0, 0}},
|
||||
{38, Interpreter::mtfsb1x, {"mtfsb1x", OPTYPE_SYSTEMFP, FL_USE_FPU | FL_READ_FPRF, 3, 0, 0, 0}},
|
||||
{134, Interpreter::mtfsfix, {"mtfsfix", OPTYPE_SYSTEMFP, FL_USE_FPU | FL_READ_FPRF, 3, 0, 0, 0}},
|
||||
{711, Interpreter::mtfsfx, {"mtfsfx", OPTYPE_SYSTEMFP, FL_USE_FPU | FL_READ_FPRF, 3, 0, 0, 0}},
|
||||
};
|
||||
|
||||
static GekkoOPTemplate table63_2[] =
|
||||
{
|
||||
{18, Interpreter::fdivx, {"fdivx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 31, 0, 0, 0}},
|
||||
{20, Interpreter::fsubx, {"fsubx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{21, Interpreter::faddx, {"faddx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{22, Interpreter::fsqrtx, {"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{18, Interpreter::fdivx, {"fdivx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 31, 0, 0, 0}},
|
||||
{20, Interpreter::fsubx, {"fsubx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{21, Interpreter::faddx, {"faddx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{22, Interpreter::fsqrtx, {"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{23, Interpreter::fselx, {"fselx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{25, Interpreter::fmulx, {"fmulx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{26, Interpreter::frsqrtex, {"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{28, Interpreter::fmsubx, {"fmsubx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{29, Interpreter::fmaddx, {"fmaddx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{30, Interpreter::fnmsubx, {"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{31, Interpreter::fnmaddx, {"fnmaddx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||
{25, Interpreter::fmulx, {"fmulx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{26, Interpreter::frsqrtex, {"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{28, Interpreter::fmsubx, {"fmsubx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{29, Interpreter::fmaddx, {"fmaddx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{30, Interpreter::fnmsubx, {"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
{31, Interpreter::fnmaddx, {"fnmaddx", OPTYPE_FPU, FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||
};
|
||||
namespace InterpreterTables
|
||||
{
|
||||
|
|
|
@ -116,11 +116,12 @@ public:
|
|||
// Generates a branch that will check if a given bit of a CR register part
|
||||
// is set or not.
|
||||
Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set = true);
|
||||
void SetFPRFIfNeeded(UGeckoInstruction inst, Gen::X64Reg xmm);
|
||||
|
||||
void tri_op(int d, int a, int b, bool reversible, void (Gen::XEmitter::*op)(Gen::X64Reg, Gen::OpArg), bool roundRHS = false);
|
||||
void tri_op(int d, int a, int b, bool reversible, void (Gen::XEmitter::*op)(Gen::X64Reg, Gen::OpArg), UGeckoInstruction inst, bool roundRHS = false);
|
||||
typedef u32 (*Operation)(u32 a, u32 b);
|
||||
void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (Gen::XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false);
|
||||
void fp_tri_op(int d, int a, int b, bool reversible, bool single, void (Gen::XEmitter::*op)(Gen::X64Reg, Gen::OpArg), bool roundRHS = false);
|
||||
void fp_tri_op(int d, int a, int b, bool reversible, bool single, void (Gen::XEmitter::*op)(Gen::X64Reg, Gen::OpArg), UGeckoInstruction inst, bool roundRHS = false);
|
||||
|
||||
// OPCODES
|
||||
void unknown_instruction(UGeckoInstruction _inst);
|
||||
|
|
|
@ -14,7 +14,7 @@ static const u64 GC_ALIGNED16(psSignBits2[2]) = {0x8000000000000000ULL, 0x800000
|
|||
static const u64 GC_ALIGNED16(psAbsMask2[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
|
||||
static const double GC_ALIGNED16(half_qnan_and_s32_max[2]) = {0x7FFFFFFF, -0x80000};
|
||||
|
||||
void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg), bool roundRHS)
|
||||
void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg), UGeckoInstruction inst, bool roundRHS)
|
||||
{
|
||||
fpr.Lock(d, a, b);
|
||||
if (roundRHS)
|
||||
|
@ -88,25 +88,35 @@ void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single, void (X
|
|||
UNPCKLPD(fpr.RX(d), R(fpr.RX(d)));
|
||||
}
|
||||
}
|
||||
SetFPRFIfNeeded(inst, fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
// We can avoid calculating FPRF if it's not needed; every float operation resets it, so
|
||||
// if it's going to be clobbered in a future instruction before being read, we can just
|
||||
// not calculate it.
|
||||
void Jit64::SetFPRFIfNeeded(UGeckoInstruction inst, X64Reg xmm)
|
||||
{
|
||||
// As far as we know, the games that use this flag only need FPRF for fmul and fmadd, but
|
||||
// FPRF is fast enough in JIT that we might as well just enable it for every float instruction
|
||||
// if the enableFPRF flag is set.
|
||||
if (Core::g_CoreStartupParameter.bEnableFPRF && js.op->wantsFPRF)
|
||||
SetFPRF(xmm);
|
||||
}
|
||||
|
||||
void Jit64::fp_arith(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITFloatingPointOff);
|
||||
FALLBACK_IF(inst.Rc);
|
||||
|
||||
// Only the interpreter has "proper" support for (some) FP flags
|
||||
FALLBACK_IF(inst.SUBOP5 == 25 && Core::g_CoreStartupParameter.bEnableFPRF);
|
||||
|
||||
bool single = inst.OPCD == 59;
|
||||
switch (inst.SUBOP5)
|
||||
{
|
||||
case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, single, &XEmitter::DIVSD); break; //div
|
||||
case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, single, &XEmitter::SUBSD); break; //sub
|
||||
case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, single, &XEmitter::ADDSD); break; //add
|
||||
case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, single, &XEmitter::MULSD, single); break; //mul
|
||||
case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, single, &XEmitter::DIVSD, inst); break; //div
|
||||
case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, single, &XEmitter::SUBSD, inst); break; //sub
|
||||
case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, single, &XEmitter::ADDSD, inst); break; //add
|
||||
case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, single, &XEmitter::MULSD, inst, single); break; //mul
|
||||
default:
|
||||
_assert_msg_(DYNA_REC, 0, "fp_arith WTF!!!");
|
||||
}
|
||||
|
@ -118,9 +128,6 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
|
|||
JITDISABLE(bJITFloatingPointOff);
|
||||
FALLBACK_IF(inst.Rc);
|
||||
|
||||
// Only the interpreter has "proper" support for (some) FP flags
|
||||
FALLBACK_IF(inst.SUBOP5 == 29 && Core::g_CoreStartupParameter.bEnableFPRF);
|
||||
|
||||
bool single_precision = inst.OPCD == 59;
|
||||
|
||||
int a = inst.FA;
|
||||
|
@ -165,9 +172,7 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
|
|||
{
|
||||
MOVSD(fpr.RX(d), R(XMM0));
|
||||
}
|
||||
// SMB checks flags after this op. Let's lie.
|
||||
//AND(32, M(&PowerPC::ppcState.fpscr), Imm32(~((0x80000000 >> 19) | (0x80000000 >> 15))));
|
||||
//OR(32, M(&PowerPC::ppcState.fpscr), Imm32((0x80000000 >> 16)));
|
||||
SetFPRFIfNeeded(inst, fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
|
@ -241,6 +246,7 @@ void Jit64::fcmpx(UGeckoInstruction inst)
|
|||
fpr.Lock(a,b);
|
||||
fpr.BindToRegister(b, true);
|
||||
|
||||
AND(32, M(&FPSCR), Imm32(~FPRF_MASK));
|
||||
// Are we masking sNaN invalid floating point exceptions? If not this could crash if we don't handle the exception?
|
||||
UCOMISD(fpr.R(b).GetSimpleReg(), fpr.R(a));
|
||||
|
||||
|
@ -264,10 +270,13 @@ void Jit64::fcmpx(UGeckoInstruction inst)
|
|||
}
|
||||
|
||||
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_EQ)));
|
||||
OR(32, M(&FPSCR), Imm32(CR_EQ << FPRF_SHIFT));
|
||||
|
||||
continue1 = J();
|
||||
|
||||
SetJumpTarget(pNaN);
|
||||
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_SO)));
|
||||
OR(32, M(&FPSCR), Imm32(CR_SO << FPRF_SHIFT));
|
||||
|
||||
if (a != b)
|
||||
{
|
||||
|
@ -275,10 +284,12 @@ void Jit64::fcmpx(UGeckoInstruction inst)
|
|||
|
||||
SetJumpTarget(pGreater);
|
||||
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_GT)));
|
||||
OR(32, M(&FPSCR), Imm32(CR_GT << FPRF_SHIFT));
|
||||
continue3 = J();
|
||||
|
||||
SetJumpTarget(pLesser);
|
||||
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_LT)));
|
||||
OR(32, M(&FPSCR), Imm32(CR_LT << FPRF_SHIFT));
|
||||
}
|
||||
|
||||
SetJumpTarget(continue1);
|
||||
|
|
|
@ -113,7 +113,7 @@ add a,b,a
|
|||
*/
|
||||
|
||||
//There's still a little bit more optimization that can be squeezed out of this
|
||||
void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(X64Reg, OpArg), bool roundRHS)
|
||||
void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(X64Reg, OpArg), UGeckoInstruction inst, bool roundRHS)
|
||||
{
|
||||
fpr.Lock(d, a, b);
|
||||
|
||||
|
@ -163,6 +163,7 @@ void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(X6
|
|||
(this->*op)(fpr.RX(d), fpr.R(b));
|
||||
}
|
||||
ForceSinglePrecisionP(fpr.RX(d));
|
||||
SetFPRFIfNeeded(inst, fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
|
@ -175,16 +176,16 @@ void Jit64::ps_arith(UGeckoInstruction inst)
|
|||
switch (inst.SUBOP5)
|
||||
{
|
||||
case 18: // div
|
||||
tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::DIVPD);
|
||||
tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::DIVPD, inst);
|
||||
break;
|
||||
case 20: // sub
|
||||
tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::SUBPD);
|
||||
tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::SUBPD, inst);
|
||||
break;
|
||||
case 21: // add
|
||||
tri_op(inst.FD, inst.FA, inst.FB, true, &XEmitter::ADDPD);
|
||||
tri_op(inst.FD, inst.FA, inst.FB, true, &XEmitter::ADDPD, inst);
|
||||
break;
|
||||
case 25: // mul
|
||||
tri_op(inst.FD, inst.FA, inst.FC, true, &XEmitter::MULPD, true);
|
||||
tri_op(inst.FD, inst.FA, inst.FC, true, &XEmitter::MULPD, inst, true);
|
||||
break;
|
||||
default:
|
||||
_assert_msg_(DYNA_REC, 0, "ps_arith WTF!!!");
|
||||
|
@ -228,6 +229,7 @@ void Jit64::ps_sum(UGeckoInstruction inst)
|
|||
PanicAlert("ps_sum WTF!!!");
|
||||
}
|
||||
ForceSinglePrecisionP(fpr.RX(d));
|
||||
SetFPRFIfNeeded(inst, fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
|
@ -267,6 +269,7 @@ void Jit64::ps_muls(UGeckoInstruction inst)
|
|||
PanicAlert("ps_muls WTF!!!");
|
||||
}
|
||||
ForceSinglePrecisionP(fpr.RX(d));
|
||||
SetFPRFIfNeeded(inst, fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
|
@ -372,5 +375,6 @@ void Jit64::ps_maddXX(UGeckoInstruction inst)
|
|||
fpr.BindToRegister(d, false);
|
||||
MOVAPD(fpr.RX(d), Gen::R(XMM0));
|
||||
ForceSinglePrecisionP(fpr.RX(d));
|
||||
SetFPRFIfNeeded(inst, fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
|
|
@ -284,15 +284,6 @@ void Jit64::mfcr(UGeckoInstruction inst)
|
|||
gpr.UnlockAllX();
|
||||
}
|
||||
|
||||
// convert flags into 64-bit CR values with a lookup table
|
||||
static const u64 m_crTable[16] =
|
||||
{
|
||||
PPCCRToInternal(0x0), PPCCRToInternal(0x1), PPCCRToInternal(0x2), PPCCRToInternal(0x3),
|
||||
PPCCRToInternal(0x4), PPCCRToInternal(0x5), PPCCRToInternal(0x6), PPCCRToInternal(0x7),
|
||||
PPCCRToInternal(0x8), PPCCRToInternal(0x9), PPCCRToInternal(0xA), PPCCRToInternal(0xB),
|
||||
PPCCRToInternal(0xC), PPCCRToInternal(0xD), PPCCRToInternal(0xE), PPCCRToInternal(0xF),
|
||||
};
|
||||
|
||||
void Jit64::mtcrf(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include "Common/Common.h"
|
||||
#include "Common/CPUDetect.h"
|
||||
#include "Common/MathUtil.h"
|
||||
|
||||
#include "Core/HW/MMIO.h"
|
||||
#include "Core/PowerPC/JitCommon/Jit_Util.h"
|
||||
|
@ -705,6 +706,103 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr
|
|||
MOVDDUP(dst, R(dst));
|
||||
}
|
||||
|
||||
static const u64 GC_ALIGNED16(psDoubleExp[2]) = {0x7FF0000000000000ULL, 0};
|
||||
static const u64 GC_ALIGNED16(psDoubleFrac[2]) = {0x000FFFFFFFFFFFFFULL, 0};
|
||||
static const u64 GC_ALIGNED16(psDoubleNoSign[2]) = {0x7FFFFFFFFFFFFFFFULL, 0};
|
||||
|
||||
// TODO: it might be faster to handle FPRF in the same way as CR is currently handled for integer, storing
|
||||
// the result of each floating point op and calculating it when needed. This is trickier than for integers
|
||||
// though, because there's 32 possible FPRF bit combinations but only 9 categories of floating point values,
|
||||
// which makes the whole thing rather trickier.
|
||||
// Fortunately, PPCAnalyzer can optimize out a large portion of FPRF calculations, so maybe this isn't
|
||||
// quite that necessary.
|
||||
void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
|
||||
{
|
||||
AND(32, M(&FPSCR), Imm32(~FPRF_MASK));
|
||||
|
||||
FixupBranch continue1, continue2, continue3, continue4;
|
||||
if (cpu_info.bSSE4_1)
|
||||
{
|
||||
MOVQ_xmm(R(RAX), xmm);
|
||||
SHR(64, R(RAX), Imm8(63)); // Get the sign bit; almost all the branches need it.
|
||||
PTEST(xmm, M((void*)psDoubleExp));
|
||||
FixupBranch maxExponent = J_CC(CC_C);
|
||||
FixupBranch zeroExponent = J_CC(CC_Z);
|
||||
|
||||
// Nice normalized number: sign ? PPC_FPCLASS_NN : PPC_FPCLASS_PN;
|
||||
LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_NN - MathUtil::PPC_FPCLASS_PN, MathUtil::PPC_FPCLASS_PN));
|
||||
continue1 = J();
|
||||
|
||||
SetJumpTarget(maxExponent);
|
||||
PTEST(xmm, M((void*)psDoubleFrac));
|
||||
FixupBranch notNAN = J_CC(CC_Z);
|
||||
|
||||
// Max exponent + mantissa: PPC_FPCLASS_QNAN
|
||||
MOV(32, R(EAX), Imm32(MathUtil::PPC_FPCLASS_QNAN));
|
||||
continue2 = J();
|
||||
|
||||
// Max exponent + no mantissa: sign ? PPC_FPCLASS_NINF : PPC_FPCLASS_PINF;
|
||||
SetJumpTarget(notNAN);
|
||||
LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_NINF - MathUtil::PPC_FPCLASS_PINF, MathUtil::PPC_FPCLASS_NINF));
|
||||
continue3 = J();
|
||||
|
||||
SetJumpTarget(zeroExponent);
|
||||
PTEST(xmm, R(xmm));
|
||||
FixupBranch zero = J_CC(CC_Z);
|
||||
|
||||
// No exponent + mantissa: sign ? PPC_FPCLASS_ND : PPC_FPCLASS_PD;
|
||||
LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_ND - MathUtil::PPC_FPCLASS_PD, MathUtil::PPC_FPCLASS_ND));
|
||||
continue4 = J();
|
||||
|
||||
// Zero: sign ? PPC_FPCLASS_NZ : PPC_FPCLASS_PZ;
|
||||
SetJumpTarget(zero);
|
||||
SHL(32, R(EAX), Imm8(4));
|
||||
ADD(32, R(EAX), Imm8(MathUtil::PPC_FPCLASS_PZ));
|
||||
}
|
||||
else
|
||||
{
|
||||
MOVQ_xmm(R(RAX), xmm);
|
||||
TEST(64, R(RAX), M((void*)psDoubleExp));
|
||||
FixupBranch zeroExponent = J_CC(CC_Z);
|
||||
AND(64, R(RAX), M((void*)psDoubleNoSign));
|
||||
CMP(64, R(RAX), M((void*)psDoubleExp));
|
||||
FixupBranch nan = J_CC(CC_G); // This works because if the sign bit is set, RAX is negative
|
||||
FixupBranch infinity = J_CC(CC_E);
|
||||
MOVQ_xmm(R(RAX), xmm);
|
||||
SHR(64, R(RAX), Imm8(63));
|
||||
LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_NN - MathUtil::PPC_FPCLASS_PN, MathUtil::PPC_FPCLASS_PN));
|
||||
continue1 = J();
|
||||
SetJumpTarget(nan);
|
||||
MOVQ_xmm(R(RAX), xmm);
|
||||
SHR(64, R(RAX), Imm8(63));
|
||||
MOV(32, R(EAX), Imm32(MathUtil::PPC_FPCLASS_QNAN));
|
||||
continue2 = J();
|
||||
SetJumpTarget(infinity);
|
||||
MOVQ_xmm(R(RAX), xmm);
|
||||
SHR(64, R(RAX), Imm8(63));
|
||||
LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_NINF - MathUtil::PPC_FPCLASS_PINF, MathUtil::PPC_FPCLASS_NINF));
|
||||
continue3 = J();
|
||||
SetJumpTarget(zeroExponent);
|
||||
TEST(64, R(RAX), R(RAX));
|
||||
FixupBranch zero = J_CC(CC_Z);
|
||||
SHR(64, R(RAX), Imm8(63));
|
||||
LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_ND - MathUtil::PPC_FPCLASS_PD, MathUtil::PPC_FPCLASS_ND));
|
||||
continue4 = J();
|
||||
SetJumpTarget(zero);
|
||||
SHR(64, R(RAX), Imm8(63));
|
||||
SHL(32, R(EAX), Imm8(4));
|
||||
ADD(32, R(EAX), Imm8(MathUtil::PPC_FPCLASS_PZ));
|
||||
}
|
||||
|
||||
SetJumpTarget(continue1);
|
||||
SetJumpTarget(continue2);
|
||||
SetJumpTarget(continue3);
|
||||
SetJumpTarget(continue4);
|
||||
SHL(32, R(EAX), Imm8(FPRF_SHIFT));
|
||||
OR(32, M(&FPSCR), R(EAX));
|
||||
}
|
||||
|
||||
|
||||
void EmuCodeBlock::JitClearCA()
|
||||
{
|
||||
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0
|
||||
|
|
|
@ -62,6 +62,7 @@ public:
|
|||
void ConvertSingleToDouble(Gen::X64Reg dst, Gen::X64Reg src, bool src_is_gpr = false);
|
||||
// EAX might get trashed
|
||||
void ConvertDoubleToSingle(Gen::X64Reg dst, Gen::X64Reg src);
|
||||
void SetFPRF(Gen::X64Reg xmm);
|
||||
protected:
|
||||
std::unordered_map<u8 *, u32> registersInUseAtLoc;
|
||||
std::unordered_map<u8 *, u32> pcAtLoc;
|
||||
|
|
|
@ -453,6 +453,10 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInf
|
|||
else
|
||||
code->outputCR1 = (opinfo->flags & FL_SET_CR1) ? true : false;
|
||||
|
||||
code->wantsFPRF = (opinfo->flags & FL_READ_FPRF) ? true : false;
|
||||
code->outputFPRF = (opinfo->flags & FL_SET_FPRF) ? true : false;
|
||||
code->canEndBlock = (opinfo->flags & FL_ENDBLOCK) ? true : false;
|
||||
|
||||
int numOut = 0;
|
||||
int numIn = 0;
|
||||
if (opinfo->flags & FL_OUT_A)
|
||||
|
@ -710,24 +714,25 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32
|
|||
}
|
||||
|
||||
// Scan for CR0 dependency
|
||||
// assume next block wants CR0 to be safe
|
||||
// assume next block wants flags to be safe
|
||||
bool wantsCR0 = true;
|
||||
bool wantsCR1 = true;
|
||||
bool wantsPS1 = true;
|
||||
bool wantsFPRF = true;
|
||||
for (int i = block->m_num_instructions - 1; i >= 0; i--)
|
||||
{
|
||||
if (code[i].outputCR0)
|
||||
wantsCR0 = false;
|
||||
if (code[i].outputCR1)
|
||||
wantsCR1 = false;
|
||||
if (code[i].outputPS1)
|
||||
wantsPS1 = false;
|
||||
wantsCR0 |= code[i].wantsCR0;
|
||||
wantsCR1 |= code[i].wantsCR1;
|
||||
wantsPS1 |= code[i].wantsPS1;
|
||||
wantsCR0 |= code[i].wantsCR0 || code[i].canEndBlock;
|
||||
wantsCR1 |= code[i].wantsCR1 || code[i].canEndBlock;
|
||||
wantsPS1 |= code[i].wantsPS1 || code[i].canEndBlock;
|
||||
wantsFPRF |= code[i].wantsFPRF || code[i].canEndBlock;
|
||||
code[i].wantsCR0 = wantsCR0;
|
||||
code[i].wantsCR1 = wantsCR1;
|
||||
code[i].wantsPS1 = wantsPS1;
|
||||
code[i].wantsFPRF = wantsFPRF;
|
||||
wantsCR0 &= !code[i].outputCR0;
|
||||
wantsCR1 &= !code[i].outputCR1;
|
||||
wantsPS1 &= !code[i].outputPS1;
|
||||
wantsFPRF &= !code[i].outputFPRF;
|
||||
}
|
||||
return address;
|
||||
}
|
||||
|
|
|
@ -34,9 +34,12 @@ struct CodeOp //16B
|
|||
bool wantsCR0;
|
||||
bool wantsCR1;
|
||||
bool wantsPS1;
|
||||
bool wantsFPRF;
|
||||
bool outputCR0;
|
||||
bool outputCR1;
|
||||
bool outputPS1;
|
||||
bool outputFPRF;
|
||||
bool canEndBlock;
|
||||
bool skip; // followed BL-s for example
|
||||
};
|
||||
|
||||
|
|
|
@ -25,6 +25,14 @@ GekkoOPInfo *m_infoTable63[1024];
|
|||
GekkoOPInfo *m_allInstructions[512];
|
||||
int m_numInstructions;
|
||||
|
||||
const u64 m_crTable[16] =
|
||||
{
|
||||
PPCCRToInternal(0x0), PPCCRToInternal(0x1), PPCCRToInternal(0x2), PPCCRToInternal(0x3),
|
||||
PPCCRToInternal(0x4), PPCCRToInternal(0x5), PPCCRToInternal(0x6), PPCCRToInternal(0x7),
|
||||
PPCCRToInternal(0x8), PPCCRToInternal(0x9), PPCCRToInternal(0xA), PPCCRToInternal(0xB),
|
||||
PPCCRToInternal(0xC), PPCCRToInternal(0xD), PPCCRToInternal(0xE), PPCCRToInternal(0xF),
|
||||
};
|
||||
|
||||
GekkoOPInfo *GetOpInfo(UGeckoInstruction _inst)
|
||||
{
|
||||
GekkoOPInfo *info = m_infoTable[_inst.OPCD];
|
||||
|
|
|
@ -36,6 +36,8 @@ enum
|
|||
FL_EVIL = (1<<17),
|
||||
FL_USE_FPU = (1<<18),
|
||||
FL_LOADSTORE = (1<<19),
|
||||
FL_SET_FPRF = (1<<20),
|
||||
FL_READ_FPRF = (1<<21),
|
||||
};
|
||||
|
||||
enum
|
||||
|
|
|
@ -187,11 +187,15 @@ inline u64 PPCCRToInternal(u8 value)
|
|||
return cr_val;
|
||||
}
|
||||
|
||||
// convert flags into 64-bit CR values with a lookup table
|
||||
extern const u64 m_crTable[16];
|
||||
|
||||
// Warning: these CR operations are fairly slow since they need to convert from
|
||||
// PowerPC format (4 bit) to our internal 64 bit format. See the definition of
|
||||
// ppcState.cr_val for more explanations.
|
||||
inline void SetCRField(int cr_field, int value) {
|
||||
PowerPC::ppcState.cr_val[cr_field] = PPCCRToInternal(value);
|
||||
inline void SetCRField(int cr_field, int value)
|
||||
{
|
||||
PowerPC::ppcState.cr_val[cr_field] = m_crTable[value];
|
||||
}
|
||||
|
||||
inline u32 GetCRField(int cr_field) {
|
||||
|
|
Loading…
Reference in New Issue