Merge pull request #372 from Tilka/fctiwzx
Jit64: implement fctiwx/fctiwzx
This commit is contained in:
commit
2daa83da48
|
@ -1405,6 +1405,7 @@ void XEmitter::CVTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(64, 0x5B, true, reg
|
||||||
|
|
||||||
void XEmitter::CVTTSS2SI(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x2C, false, xregdest, arg);}
|
void XEmitter::CVTTSS2SI(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x2C, false, xregdest, arg);}
|
||||||
void XEmitter::CVTTPS2DQ(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x5B, false, xregdest, arg);}
|
void XEmitter::CVTTPS2DQ(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x5B, false, xregdest, arg);}
|
||||||
|
// Emits CVTTPD2DQ (packed double -> packed int32 conversion, truncating variant)
// by forwarding to WriteSSEOp with a 64-bit packed operation and opcode byte 0xE6.
// NOTE(review): the prefix byte chosen for (64, packed) lives in WriteSSEOp, which
// is not visible here - confirm it yields the 66 0F E6 encoding.
void XEmitter::CVTTPD2DQ(X64Reg xregdest, OpArg arg)
{
	WriteSSEOp(64, 0xE6, true, xregdest, arg);
}
|
||||||
|
|
||||||
void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {WriteSSEOp(64, sseMASKMOVDQU, true, dest, R(src));}
|
void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {WriteSSEOp(64, sseMASKMOVDQU, true, dest, R(src));}
|
||||||
|
|
||||||
|
|
|
@ -576,6 +576,7 @@ public:
|
||||||
|
|
||||||
void CVTTSS2SI(X64Reg xregdest, OpArg arg); // Yeah, destination really is a GPR like EAX!
|
void CVTTSS2SI(X64Reg xregdest, OpArg arg); // Yeah, destination really is a GPR like EAX!
|
||||||
void CVTTPS2DQ(X64Reg regOp, OpArg arg);
|
void CVTTPS2DQ(X64Reg regOp, OpArg arg);
|
||||||
|
void CVTTPD2DQ(X64Reg regOp, OpArg arg);
|
||||||
|
|
||||||
// SSE2: Packed integer instructions
|
// SSE2: Packed integer instructions
|
||||||
void PACKSSDW(X64Reg dest, OpArg arg);
|
void PACKSSDW(X64Reg dest, OpArg arg);
|
||||||
|
|
|
@ -174,6 +174,7 @@ public:
|
||||||
void fp_arith(UGeckoInstruction inst);
|
void fp_arith(UGeckoInstruction inst);
|
||||||
|
|
||||||
void fcmpx(UGeckoInstruction inst);
|
void fcmpx(UGeckoInstruction inst);
|
||||||
|
void fctiwx(UGeckoInstruction inst);
|
||||||
void fmrx(UGeckoInstruction inst);
|
void fmrx(UGeckoInstruction inst);
|
||||||
|
|
||||||
void cmpXX(UGeckoInstruction inst);
|
void cmpXX(UGeckoInstruction inst);
|
||||||
|
|
|
@ -337,8 +337,8 @@ static GekkoOPTemplate table63[] =
|
||||||
{264, &Jit64::fsign}, //"fabsx", OPTYPE_FPU, FL_RC_BIT_F}},
|
{264, &Jit64::fsign}, //"fabsx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||||
{32, &Jit64::fcmpx}, //"fcmpo", OPTYPE_FPU, FL_RC_BIT_F}},
|
{32, &Jit64::fcmpx}, //"fcmpo", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||||
{0, &Jit64::fcmpx}, //"fcmpu", OPTYPE_FPU, FL_RC_BIT_F}},
|
{0, &Jit64::fcmpx}, //"fcmpu", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||||
{14, &Jit64::FallBackToInterpreter}, //"fctiwx", OPTYPE_FPU, FL_RC_BIT_F}},
|
{14, &Jit64::fctiwx}, //"fctiwx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||||
{15, &Jit64::FallBackToInterpreter}, //"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}},
|
{15, &Jit64::fctiwx}, //"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||||
{72, &Jit64::fmrx}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}},
|
{72, &Jit64::fmrx}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||||
{136, &Jit64::fsign}, //"fnabsx", OPTYPE_FPU, FL_RC_BIT_F}},
|
{136, &Jit64::fsign}, //"fnabsx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||||
{40, &Jit64::fsign}, //"fnegx", OPTYPE_FPU, FL_RC_BIT_F}},
|
{40, &Jit64::fsign}, //"fnegx", OPTYPE_FPU, FL_RC_BIT_F}},
|
||||||
|
|
|
@ -10,6 +10,7 @@
|
||||||
|
|
||||||
static const u64 GC_ALIGNED16(psSignBits2[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL};
|
static const u64 GC_ALIGNED16(psSignBits2[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL};
|
||||||
static const u64 GC_ALIGNED16(psAbsMask2[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
|
static const u64 GC_ALIGNED16(psAbsMask2[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
|
||||||
|
// 16-byte aligned pair of doubles loaded into XMM0 by Jit64::fctiwx (MOVAPD)
// before the double -> int32 conversion:
//   [0] = 0x7FFFFFFF (largest s32): upper clamp, combined with the input via MINSD.
//   [1] = -0x80000: converted to a 32-bit integer this is 0xFFF80000, i.e. the
//         "upper 32 bits of the result" value described in fctiwx. It ends up
//         there because the packed conversion (CVTPD2DQ / CVTTPD2DQ) converts
//         both double lanes into adjacent 32-bit integers.
static const double GC_ALIGNED16(half_qnan_and_s32_max[2]) = {0x7FFFFFFF, -0x80000};
|
||||||
|
|
||||||
void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg))
|
void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg))
|
||||||
{
|
{
|
||||||
|
@ -267,3 +268,44 @@ void Jit64::fcmpx(UGeckoInstruction inst)
|
||||||
|
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// JIT handler shared by fctiwx (SUBOP10 == 14) and fctiwzx (SUBOP10 == 15):
// converts the double in FPR b to a 32-bit integer and writes it to the low
// 64 bits of FPR d. Instructions with the Rc bit set are not handled here
// (see FALLBACK_IF).
void Jit64::fctiwx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITFloatingPointOff);
	FALLBACK_IF(inst.Rc);

	int d = inst.RD;
	int b = inst.RB;
	fpr.Lock(d, b);
	// Always load d's current value: the final MOVSD only replaces the low
	// 64 bits of the host register, so the upper half must already hold d's
	// real contents even when d != b. (Previously d was loaded only if d == b.)
	fpr.BindToRegister(d, true);

	// Intel uses 0x80000000 as a generic error code while PowerPC uses clamping:
	//
	// input       | output fctiw | output CVTPD2DQ
	// ------------+--------------+----------------
	// > +2^31 - 1 | 0x7fffffff   | 0x80000000
	// < -2^31     | 0x80000000   | 0x80000000
	// any NaN     | 0x80000000   | 0x80000000
	//
	// The upper 32 bits of the result are set to 0xfff80000,
	// except for -0.0 where they are set to 0xfff80001 (TODO).

	// XMM0 = {s32 max, -0x80000}. MINSD clamps the low lane to s32 max, the
	// only row of the table above where the two architectures disagree.
	// A NaN in b passes through because MINSD returns its source operand
	// when either input is NaN. The high lane is left as -0x80000, which
	// the packed conversion turns into 0xfff80000.
	MOVAPD(XMM0, M(&half_qnan_and_s32_max));
	MINSD(XMM0, fpr.R(b));
	switch (inst.SUBOP10)
	{
		// fctiwx
		case 14:
			CVTPD2DQ(XMM0, R(XMM0));
			break;

		// fctiwzx
		case 15:
			CVTTPD2DQ(XMM0, R(XMM0));
			break;
	}
	// d[64+] must not be modified
	MOVSD(fpr.R(d), XMM0);
	fpr.UnlockAll();
}
|
||||||
|
|
Loading…
Reference in New Issue