PowerPC: Fixed a moronic bug in mcrfs. Together with correctly computing FPRF in a few ops (a new option, which slows things down slightly), this fixes most of the remaining math errors in Super Monkey Ball and eliminates the need for the HLE hacks. Make sure your gameconfigs are up to date. Unfortunately, F-Zero still doesn't work. Miscellaneous other small changes. Indented some code.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@3426 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
dd640a4937
commit
10442c8faa
|
@ -1030,6 +1030,8 @@ enum NormalSSEOps
|
||||||
}
|
}
|
||||||
|
|
||||||
void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src) {
|
void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src) {
|
||||||
|
if (arg.IsSimpleReg())
|
||||||
|
PanicAlert("Emitter: MOVQ_xmm doesn't support single registers as destination");
|
||||||
if (src > 7)
|
if (src > 7)
|
||||||
{
|
{
|
||||||
// Alternate encoding
|
// Alternate encoding
|
||||||
|
|
|
@ -43,6 +43,7 @@ void SCoreStartupParameter::LoadDefaults()
|
||||||
bRunCompareServer = false;
|
bRunCompareServer = false;
|
||||||
bDSPThread = true;
|
bDSPThread = true;
|
||||||
bLockThreads = true;
|
bLockThreads = true;
|
||||||
|
bEnableFPRF = false;
|
||||||
bWii = false;
|
bWii = false;
|
||||||
SelectedLanguage = 0;
|
SelectedLanguage = 0;
|
||||||
iTLBHack = 0;
|
iTLBHack = 0;
|
||||||
|
|
|
@ -65,6 +65,7 @@ struct SCoreStartupParameter
|
||||||
bool bUseFastMem;
|
bool bUseFastMem;
|
||||||
bool bLockThreads;
|
bool bLockThreads;
|
||||||
bool bOptimizeQuantizers;
|
bool bOptimizeQuantizers;
|
||||||
|
bool bEnableFPRF;
|
||||||
bool bEnableCheats;
|
bool bEnableCheats;
|
||||||
bool bEnableIsoCache;
|
bool bEnableIsoCache;
|
||||||
|
|
||||||
|
|
|
@ -68,19 +68,21 @@ static const SPatch OSPatches[] =
|
||||||
// wii only
|
// wii only
|
||||||
{ "__OSInitAudioSystem", HLE_Misc::UnimplementedFunction },
|
{ "__OSInitAudioSystem", HLE_Misc::UnimplementedFunction },
|
||||||
|
|
||||||
// Super Monkey Ball
|
// Super Monkey Ball - no longer needed.
|
||||||
{ ".evil_vec_cosine", HLE_Misc::SMB_EvilVecCosine },
|
//{ ".evil_vec_cosine", HLE_Misc::SMB_EvilVecCosine },
|
||||||
{ ".evil_normalize", HLE_Misc::SMB_EvilNormalize },
|
//{ ".evil_normalize", HLE_Misc::SMB_EvilNormalize },
|
||||||
{ ".evil_vec_setlength", HLE_Misc::SMB_evil_vec_setlength },
|
//{ ".evil_vec_setlength", HLE_Misc::SMB_evil_vec_setlength },
|
||||||
{ ".evil_vec_something", HLE_Misc::FZero_evil_vec_normalize },
|
//{ ".evil_vec_something", HLE_Misc::FZero_evil_vec_normalize },
|
||||||
{ "PanicAlert", HLE_Misc::HLEPanicAlert },
|
{ "PanicAlert", HLE_Misc::HLEPanicAlert },
|
||||||
{ ".sqrt_internal_needs_cr1", HLE_Misc::SMB_sqrt_internal },
|
//{ ".sqrt_internal_needs_cr1", HLE_Misc::SMB_sqrt_internal },
|
||||||
{ ".rsqrt_internal_needs_cr1", HLE_Misc::SMB_rsqrt_internal },
|
//{ ".rsqrt_internal_needs_cr1", HLE_Misc::SMB_rsqrt_internal },
|
||||||
{ ".atan2", HLE_Misc::SMB_atan2},
|
//{ ".atan2", HLE_Misc::SMB_atan2},
|
||||||
{ ".sqrt_fz", HLE_Misc::FZ_sqrt},
|
//{ ".sqrt_fz", HLE_Misc::FZ_sqrt},
|
||||||
|
|
||||||
{ ".sqrt_internal_fz", HLE_Misc::FZ_sqrt_internal },
|
// F-zero still isn't working correctly, but these aren't really helping.
|
||||||
{ ".rsqrt_internal_fz", HLE_Misc::FZ_rsqrt_internal },
|
|
||||||
|
//{ ".sqrt_internal_fz", HLE_Misc::FZ_sqrt_internal },
|
||||||
|
//{ ".rsqrt_internal_fz", HLE_Misc::FZ_rsqrt_internal },
|
||||||
|
|
||||||
//{ ".kill_infinites", HLE_Misc::FZero_kill_infinites },
|
//{ ".kill_infinites", HLE_Misc::FZero_kill_infinites },
|
||||||
// special
|
// special
|
||||||
|
|
|
@ -110,7 +110,12 @@ void SMB_EvilNormalize()
|
||||||
float x = F(r3);
|
float x = F(r3);
|
||||||
float y = F(r3 + 4);
|
float y = F(r3 + 4);
|
||||||
float z = F(r3 + 8);
|
float z = F(r3 + 8);
|
||||||
float inv_len = 1.0f / sqrtf(x*x + y*y + z*z);
|
float len = x*x + y*y + z*z;
|
||||||
|
float inv_len;
|
||||||
|
if (len <= 0)
|
||||||
|
inv_len = 0;
|
||||||
|
else
|
||||||
|
inv_len = 1.0f / sqrtf(len);
|
||||||
x *= inv_len;
|
x *= inv_len;
|
||||||
y *= inv_len;
|
y *= inv_len;
|
||||||
z *= inv_len;
|
z *= inv_len;
|
||||||
|
|
|
@ -100,6 +100,9 @@ public:
|
||||||
static void Init();
|
static void Init();
|
||||||
static void DoState(PointerWrap &p);
|
static void DoState(PointerWrap &p);
|
||||||
|
|
||||||
|
static u32 GetMask() { return m_InterruptMask; }
|
||||||
|
static u32 GetCause() { return m_InterruptCause; }
|
||||||
|
|
||||||
static void SetInterrupt(InterruptCause _causemask, bool _bSet=true);
|
static void SetInterrupt(InterruptCause _causemask, bool _bSet=true);
|
||||||
|
|
||||||
// Read32
|
// Read32
|
||||||
|
|
|
@ -71,67 +71,25 @@ namespace Interpreter
|
||||||
void UpdateFPSCR(UReg_FPSCR fp);
|
void UpdateFPSCR(UReg_FPSCR fp);
|
||||||
void UpdateSSEState();
|
void UpdateSSEState();
|
||||||
|
|
||||||
void UpdateFPRF(double value)
|
|
||||||
|
// start of unit test - Dolphin needs more of these!
|
||||||
|
/*
|
||||||
|
void TestFPRF()
|
||||||
{
|
{
|
||||||
u64 ivalue = *((u64*)&value);
|
UpdateFPRF(1.0);
|
||||||
// 5 bits (C, <, >, =, ?)
|
if (FPSCR.FPRF != 0x4)
|
||||||
// top: class descriptor
|
PanicAlert("Error 1");
|
||||||
FPSCR.FPRF = 4;
|
UpdateFPRF(-1.0);
|
||||||
// easy cases first
|
if (FPSCR.FPRF != 0x8)
|
||||||
if (ivalue == 0) {
|
PanicAlert("Error 2");
|
||||||
// positive zero
|
PanicAlert("Test done");
|
||||||
FPSCR.FPRF = 0x2;
|
}*/
|
||||||
} else if (ivalue == 0x8000000000000000ULL) {
|
|
||||||
// negative zero
|
|
||||||
FPSCR.FPRF = 0x12;
|
|
||||||
} else if (ivalue == 0x7FF0000000000000ULL) {
|
|
||||||
// positive inf
|
|
||||||
FPSCR.FPRF = 0x5;
|
|
||||||
} else if (ivalue == 0xFFF0000000000000ULL) {
|
|
||||||
// negative inf
|
|
||||||
FPSCR.FPRF = 0x9;
|
|
||||||
} else {
|
|
||||||
// OK let's dissect this thing.
|
|
||||||
int sign = (int)(ivalue >> 63);
|
|
||||||
int exp = (int)((ivalue >> 52) & 0x7FF);
|
|
||||||
if (exp >= 1 && exp <= 2046) {
|
|
||||||
// Nice normalized number.
|
|
||||||
if (sign) {
|
|
||||||
FPSCR.FPRF = 0x8; // negative
|
|
||||||
} else {
|
|
||||||
FPSCR.FPRF = 0x4; // positive
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
u64 mantissa = ivalue & 0x000FFFFFFFFFFFFFULL;
|
|
||||||
// int mantissa_top = (int)(mantissa >> 51);
|
|
||||||
if (exp == 0 && mantissa) {
|
|
||||||
// Denormalized number.
|
|
||||||
if (sign) {
|
|
||||||
FPSCR.FPRF = 0x18;
|
|
||||||
} else {
|
|
||||||
FPSCR.FPRF = 0x14;
|
|
||||||
}
|
|
||||||
} else if (exp == 0x7FF && mantissa /* && mantissa_top*/) {
|
|
||||||
FPSCR.FPRF = 0x11; // Quiet NAN
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// extremely rare
|
// extremely rare
|
||||||
void Helper_UpdateCR1(double _fValue)
|
void Helper_UpdateCR1(double _fValue)
|
||||||
{
|
{
|
||||||
FPSCR.FPRF = 0;
|
// Should just update exception flags, not do any compares.
|
||||||
if (_fValue == 0.0 || _fValue == -0.0)
|
|
||||||
FPSCR.FPRF |= 2;
|
|
||||||
if (_fValue > 0.0)
|
|
||||||
FPSCR.FPRF |= 4;
|
|
||||||
if (_fValue < 0.0)
|
|
||||||
FPSCR.FPRF |= 8;
|
|
||||||
SetCRField(1, (FPSCR.Hex & 0x0000F000) >> 12);
|
|
||||||
|
|
||||||
PanicAlert("CR1");
|
PanicAlert("CR1");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -218,7 +176,7 @@ void fcmpu(UGeckoInstruction _inst)
|
||||||
// Apply current rounding mode
|
// Apply current rounding mode
|
||||||
void fctiwx(UGeckoInstruction _inst)
|
void fctiwx(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
UpdateSSEState();
|
//UpdateSSEState();
|
||||||
const double b = rPS0(_inst.FB);
|
const double b = rPS0(_inst.FB);
|
||||||
u32 value;
|
u32 value;
|
||||||
if (b > (double)0x7fffffff)
|
if (b > (double)0x7fffffff)
|
||||||
|
@ -257,7 +215,7 @@ largest representable int on PowerPC. */
|
||||||
// Always round toward zero
|
// Always round toward zero
|
||||||
void fctiwzx(UGeckoInstruction _inst)
|
void fctiwzx(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
//UpdateFPSCR(FPSCR);
|
//UpdateSSEState();
|
||||||
const double b = rPS0(_inst.FB);
|
const double b = rPS0(_inst.FB);
|
||||||
u32 value;
|
u32 value;
|
||||||
if (b > (double)0x7fffffff)
|
if (b > (double)0x7fffffff)
|
||||||
|
@ -279,7 +237,6 @@ void fctiwzx(UGeckoInstruction _inst)
|
||||||
// FPSCR.XX |= FPSCR.FI;
|
// FPSCR.XX |= FPSCR.FI;
|
||||||
// FPSCR.FR = 1; //fabs(d_value) > fabs(b);
|
// FPSCR.FR = 1; //fabs(d_value) > fabs(b);
|
||||||
}
|
}
|
||||||
//FPRF undefined
|
|
||||||
|
|
||||||
riPS0(_inst.FD) = (u64)value;
|
riPS0(_inst.FD) = (u64)value;
|
||||||
if (_inst.Rc)
|
if (_inst.Rc)
|
||||||
|
@ -305,7 +262,7 @@ void fnabsx(UGeckoInstruction _inst)
|
||||||
riPS0(_inst.FD) = riPS0(_inst.FB) | (1ULL << 63);
|
riPS0(_inst.FD) = riPS0(_inst.FB) | (1ULL << 63);
|
||||||
// This is a binary instruction. Does not alter FPSCR
|
// This is a binary instruction. Does not alter FPSCR
|
||||||
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
||||||
}
|
}
|
||||||
|
|
||||||
void fnegx(UGeckoInstruction _inst)
|
void fnegx(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
|
@ -331,11 +288,12 @@ void frspx(UGeckoInstruction _inst) // round to single
|
||||||
if (true || FPSCR.RN != 0)
|
if (true || FPSCR.RN != 0)
|
||||||
{
|
{
|
||||||
// Not used in Super Monkey Ball
|
// Not used in Super Monkey Ball
|
||||||
UpdateSSEState();
|
// UpdateSSEState();
|
||||||
double b = rPS0(_inst.FB);
|
double b = rPS0(_inst.FB);
|
||||||
double rounded = (double)(float)b;
|
double rounded = (double)(float)b;
|
||||||
FPSCR.FI = b != rounded; // changing both of these affect Super Monkey Ball behaviour greatly.
|
//FPSCR.FI = b != rounded; // changing both of these affect Super Monkey Ball behaviour greatly.
|
||||||
FPSCR.FR = 1; // WHY? fabs(rounded) > fabs(b);
|
if (Core::g_CoreStartupParameter.bEnableFPRF)
|
||||||
|
UpdateFPRF(rounded);
|
||||||
rPS0(_inst.FD) = rPS1(_inst.FD) = rounded;
|
rPS0(_inst.FD) = rPS1(_inst.FD) = rounded;
|
||||||
return;
|
return;
|
||||||
// PanicAlert("frspx: FPSCR.RN=%i", FPSCR.RN);
|
// PanicAlert("frspx: FPSCR.RN=%i", FPSCR.RN);
|
||||||
|
@ -389,8 +347,8 @@ void frspx(UGeckoInstruction _inst) // round to single
|
||||||
//PanicAlert("NAN %08x %08x", in.i >> 32, in.i);
|
//PanicAlert("NAN %08x %08x", in.i >> 32, in.i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
UpdateFPRF(out.d);
|
UpdateFPRF(out.d);
|
||||||
FPSCR.FR = 1; // SUPER MONKEY BALL HACK
|
|
||||||
rPS0(_inst.FD) = rPS1(_inst.FD) = out.d;
|
rPS0(_inst.FD) = rPS1(_inst.FD) = out.d;
|
||||||
|
|
||||||
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
||||||
|
@ -416,19 +374,19 @@ void fmulsx(UGeckoInstruction _inst)
|
||||||
|
|
||||||
void fmaddx(UGeckoInstruction _inst)
|
void fmaddx(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
rPS0(_inst.FD) = (rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB);
|
double result = (rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB);
|
||||||
FPSCR.FI = 0;
|
rPS0(_inst.FD) = result;
|
||||||
FPSCR.FR = 0;
|
UpdateFPRF(result);
|
||||||
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
||||||
}
|
}
|
||||||
|
|
||||||
void fmaddsx(UGeckoInstruction _inst)
|
void fmaddsx(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
double d_value = (rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB);
|
double d_value = (rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB);
|
||||||
rPS0(_inst.FD) = rPS1(_inst.FD) =
|
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(d_value);
|
||||||
static_cast<float>(d_value);
|
|
||||||
FPSCR.FI = d_value != rPS0(_inst.FD);
|
FPSCR.FI = d_value != rPS0(_inst.FD);
|
||||||
FPSCR.FR = 0;
|
FPSCR.FR = 0;
|
||||||
UpdateFPRF(rPS0(_inst.FD));
|
UpdateFPRF(rPS0(_inst.FD));
|
||||||
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -436,16 +394,11 @@ void fmaddsx(UGeckoInstruction _inst)
|
||||||
void faddx(UGeckoInstruction _inst)
|
void faddx(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
rPS0(_inst.FD) = rPS0(_inst.FA) + rPS0(_inst.FB);
|
rPS0(_inst.FD) = rPS0(_inst.FA) + rPS0(_inst.FB);
|
||||||
// FPSCR.FI = 0;
|
|
||||||
// FPSCR.FR = 1;
|
|
||||||
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
||||||
}
|
}
|
||||||
void faddsx(UGeckoInstruction _inst)
|
void faddsx(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) + rPS0(_inst.FB));
|
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) + rPS0(_inst.FB));
|
||||||
// FPSCR.FI = 0;
|
|
||||||
// FPSCR.FR = 1;
|
|
||||||
// FPSCR.Hex = (rand() ^ (rand() << 8) ^ (rand() << 16)) & ~(0x000000F8);
|
|
||||||
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -453,8 +406,6 @@ void faddsx(UGeckoInstruction _inst)
|
||||||
void fdivx(UGeckoInstruction _inst)
|
void fdivx(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
rPS0(_inst.FD) = rPS0(_inst.FA) / rPS0(_inst.FB);
|
rPS0(_inst.FD) = rPS0(_inst.FA) / rPS0(_inst.FB);
|
||||||
// FPSCR.FI = 0;
|
|
||||||
// FPSCR.FR = 1;
|
|
||||||
if (fabs(rPS0(_inst.FB)) == 0.0) {
|
if (fabs(rPS0(_inst.FB)) == 0.0) {
|
||||||
FPSCR.ZX = 1;
|
FPSCR.ZX = 1;
|
||||||
}
|
}
|
||||||
|
@ -463,8 +414,6 @@ void fdivx(UGeckoInstruction _inst)
|
||||||
void fdivsx(UGeckoInstruction _inst)
|
void fdivsx(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) / rPS0(_inst.FB));
|
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) / rPS0(_inst.FB));
|
||||||
// FPSCR.FI = 0;
|
|
||||||
// FPSCR.FR = 1;
|
|
||||||
if (fabs(rPS0(_inst.FB)) == 0.0) {
|
if (fabs(rPS0(_inst.FB)) == 0.0) {
|
||||||
FPSCR.ZX = 1;
|
FPSCR.ZX = 1;
|
||||||
}
|
}
|
||||||
|
@ -473,8 +422,6 @@ void fdivsx(UGeckoInstruction _inst)
|
||||||
void fresx(UGeckoInstruction _inst)
|
void fresx(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(1.0f / rPS0(_inst.FB));
|
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(1.0f / rPS0(_inst.FB));
|
||||||
// FPSCR.FI = 0;
|
|
||||||
// FPSCR.FR = 1;
|
|
||||||
if (fabs(rPS0(_inst.FB)) == 0.0) {
|
if (fabs(rPS0(_inst.FB)) == 0.0) {
|
||||||
FPSCR.ZX = 1;
|
FPSCR.ZX = 1;
|
||||||
}
|
}
|
||||||
|
@ -485,8 +432,6 @@ void fresx(UGeckoInstruction _inst)
|
||||||
void fmsubx(UGeckoInstruction _inst)
|
void fmsubx(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
rPS0(_inst.FD) = (rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB);
|
rPS0(_inst.FD) = (rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB);
|
||||||
// FPSCR.FI = 0;
|
|
||||||
// FPSCR.FR = 0;
|
|
||||||
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -494,8 +439,6 @@ void fmsubsx(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
rPS0(_inst.FD) = rPS1(_inst.FD) =
|
rPS0(_inst.FD) = rPS1(_inst.FD) =
|
||||||
static_cast<float>((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB));
|
static_cast<float>((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB));
|
||||||
// FPSCR.FI = 0;
|
|
||||||
// FPSCR.FR = 0;
|
|
||||||
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -503,16 +446,12 @@ void fmsubsx(UGeckoInstruction _inst)
|
||||||
void fnmaddx(UGeckoInstruction _inst)
|
void fnmaddx(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
rPS0(_inst.FD) = -((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB));
|
rPS0(_inst.FD) = -((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB));
|
||||||
// FPSCR.FI = 0;
|
|
||||||
// FPSCR.FR = 0;
|
|
||||||
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
||||||
}
|
}
|
||||||
void fnmaddsx(UGeckoInstruction _inst)
|
void fnmaddsx(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
rPS0(_inst.FD) = rPS1(_inst.FD) =
|
rPS0(_inst.FD) = rPS1(_inst.FD) =
|
||||||
static_cast<float>(-((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB)));
|
static_cast<float>(-((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB)));
|
||||||
// FPSCR.FI = 0;
|
|
||||||
// FPSCR.FR = 0;
|
|
||||||
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -520,16 +459,12 @@ void fnmaddsx(UGeckoInstruction _inst)
|
||||||
void fnmsubx(UGeckoInstruction _inst)
|
void fnmsubx(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
rPS0(_inst.FD) = -((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB));
|
rPS0(_inst.FD) = -((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB));
|
||||||
// FPSCR.FI = 0;
|
|
||||||
// FPSCR.FR = 0;
|
|
||||||
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
||||||
}
|
}
|
||||||
void fnmsubsx(UGeckoInstruction _inst)
|
void fnmsubsx(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
rPS0(_inst.FD) = rPS1(_inst.FD) =
|
rPS0(_inst.FD) = rPS1(_inst.FD) =
|
||||||
static_cast<float>(-((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB)));
|
static_cast<float>(-((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB)));
|
||||||
// FPSCR.FI = 0;
|
|
||||||
// FPSCR.FR = 0;
|
|
||||||
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -537,15 +472,11 @@ void fnmsubsx(UGeckoInstruction _inst)
|
||||||
void fsubx(UGeckoInstruction _inst)
|
void fsubx(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
rPS0(_inst.FD) = rPS0(_inst.FA) - rPS0(_inst.FB);
|
rPS0(_inst.FD) = rPS0(_inst.FA) - rPS0(_inst.FB);
|
||||||
// FPSCR.FI = 0;
|
|
||||||
// FPSCR.FR = 0;
|
|
||||||
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
||||||
}
|
}
|
||||||
void fsubsx(UGeckoInstruction _inst)
|
void fsubsx(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) - rPS0(_inst.FB));
|
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) - rPS0(_inst.FB));
|
||||||
// FPSCR.FI = 0;
|
|
||||||
// FPSCR.FR = 0;
|
|
||||||
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -553,17 +484,12 @@ void fsubsx(UGeckoInstruction _inst)
|
||||||
void frsqrtex(UGeckoInstruction _inst)
|
void frsqrtex(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
rPS0(_inst.FD) = 1.0f / (sqrt(rPS0(_inst.FB)));
|
rPS0(_inst.FD) = 1.0f / (sqrt(rPS0(_inst.FB)));
|
||||||
// FPSCR.FI = 0;
|
|
||||||
// FPSCR.FR = 0;
|
|
||||||
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
||||||
}
|
}
|
||||||
|
|
||||||
void fsqrtx(UGeckoInstruction _inst)
|
void fsqrtx(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
rPS0(_inst.FD) = sqrt(rPS0(_inst.FB));
|
rPS0(_inst.FD) = sqrt(rPS0(_inst.FB));
|
||||||
// FPSCR.FI = 0;
|
|
||||||
// FPSCR.FR = 0;
|
|
||||||
|
|
||||||
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -130,7 +130,7 @@ void UpdateFPSCR(UReg_FPSCR fp)
|
||||||
|
|
||||||
void mcrfs(UGeckoInstruction _inst)
|
void mcrfs(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
u32 fpflags = ((FPSCR.Hex >> (4*(_inst.CRFS))) & 0xF);
|
u32 fpflags = ((FPSCR.Hex >> (4*(7 - _inst.CRFS))) & 0xF);
|
||||||
switch (_inst.CRFS) {
|
switch (_inst.CRFS) {
|
||||||
case 0:
|
case 0:
|
||||||
FPSCR.FX = 0;
|
FPSCR.FX = 0;
|
||||||
|
@ -216,7 +216,7 @@ void mtfsfx(UGeckoInstruction _inst)
|
||||||
u32 m = 0;
|
u32 m = 0;
|
||||||
for (int i = 0; i < 8; i++) { //7?? todo check
|
for (int i = 0; i < 8; i++) { //7?? todo check
|
||||||
if (fm & (1 << i))
|
if (fm & (1 << i))
|
||||||
m |= (0xf << (i*4));
|
m |= (0xF << (i*4));
|
||||||
}
|
}
|
||||||
|
|
||||||
FPSCR.Hex = (FPSCR.Hex & ~m) | ((u32)(riPS0(_inst.FB)) & m);
|
FPSCR.Hex = (FPSCR.Hex & ~m) | ((u32)(riPS0(_inst.FB)) & m);
|
||||||
|
@ -238,12 +238,15 @@ void mfcr(UGeckoInstruction _inst)
|
||||||
|
|
||||||
void mtcrf(UGeckoInstruction _inst)
|
void mtcrf(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
u32 mask = 0;
|
|
||||||
u32 crm = _inst.CRM;
|
u32 crm = _inst.CRM;
|
||||||
if (crm == 0xFF) {
|
if (crm == 0xFF)
|
||||||
|
{
|
||||||
SetCR(m_GPR[_inst.RS]);
|
SetCR(m_GPR[_inst.RS]);
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
//TODO: use lookup table? probably not worth it
|
//TODO: use lookup table? probably not worth it
|
||||||
|
u32 mask = 0;
|
||||||
for (int i = 0; i < 8; i++) {
|
for (int i = 0; i < 8; i++) {
|
||||||
if (crm & (1 << i))
|
if (crm & (1 << i))
|
||||||
mask |= 0xF << (i*4);
|
mask |= 0xF << (i*4);
|
||||||
|
@ -470,10 +473,8 @@ void crxor(UGeckoInstruction _inst)
|
||||||
|
|
||||||
void mcrf(UGeckoInstruction _inst)
|
void mcrf(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
u32 cr = GetCR();
|
int cr_f = GetCRField(_inst.CRFS);
|
||||||
u32 crmask = ~(0xF0000000 >> (4*_inst.CRFD));
|
SetCRField(_inst.CRFD, cr_f);
|
||||||
u32 flags = ((cr << (4*_inst.CRFS)) & 0xF0000000) >> (4*_inst.CRFD);
|
|
||||||
SetCR((cr & crmask) | flags);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void isync(UGeckoInstruction _inst)
|
void isync(UGeckoInstruction _inst)
|
||||||
|
|
|
@ -25,196 +25,209 @@
|
||||||
#include "Jit.h"
|
#include "Jit.h"
|
||||||
#include "JitRegCache.h"
|
#include "JitRegCache.h"
|
||||||
|
|
||||||
const u64 GC_ALIGNED16(psSignBits2[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL};
|
const u64 GC_ALIGNED16(psSignBits2[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL};
|
||||||
const u64 GC_ALIGNED16(psAbsMask2[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
|
const u64 GC_ALIGNED16(psAbsMask2[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
|
||||||
const double GC_ALIGNED16(psOneOne2[2]) = {1.0, 1.0};
|
const double GC_ALIGNED16(psOneOne2[2]) = {1.0, 1.0};
|
||||||
|
|
||||||
void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg))
|
void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg))
|
||||||
|
{
|
||||||
|
fpr.Lock(d, a, b);
|
||||||
|
if (d == a)
|
||||||
{
|
{
|
||||||
fpr.Lock(d, a, b);
|
fpr.LoadToX64(d, true);
|
||||||
if (d == a)
|
(this->*op)(fpr.RX(d), fpr.R(b));
|
||||||
{
|
|
||||||
fpr.LoadToX64(d, true);
|
|
||||||
(this->*op)(fpr.RX(d), fpr.R(b));
|
|
||||||
}
|
|
||||||
else if (d == b && reversible)
|
|
||||||
{
|
|
||||||
fpr.LoadToX64(d, true);
|
|
||||||
(this->*op)(fpr.RX(d), fpr.R(a));
|
|
||||||
}
|
|
||||||
else if (a != d && b != d)
|
|
||||||
{
|
|
||||||
// Sources different from d, can use rather quick solution
|
|
||||||
fpr.LoadToX64(d, !dupe);
|
|
||||||
MOVSD(fpr.RX(d), fpr.R(a));
|
|
||||||
(this->*op)(fpr.RX(d), fpr.R(b));
|
|
||||||
}
|
|
||||||
else if (b != d)
|
|
||||||
{
|
|
||||||
fpr.LoadToX64(d, !dupe);
|
|
||||||
MOVSD(XMM0, fpr.R(b));
|
|
||||||
MOVSD(fpr.RX(d), fpr.R(a));
|
|
||||||
(this->*op)(fpr.RX(d), Gen::R(XMM0));
|
|
||||||
}
|
|
||||||
else // Other combo, must use two temps :(
|
|
||||||
{
|
|
||||||
MOVSD(XMM0, fpr.R(a));
|
|
||||||
MOVSD(XMM1, fpr.R(b));
|
|
||||||
fpr.LoadToX64(d, !dupe);
|
|
||||||
(this->*op)(XMM0, Gen::R(XMM1));
|
|
||||||
MOVSD(fpr.RX(d), Gen::R(XMM0));
|
|
||||||
}
|
|
||||||
if (dupe) {
|
|
||||||
ForceSinglePrecisionS(fpr.RX(d));
|
|
||||||
MOVDDUP(fpr.RX(d), fpr.R(d));
|
|
||||||
}
|
|
||||||
fpr.UnlockAll();
|
|
||||||
}
|
}
|
||||||
|
else if (d == b && reversible)
|
||||||
void Jit64::fp_arith_s(UGeckoInstruction inst)
|
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
|
fpr.LoadToX64(d, true);
|
||||||
{Default(inst); return;} // turn off from debugger
|
(this->*op)(fpr.RX(d), fpr.R(a));
|
||||||
INSTRUCTION_START;
|
|
||||||
if (inst.Rc) {
|
|
||||||
Default(inst); return;
|
|
||||||
}
|
|
||||||
bool dupe = inst.OPCD == 59;
|
|
||||||
switch (inst.SUBOP5)
|
|
||||||
{
|
|
||||||
case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::DIVSD); break; //div
|
|
||||||
case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::SUBSD); break; //sub
|
|
||||||
case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, dupe, &XEmitter::ADDSD); break; //add
|
|
||||||
case 23: //sel
|
|
||||||
Default(inst);
|
|
||||||
break;
|
|
||||||
case 24: //res
|
|
||||||
Default(inst);
|
|
||||||
break;
|
|
||||||
case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, dupe, &XEmitter::MULSD); break; //mul
|
|
||||||
default:
|
|
||||||
_assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
else if (a != d && b != d)
|
||||||
void Jit64::fmaddXX(UGeckoInstruction inst)
|
{
|
||||||
|
// Sources different from d, can use rather quick solution
|
||||||
|
fpr.LoadToX64(d, !dupe);
|
||||||
|
MOVSD(fpr.RX(d), fpr.R(a));
|
||||||
|
(this->*op)(fpr.RX(d), fpr.R(b));
|
||||||
|
}
|
||||||
|
else if (b != d)
|
||||||
|
{
|
||||||
|
fpr.LoadToX64(d, !dupe);
|
||||||
|
MOVSD(XMM0, fpr.R(b));
|
||||||
|
MOVSD(fpr.RX(d), fpr.R(a));
|
||||||
|
(this->*op)(fpr.RX(d), Gen::R(XMM0));
|
||||||
|
}
|
||||||
|
else // Other combo, must use two temps :(
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
|
|
||||||
{Default(inst); return;} // turn off from debugger
|
|
||||||
INSTRUCTION_START;
|
|
||||||
if (inst.Rc) {
|
|
||||||
Default(inst); return;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool single_precision = inst.OPCD == 59;
|
|
||||||
|
|
||||||
int a = inst.FA;
|
|
||||||
int b = inst.FB;
|
|
||||||
int c = inst.FC;
|
|
||||||
int d = inst.FD;
|
|
||||||
|
|
||||||
fpr.Lock(a, b, c, d);
|
|
||||||
MOVSD(XMM0, fpr.R(a));
|
MOVSD(XMM0, fpr.R(a));
|
||||||
switch (inst.SUBOP5)
|
MOVSD(XMM1, fpr.R(b));
|
||||||
{
|
fpr.LoadToX64(d, !dupe);
|
||||||
case 28: //msub
|
(this->*op)(XMM0, Gen::R(XMM1));
|
||||||
MULSD(XMM0, fpr.R(c));
|
MOVSD(fpr.RX(d), Gen::R(XMM0));
|
||||||
SUBSD(XMM0, fpr.R(b));
|
|
||||||
break;
|
|
||||||
case 29: //madd
|
|
||||||
MULSD(XMM0, fpr.R(c));
|
|
||||||
ADDSD(XMM0, fpr.R(b));
|
|
||||||
break;
|
|
||||||
case 30: //nmsub
|
|
||||||
MULSD(XMM0, fpr.R(c));
|
|
||||||
SUBSD(XMM0, fpr.R(b));
|
|
||||||
XORPD(XMM0, M((void*)&psSignBits2));
|
|
||||||
break;
|
|
||||||
case 31: //nmadd
|
|
||||||
MULSD(XMM0, fpr.R(c));
|
|
||||||
ADDSD(XMM0, fpr.R(b));
|
|
||||||
XORPD(XMM0, M((void*)&psSignBits2));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
fpr.LoadToX64(d, false);
|
|
||||||
//YES it is necessary to dupe the result :(
|
|
||||||
//TODO : analysis - does the top reg get used? If so, dupe, if not, don't.
|
|
||||||
if (single_precision) {
|
|
||||||
ForceSinglePrecisionS(XMM0);
|
|
||||||
MOVDDUP(fpr.RX(d), R(XMM0));
|
|
||||||
} else {
|
|
||||||
MOVSD(fpr.RX(d), R(XMM0));
|
|
||||||
}
|
|
||||||
fpr.UnlockAll();
|
|
||||||
}
|
}
|
||||||
|
if (dupe) {
|
||||||
void Jit64::fmrx(UGeckoInstruction inst)
|
ForceSinglePrecisionS(fpr.RX(d));
|
||||||
{
|
MOVDDUP(fpr.RX(d), fpr.R(d));
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
|
}
|
||||||
{Default(inst); return;} // turn off from debugger
|
fpr.UnlockAll();
|
||||||
INSTRUCTION_START;
|
}
|
||||||
if (inst.Rc) {
|
|
||||||
Default(inst); return;
|
void Jit64::fp_arith_s(UGeckoInstruction inst)
|
||||||
}
|
{
|
||||||
int d = inst.FD;
|
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
|
||||||
int b = inst.FB;
|
{Default(inst); return;} // turn off from debugger
|
||||||
fpr.LoadToX64(d, true); // we don't want to destroy the high bit
|
INSTRUCTION_START;
|
||||||
MOVSD(fpr.RX(d), fpr.R(b));
|
if (inst.Rc) {
|
||||||
|
Default(inst); return;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::fcmpx(UGeckoInstruction inst)
|
// Only the interpreter has "proper" support for (some) FP flags
|
||||||
|
if (inst.SUBOP5 == 25 && Core::g_CoreStartupParameter.bEnableFPRF) {
|
||||||
|
Default(inst); return;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool dupe = inst.OPCD == 59;
|
||||||
|
switch (inst.SUBOP5)
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
|
case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::DIVSD); break; //div
|
||||||
{Default(inst); return;} // turn off from debugger
|
case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::SUBSD); break; //sub
|
||||||
INSTRUCTION_START;
|
case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, dupe, &XEmitter::ADDSD); break; //add
|
||||||
if (jo.fpAccurateFlags)
|
case 23: //sel
|
||||||
{
|
Default(inst);
|
||||||
Default(inst);
|
break;
|
||||||
return;
|
case 24: //res
|
||||||
}
|
Default(inst);
|
||||||
bool ordered = inst.SUBOP10 == 32;
|
break;
|
||||||
/*
|
case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, dupe, &XEmitter::MULSD); break; //mul
|
||||||
double fa = rPS0(_inst.FA);
|
default:
|
||||||
double fb = rPS0(_inst.FB);
|
_assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!");
|
||||||
u32 compareResult;
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if(IsNAN(fa) || IsNAN(fb)) compareResult = 1;
|
void Jit64::fmaddXX(UGeckoInstruction inst)
|
||||||
else if(fa < fb) compareResult = 8;
|
{
|
||||||
else if(fa > fb) compareResult = 4;
|
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
|
||||||
else compareResult = 2;
|
{Default(inst); return;} // turn off from debugger
|
||||||
|
INSTRUCTION_START;
|
||||||
|
if (inst.Rc) {
|
||||||
|
Default(inst); return;
|
||||||
|
}
|
||||||
|
// Only the interpreter has "proper" support for (some) FP flags
|
||||||
|
if (inst.SUBOP5 == 29 && Core::g_CoreStartupParameter.bEnableFPRF) {
|
||||||
|
Default(inst); return;
|
||||||
|
}
|
||||||
|
|
||||||
FPSCR.FPRF = compareResult;
|
bool single_precision = inst.OPCD == 59;
|
||||||
CR = (CR & (~(0xf0000000 >> (_inst.CRFD * 4)))) | (compareResult << ((7 - _inst.CRFD) * 4));
|
|
||||||
|
int a = inst.FA;
|
||||||
|
int b = inst.FB;
|
||||||
|
int c = inst.FC;
|
||||||
|
int d = inst.FD;
|
||||||
|
|
||||||
|
fpr.Lock(a, b, c, d);
|
||||||
|
MOVSD(XMM0, fpr.R(a));
|
||||||
|
switch (inst.SUBOP5)
|
||||||
|
{
|
||||||
|
case 28: //msub
|
||||||
|
MULSD(XMM0, fpr.R(c));
|
||||||
|
SUBSD(XMM0, fpr.R(b));
|
||||||
|
break;
|
||||||
|
case 29: //madd
|
||||||
|
MULSD(XMM0, fpr.R(c));
|
||||||
|
ADDSD(XMM0, fpr.R(b));
|
||||||
|
break;
|
||||||
|
case 30: //nmsub
|
||||||
|
MULSD(XMM0, fpr.R(c));
|
||||||
|
SUBSD(XMM0, fpr.R(b));
|
||||||
|
XORPD(XMM0, M((void*)&psSignBits2));
|
||||||
|
break;
|
||||||
|
case 31: //nmadd
|
||||||
|
MULSD(XMM0, fpr.R(c));
|
||||||
|
ADDSD(XMM0, fpr.R(b));
|
||||||
|
XORPD(XMM0, M((void*)&psSignBits2));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
fpr.LoadToX64(d, false);
|
||||||
|
//YES it is necessary to dupe the result :(
|
||||||
|
//TODO : analysis - does the top reg get used? If so, dupe, if not, don't.
|
||||||
|
if (single_precision) {
|
||||||
|
ForceSinglePrecisionS(XMM0);
|
||||||
|
MOVDDUP(fpr.RX(d), R(XMM0));
|
||||||
|
} else {
|
||||||
|
MOVSD(fpr.RX(d), R(XMM0));
|
||||||
|
}
|
||||||
|
// SMB checks flags after this op. Let's lie.
|
||||||
|
//AND(32, M(&PowerPC::ppcState.fpscr), Imm32(~((0x80000000 >> 19) | (0x80000000 >> 15))));
|
||||||
|
//OR(32, M(&PowerPC::ppcState.fpscr), Imm32((0x80000000 >> 16)));
|
||||||
|
fpr.UnlockAll();
|
||||||
|
}
|
||||||
|
|
||||||
|
// JIT handler for fmr: moves the low double of FPR[FB] into FPR[FD].
void Jit64::fmrx(UGeckoInstruction inst)
{
	// The JIT, or only its floating point part, can be turned off from the debugger.
	if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
	{
		Default(inst);
		return;
	}
	INSTRUCTION_START;

	// The record form (Rc set) is not emitted here; it is handed to Default().
	if (inst.Rc)
	{
		Default(inst);
		return;
	}

	const int dst = inst.FD;
	const int src = inst.FB;

	// Pass 'true' so the current value of the destination is loaded first:
	// we don't want to destroy the high half of the register.
	fpr.LoadToX64(dst, true);
	MOVSD(fpr.RX(dst), fpr.R(src));
}
|
||||||
|
|
||||||
|
void Jit64::fcmpx(UGeckoInstruction inst)
|
||||||
|
{
|
||||||
|
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
|
||||||
|
{Default(inst); return;} // turn off from debugger
|
||||||
|
INSTRUCTION_START;
|
||||||
|
if (jo.fpAccurateFlags)
|
||||||
|
{
|
||||||
|
Default(inst);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
bool ordered = inst.SUBOP10 == 32;
|
||||||
|
/*
|
||||||
|
double fa = rPS0(_inst.FA);
|
||||||
|
double fb = rPS0(_inst.FB);
|
||||||
|
u32 compareResult;
|
||||||
|
|
||||||
|
if(IsNAN(fa) || IsNAN(fb)) compareResult = 1;
|
||||||
|
else if(fa < fb) compareResult = 8;
|
||||||
|
else if(fa > fb) compareResult = 4;
|
||||||
|
else compareResult = 2;
|
||||||
|
|
||||||
|
FPSCR.FPRF = compareResult;
|
||||||
|
CR = (CR & (~(0xf0000000 >> (_inst.CRFD * 4)))) | (compareResult << ((7 - _inst.CRFD) * 4));
|
||||||
*/
|
*/
|
||||||
int a = inst.FA;
|
int a = inst.FA;
|
||||||
int b = inst.FB;
|
int b = inst.FB;
|
||||||
int crf = inst.CRFD;
|
int crf = inst.CRFD;
|
||||||
int shift = crf * 4;
|
int shift = crf * 4;
|
||||||
//FPSCR
|
//FPSCR
|
||||||
//XOR(32,R(EAX),R(EAX));
|
//XOR(32,R(EAX),R(EAX));
|
||||||
|
|
||||||
fpr.Lock(a,b);
|
fpr.Lock(a,b);
|
||||||
if (a != b)
|
if (a != b)
|
||||||
fpr.LoadToX64(a, true);
|
fpr.LoadToX64(a, true);
|
||||||
|
|
||||||
// USES_CR
|
// USES_CR
|
||||||
if (ordered)
|
if (ordered)
|
||||||
COMISD(fpr.R(a).GetSimpleReg(), fpr.R(b));
|
COMISD(fpr.R(a).GetSimpleReg(), fpr.R(b));
|
||||||
else
|
else
|
||||||
UCOMISD(fpr.R(a).GetSimpleReg(), fpr.R(b));
|
UCOMISD(fpr.R(a).GetSimpleReg(), fpr.R(b));
|
||||||
FixupBranch pLesser = J_CC(CC_B);
|
FixupBranch pLesser = J_CC(CC_B);
|
||||||
FixupBranch pGreater = J_CC(CC_A);
|
FixupBranch pGreater = J_CC(CC_A);
|
||||||
// _x86Reg == 0
|
// _x86Reg == 0
|
||||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2));
|
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2));
|
||||||
FixupBranch continue1 = J();
|
FixupBranch continue1 = J();
|
||||||
// _x86Reg > 0
|
// _x86Reg > 0
|
||||||
SetJumpTarget(pGreater);
|
SetJumpTarget(pGreater);
|
||||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4));
|
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4));
|
||||||
FixupBranch continue2 = J();
|
FixupBranch continue2 = J();
|
||||||
// _x86Reg < 0
|
// _x86Reg < 0
|
||||||
SetJumpTarget(pLesser);
|
SetJumpTarget(pLesser);
|
||||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8));
|
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8));
|
||||||
SetJumpTarget(continue1);
|
SetJumpTarget(continue1);
|
||||||
SetJumpTarget(continue2);
|
SetJumpTarget(continue2);
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
|
@ -1878,7 +1878,7 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile, bool Mak
|
||||||
case StorePaired: {
|
case StorePaired: {
|
||||||
regSpill(RI, EAX);
|
regSpill(RI, EAX);
|
||||||
regSpill(RI, EDX);
|
regSpill(RI, EDX);
|
||||||
unsigned quantreg = *I >> 24;
|
u32 quantreg = *I >> 24;
|
||||||
Jit->MOVZX(32, 16, EAX, M(&PowerPC::ppcState.spr[SPR_GQR0 + quantreg]));
|
Jit->MOVZX(32, 16, EAX, M(&PowerPC::ppcState.spr[SPR_GQR0 + quantreg]));
|
||||||
Jit->MOVZX(32, 8, EDX, R(AL));
|
Jit->MOVZX(32, 8, EDX, R(AL));
|
||||||
// FIXME: Fix ModR/M encoding to allow [EDX*4+disp32]!
|
// FIXME: Fix ModR/M encoding to allow [EDX*4+disp32]!
|
||||||
|
|
|
@ -39,8 +39,6 @@
|
||||||
//#define INSTRUCTION_START Default(inst); return;
|
//#define INSTRUCTION_START Default(inst); return;
|
||||||
#define INSTRUCTION_START
|
#define INSTRUCTION_START
|
||||||
|
|
||||||
// The big problem is likely instructions that set the quantizers in the same block.
|
|
||||||
// We will have to break block after quantizers are written to.
|
|
||||||
void Jit64::psq_st(UGeckoInstruction inst)
|
void Jit64::psq_st(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
|
|
|
@ -687,6 +687,7 @@ void InitTables()
|
||||||
}
|
}
|
||||||
|
|
||||||
#define OPLOG
|
#define OPLOG
|
||||||
|
#define OP_TO_LOG "mcrfs"
|
||||||
|
|
||||||
#ifdef OPLOG
|
#ifdef OPLOG
|
||||||
namespace {
|
namespace {
|
||||||
|
@ -700,7 +701,7 @@ void CompileInstruction(UGeckoInstruction _inst)
|
||||||
GekkoOPInfo *info = GetOpInfo(_inst);
|
GekkoOPInfo *info = GetOpInfo(_inst);
|
||||||
if (info) {
|
if (info) {
|
||||||
#ifdef OPLOG
|
#ifdef OPLOG
|
||||||
if (!strcmp(info->opname, "mffsx")) { ///"mcrfs"
|
if (!strcmp(info->opname, OP_TO_LOG)){ ///"mcrfs"
|
||||||
rsplocations.push_back(jit.js.compilerPC);
|
rsplocations.push_back(jit.js.compilerPC);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -764,9 +765,9 @@ void LogCompiledInstructions()
|
||||||
}
|
}
|
||||||
fclose(f);
|
fclose(f);
|
||||||
#ifdef OPLOG
|
#ifdef OPLOG
|
||||||
f = fopen(StringFromFormat(FULL_LOGS_DIR "mcrfs_at.txt", time).c_str(), "w");
|
f = fopen(StringFromFormat(FULL_LOGS_DIR OP_TO_LOG "_at.txt", time).c_str(), "w");
|
||||||
for (size_t i = 0; i < rsplocations.size(); i++) {
|
for (size_t i = 0; i < rsplocations.size(); i++) {
|
||||||
fprintf(f, "mffsx: %08x\n", rsplocations[i]);
|
fprintf(f, OP_TO_LOG ": %08x\n", rsplocations[i]);
|
||||||
}
|
}
|
||||||
fclose(f);
|
fclose(f);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -43,20 +43,22 @@ static CoreMode mode;
|
||||||
|
|
||||||
void CompactCR()
|
void CompactCR()
|
||||||
{
|
{
|
||||||
ppcState.cr = 0;
|
u32 new_cr = ppcState.cr_fast[0] << 28;
|
||||||
for (int i = 0; i < 8; i++) {
|
for (int i = 1; i < 8; i++)
|
||||||
ppcState.cr |= ppcState.cr_fast[i] << (28 - i * 4);
|
{
|
||||||
|
new_cr |= ppcState.cr_fast[i] << (28 - i * 4);
|
||||||
}
|
}
|
||||||
|
ppcState.cr = new_cr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ExpandCR()
|
void ExpandCR()
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 8; i++) {
|
for (int i = 0; i < 8; i++)
|
||||||
|
{
|
||||||
ppcState.cr_fast[i] = (ppcState.cr >> (28 - i * 4)) & 0xF;
|
ppcState.cr_fast[i] = (ppcState.cr >> (28 - i * 4)) & 0xF;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void DoState(PointerWrap &p)
|
void DoState(PointerWrap &p)
|
||||||
{
|
{
|
||||||
p.Do(ppcState);
|
p.Do(ppcState);
|
||||||
|
@ -304,7 +306,7 @@ void CheckExceptions()
|
||||||
ppcState.Exceptions &= ~EXCEPTION_ALIGNMENT;
|
ppcState.Exceptions &= ~EXCEPTION_ALIGNMENT;
|
||||||
}
|
}
|
||||||
|
|
||||||
// EXTERNAL INTTERUPT
|
// EXTERNAL INTERRUPT
|
||||||
else if (MSR & 0x0008000) //hacky...the exception shouldn't be generated if EE isn't set...
|
else if (MSR & 0x0008000) //hacky...the exception shouldn't be generated if EE isn't set...
|
||||||
{
|
{
|
||||||
if (ppcState.Exceptions & EXCEPTION_EXTERNAL_INT)
|
if (ppcState.Exceptions & EXCEPTION_EXTERNAL_INT)
|
||||||
|
@ -353,4 +355,78 @@ void OnIdleIL()
|
||||||
CoreTiming::Idle();
|
CoreTiming::Idle();
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace
|
// Classifies a double and returns the matching 5-bit FPRF code
// (class descriptor bit C on top, then the <, >, =, ? bits):
//   0x11 NaN            0x09 -infinity      0x05 +infinity
//   0x08 -normalized    0x04 +normalized
//   0x18 -denormalized  0x14 +denormalized
//   0x12 -zero          0x02 +zero
// The function only computes the code; storing it is left to the caller.
int PPCFPClass(double dvalue)
{
#ifdef _WIN32
	switch (_fpclass(dvalue))
	{
	case _FPCLASS_SNAN:
	case _FPCLASS_QNAN: return 0x11;
	case _FPCLASS_NINF: return 0x9;
	case _FPCLASS_NN:   return 0x8;
	case _FPCLASS_ND:   return 0x18;
	case _FPCLASS_NZ:   return 0x12;
	case _FPCLASS_PZ:   return 0x2;
	case _FPCLASS_PD:   return 0x14;
	case _FPCLASS_PN:   return 0x4;
	case _FPCLASS_PINF: return 0x5;
	default:            return 0x4;
	}
#else
	// TODO: Make sure the below is equivalent to the above - then switch win32 implementation to it.
	union {
		double d;
		unsigned long long i;
	} value;
	value.d = dvalue;

	// Dissect the IEEE-754 double: 1 sign bit, 11 exponent bits, 52 mantissa bits.
	// The sign is tested on the full 64-bit word; truncating to int first would
	// throw away bit 63 and make every value look positive.
	const bool negative = (value.i & 0x8000000000000000ULL) != 0;
	const int exp = (int)((value.i >> 52) & 0x7FF);
	const unsigned long long mantissa = value.i & 0x000FFFFFFFFFFFFFULL;

	if (exp == 0x7FF)
	{
		// All exponent bits set: NaN if any mantissa bit is set, otherwise infinity.
		// Signaling and quiet NaNs share one code, as in the win32 table above.
		if (mantissa)
			return 0x11;
		return negative ? 0x9 : 0x5;
	}

	if (exp == 0)
	{
		// Zero exponent: denormalized if any mantissa bit is set, otherwise zero.
		if (mantissa)
			return negative ? 0x18 : 0x14;
		return negative ? 0x12 : 0x2;
	}

	// 1 <= exp <= 2046: nice normalized number.
	return negative ? 0x8 : 0x4;
#endif
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
void UpdateFPRF(double dvalue)
|
||||||
|
{
|
||||||
|
FPSCR.FPRF = PowerPC::PPCFPClass(dvalue);
|
||||||
|
}
|
|
@ -127,9 +127,8 @@ void OnIdleIL();
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
// Wrappers to make it easier to in the future completely replace the storage of CR and Carry bits
|
// Fast CR system - store them in single bytes instead of nibbles to not have to
|
||||||
// to something more x86-friendly. These are not used 100% consistently yet - and if we do this, we
|
// mask/shift them out.
|
||||||
// need the corresponding stuff on the JIT side too.
|
|
||||||
|
|
||||||
// These are intended to stay fast, probably become faster, and are not likely to slow down much if at all.
|
// These are intended to stay fast, probably become faster, and are not likely to slow down much if at all.
|
||||||
inline void SetCRField(int cr_field, int value) {
|
inline void SetCRField(int cr_field, int value) {
|
||||||
|
@ -187,4 +186,6 @@ inline void SetXER_SO(int value) {
|
||||||
((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).SO = value;
|
((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).SO = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void UpdateFPRF(double dvalue);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
#include "Debugger.h"
|
#include "Debugger.h"
|
||||||
#include "RegisterView.h"
|
#include "RegisterView.h"
|
||||||
#include "PowerPC/PowerPC.h"
|
#include "PowerPC/PowerPC.h"
|
||||||
|
#include "HW/PeripheralInterface.h"
|
||||||
|
|
||||||
// F-zero 80005e60 wtf??
|
// F-zero 80005e60 wtf??
|
||||||
|
|
||||||
|
@ -25,7 +26,7 @@ extern const char* GetGPRName(unsigned int index);
|
||||||
extern const char* GetFPRName(unsigned int index);
|
extern const char* GetFPRName(unsigned int index);
|
||||||
|
|
||||||
static const char *special_reg_names[] = {
|
static const char *special_reg_names[] = {
|
||||||
"PC", "LR", "CTR", "CR", "FPSCR", "SRR0", "SRR1",
|
"PC", "LR", "CTR", "CR", "FPSCR", "SRR0", "SRR1", "Exceptions", "Int Mask", "Int Cause",
|
||||||
};
|
};
|
||||||
|
|
||||||
static u32 GetSpecialRegValue(int reg) {
|
static u32 GetSpecialRegValue(int reg) {
|
||||||
|
@ -37,7 +38,10 @@ static u32 GetSpecialRegValue(int reg) {
|
||||||
case 4: return PowerPC::ppcState.fpscr;
|
case 4: return PowerPC::ppcState.fpscr;
|
||||||
case 5: return PowerPC::ppcState.spr[SPR_SRR0];
|
case 5: return PowerPC::ppcState.spr[SPR_SRR0];
|
||||||
case 6: return PowerPC::ppcState.spr[SPR_SRR1];
|
case 6: return PowerPC::ppcState.spr[SPR_SRR1];
|
||||||
default: return 0;
|
case 7: return PowerPC::ppcState.Exceptions;
|
||||||
|
case 8: return CPeripheralInterface::GetMask();
|
||||||
|
case 9: return CPeripheralInterface::GetCause();
|
||||||
|
default: return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -29,14 +29,16 @@
|
||||||
// PC (specials)
|
// PC (specials)
|
||||||
// LR
|
// LR
|
||||||
// CTR
|
// CTR
|
||||||
// CR0
|
// CR0-7
|
||||||
|
// FPSCR
|
||||||
// SRR0
|
// SRR0
|
||||||
// SRR1
|
// SRR1
|
||||||
|
// Exceptions
|
||||||
|
|
||||||
class CRegTable : public wxGridTableBase
|
class CRegTable : public wxGridTableBase
|
||||||
{
|
{
|
||||||
enum {
|
enum {
|
||||||
NUM_SPECIALS = 7,
|
NUM_SPECIALS = 10,
|
||||||
};
|
};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
|
@ -131,6 +131,7 @@ bool BootCore(const std::string& _rFilename)
|
||||||
ini->Get("Core", "UseDualCore", &StartUp.bUseDualCore, StartUp.bUseDualCore);
|
ini->Get("Core", "UseDualCore", &StartUp.bUseDualCore, StartUp.bUseDualCore);
|
||||||
ini->Get("Core", "SkipIdle", &StartUp.bSkipIdle, StartUp.bSkipIdle);
|
ini->Get("Core", "SkipIdle", &StartUp.bSkipIdle, StartUp.bSkipIdle);
|
||||||
ini->Get("Core", "OptimizeQuantizers", &StartUp.bOptimizeQuantizers, StartUp.bOptimizeQuantizers);
|
ini->Get("Core", "OptimizeQuantizers", &StartUp.bOptimizeQuantizers, StartUp.bOptimizeQuantizers);
|
||||||
|
ini->Get("Core", "EnableFPRF", &StartUp.bEnableFPRF, StartUp.bEnableFPRF);
|
||||||
ini->Get("Core", "TLBHack", &StartUp.iTLBHack, StartUp.iTLBHack);
|
ini->Get("Core", "TLBHack", &StartUp.iTLBHack, StartUp.iTLBHack);
|
||||||
|
|
||||||
// ------------------------------------------------
|
// ------------------------------------------------
|
||||||
|
|
Loading…
Reference in New Issue