Merge branch 'misc-speedups'
* misc-speedups: Fixed, re-enabled, and slightly optimized the JIT version of fcmpo/fcmpu. Slightly more precise speed percent display (this is really minor). A small thread synchronization speedup for dual core mode; it's most noticeable in games where the CPU is running behind compared to the GPU. Conflicts: Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp. The Fifo.cpp changes from rdaefb3b550e2 were not merged as there was no performance benefit.
This commit is contained in:
commit
a54e72640f
|
@ -281,7 +281,7 @@ namespace this_thread
|
||||||
inline void yield()
|
inline void yield()
|
||||||
{
|
{
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
Sleep(0);
|
SwitchToThread();
|
||||||
#else
|
#else
|
||||||
sleep(0);
|
sleep(0);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -585,7 +585,7 @@ void VideoThrottle()
|
||||||
|
|
||||||
u32 FPS = Common::AtomicLoad(DrawnFrame) * 1000 / ElapseTime;
|
u32 FPS = Common::AtomicLoad(DrawnFrame) * 1000 / ElapseTime;
|
||||||
u32 VPS = DrawnVideo * 1000 / ElapseTime;
|
u32 VPS = DrawnVideo * 1000 / ElapseTime;
|
||||||
u32 Speed = VPS * 100 / VideoInterface::TargetRefreshRate;
|
u32 Speed = DrawnVideo * (100 * 1000) / (VideoInterface::TargetRefreshRate * ElapseTime);
|
||||||
|
|
||||||
// Settings are shown the same for both extended and summary info
|
// Settings are shown the same for both extended and summary info
|
||||||
std::string SSettings = StringFromFormat("%s %s", cpu_core_base->GetName(), _CoreParameter.bCPUThread ? "DC" : "SC");
|
std::string SSettings = StringFromFormat("%s %s", cpu_core_base->GetName(), _CoreParameter.bCPUThread ? "DC" : "SC");
|
||||||
|
|
|
@ -191,8 +191,7 @@ void Jit64::Init()
|
||||||
else
|
else
|
||||||
jo.enableBlocklink = !Core::g_CoreStartupParameter.bMMU;
|
jo.enableBlocklink = !Core::g_CoreStartupParameter.bMMU;
|
||||||
}
|
}
|
||||||
|
jo.fpAccurateFcmp = Core::g_CoreStartupParameter.bEnableFPRF;
|
||||||
jo.fpAccurateFcmp = true; // Fallback to Interpreter
|
|
||||||
jo.optimizeGatherPipe = true;
|
jo.optimizeGatherPipe = true;
|
||||||
jo.fastInterrupts = false;
|
jo.fastInterrupts = false;
|
||||||
jo.accurateSinglePrecision = true;
|
jo.accurateSinglePrecision = true;
|
||||||
|
|
|
@ -229,8 +229,6 @@ void Jit64::fmrx(UGeckoInstruction inst)
|
||||||
|
|
||||||
void Jit64::fcmpx(UGeckoInstruction inst)
|
void Jit64::fcmpx(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
// TODO : This still causes crashes in Nights, and broken graphics
|
|
||||||
// in Paper Mario, Super Paper Mario as well as SoulCalibur 2 prolly others too.. :(
|
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(FloatingPoint)
|
JITDISABLE(FloatingPoint)
|
||||||
if (jo.fpAccurateFcmp) {
|
if (jo.fpAccurateFcmp) {
|
||||||
|
@ -243,36 +241,59 @@ void Jit64::fcmpx(UGeckoInstruction inst)
|
||||||
int crf = inst.CRFD;
|
int crf = inst.CRFD;
|
||||||
|
|
||||||
fpr.Lock(a,b);
|
fpr.Lock(a,b);
|
||||||
if (a != b) fpr.BindToRegister(a, true);
|
fpr.BindToRegister(b, true);
|
||||||
|
|
||||||
// Are we masking sNaN invalid floating point exceptions? If not this could crash if we don't handle the exception?
|
// Are we masking sNaN invalid floating point exceptions? If not this could crash if we don't handle the exception?
|
||||||
UCOMISD(fpr.R(a).GetSimpleReg(), fpr.R(b));
|
UCOMISD(fpr.R(b).GetSimpleReg(), fpr.R(a));
|
||||||
|
|
||||||
FixupBranch pNaN = J_CC(CC_P);
|
FixupBranch pNaN, pLesser, pGreater;
|
||||||
FixupBranch pLesser = J_CC(CC_B);
|
FixupBranch continue1, continue2, continue3;
|
||||||
FixupBranch pGreater = J_CC(CC_A);
|
|
||||||
|
if (a != b)
|
||||||
|
{
|
||||||
|
// if B > A, goto Lesser's jump target
|
||||||
|
pLesser = J_CC(CC_A);
|
||||||
|
}
|
||||||
|
|
||||||
|
// if (B != B) or (A != A), goto NaN's jump target
|
||||||
|
pNaN = J_CC(CC_P);
|
||||||
|
|
||||||
|
if (a != b)
|
||||||
|
{
|
||||||
|
// if B < A, goto Greater's jump target
|
||||||
|
// JB can't precede the NaN check because it doesn't test ZF
|
||||||
|
pGreater = J_CC(CC_B);
|
||||||
|
}
|
||||||
|
|
||||||
// Equal
|
// Equal
|
||||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2));
|
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2));
|
||||||
FixupBranch continue1 = J();
|
continue1 = J();
|
||||||
|
|
||||||
// Greater Than
|
|
||||||
SetJumpTarget(pGreater);
|
|
||||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4));
|
|
||||||
FixupBranch continue2 = J();
|
|
||||||
|
|
||||||
// Less Than
|
|
||||||
SetJumpTarget(pLesser);
|
|
||||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8));
|
|
||||||
FixupBranch continue3 = J();
|
|
||||||
|
|
||||||
// NAN
|
// NAN
|
||||||
SetJumpTarget(pNaN);
|
SetJumpTarget(pNaN);
|
||||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x1));
|
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x1));
|
||||||
|
|
||||||
|
if (a != b)
|
||||||
|
{
|
||||||
|
continue2 = J();
|
||||||
|
|
||||||
|
// Greater Than
|
||||||
|
SetJumpTarget(pGreater);
|
||||||
|
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4));
|
||||||
|
continue3 = J();
|
||||||
|
|
||||||
|
// Less Than
|
||||||
|
SetJumpTarget(pLesser);
|
||||||
|
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8));
|
||||||
|
}
|
||||||
|
|
||||||
SetJumpTarget(continue1);
|
SetJumpTarget(continue1);
|
||||||
SetJumpTarget(continue2);
|
if (a != b)
|
||||||
SetJumpTarget(continue3);
|
{
|
||||||
|
SetJumpTarget(continue2);
|
||||||
|
SetJumpTarget(continue3);
|
||||||
|
}
|
||||||
|
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue