Merge pull request #5144 from MerryMage/mfspr

Jit_SystemRegisters: Make mfspr PIE-compliant
Markus Wick, 2017-04-10 22:30:56 +02:00, committed by GitHub
commit 34ac749b8b
5 changed files with 52 additions and 49 deletions

Source/Core/Core/CoreTiming.cpp

@@ -65,8 +65,6 @@ static std::mutex s_ts_write_lock;
static Common::FifoQueue<Event, false> s_ts_queue;
static float s_last_OC_factor;
-float g_last_OC_factor_inverted;
-int g_slice_length;
static constexpr int MAX_SLICE_LENGTH = 20000;
static s64 s_idled_cycles;
@@ -76,9 +74,7 @@ static u64 s_fake_dec_start_ticks;
// Are we in a function that has been called from Advance()
static bool s_is_global_timer_sane;
-s64 g_global_timer;
-u64 g_fake_TB_start_value;
-u64 g_fake_TB_start_ticks;
+Globals g;
static EventType* s_ev_lost = nullptr;
@@ -95,7 +91,7 @@ static void EmptyTimedCallback(u64 userdata, s64 cyclesLate)
// but the effect is largely the same.
static int DowncountToCycles(int downcount)
{
-return static_cast<int>(downcount * g_last_OC_factor_inverted);
+return static_cast<int>(downcount * g.last_OC_factor_inverted);
}
static int CyclesToDowncount(int cycles)
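For readers outside the diff's context: this pair converts between guest cycles and the downcount register, scaled by the CPU overclock factor. Below is a minimal standalone sketch of that round trip; the 1.5x overclock value is an assumption for illustration, and CyclesToDowncount's body is inferred, since the diff only shows its signature.

```cpp
#include <cstdio>

static float s_last_OC_factor = 1.5f;  // assumed 150% overclock, for illustration
static float last_OC_factor_inverted = 1.0f / s_last_OC_factor;

static int DowncountToCycles(int downcount)
{
  return static_cast<int>(downcount * last_OC_factor_inverted);
}

static int CyclesToDowncount(int cycles)
{
  return static_cast<int>(cycles * s_last_OC_factor);
}

int main()
{
  // At 1.5x overclock, a 20000-cycle slice becomes a 30000-tick downcount.
  int downcount = CyclesToDowncount(20000);
  std::printf("downcount=%d, cycles=%d\n", downcount, DowncountToCycles(downcount));
  return 0;
}
```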
@@ -127,10 +123,10 @@ void UnregisterAllEvents()
void Init()
{
s_last_OC_factor = SConfig::GetInstance().m_OCEnable ? SConfig::GetInstance().m_OCFactor : 1.0f;
-g_last_OC_factor_inverted = 1.0f / s_last_OC_factor;
+g.last_OC_factor_inverted = 1.0f / s_last_OC_factor;
PowerPC::ppcState.downcount = CyclesToDowncount(MAX_SLICE_LENGTH);
-g_slice_length = MAX_SLICE_LENGTH;
-g_global_timer = 0;
+g.slice_length = MAX_SLICE_LENGTH;
+g.global_timer = 0;
s_idled_cycles = 0;
// The time between CoreTiming being initialized and the first call to Advance() is considered
@@ -154,15 +150,15 @@ void Shutdown()
void DoState(PointerWrap& p)
{
std::lock_guard<std::mutex> lk(s_ts_write_lock);
-p.Do(g_slice_length);
-p.Do(g_global_timer);
+p.Do(g.slice_length);
+p.Do(g.global_timer);
p.Do(s_idled_cycles);
p.Do(s_fake_dec_start_value);
p.Do(s_fake_dec_start_ticks);
-p.Do(g_fake_TB_start_value);
-p.Do(g_fake_TB_start_ticks);
+p.Do(g.fake_TB_start_value);
+p.Do(g.fake_TB_start_ticks);
p.Do(s_last_OC_factor);
-g_last_OC_factor_inverted = 1.0f / s_last_OC_factor;
+g.last_OC_factor_inverted = 1.0f / s_last_OC_factor;
p.Do(s_event_fifo_id);
p.DoMarker("CoreTimingData");
@@ -212,11 +208,11 @@ void DoState(PointerWrap& p)
// it from any other thread, you are doing something evil
u64 GetTicks()
{
-u64 ticks = static_cast<u64>(g_global_timer);
+u64 ticks = static_cast<u64>(g.global_timer);
if (!s_is_global_timer_sane)
{
int downcount = DowncountToCycles(PowerPC::ppcState.downcount);
-ticks += g_slice_length - downcount;
+ticks += g.slice_length - downcount;
}
return ticks;
}
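GetTicks() refines the coarse global_timer, which only advances in Advance(), with the progress made inside the current slice: the cycles executed so far equal slice_length minus the cycles still remaining on the downcount. A worked sketch with assumed numbers:

```cpp
#include <cstdint>
#include <cstdio>

int main()
{
  // Assumed snapshot, for illustration only.
  const int64_t global_timer = 1000000;  // ticks at the start of the current slice
  const int slice_length = 20000;        // cycles allotted to this slice
  const int downcount = 15000;           // DowncountToCycles(ppcState.downcount)

  // Same arithmetic as GetTicks(): add the cycles already burned this slice.
  uint64_t ticks = static_cast<uint64_t>(global_timer);
  ticks += slice_length - downcount;
  std::printf("ticks=%llu\n", static_cast<unsigned long long>(ticks));  // ticks=1005000
  return 0;
}
```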
@@ -268,7 +264,7 @@ void ScheduleEvent(s64 cycles_into_future, EventType* event_type, u64 userdata,
}
std::lock_guard<std::mutex> lk(s_ts_write_lock);
-s_ts_queue.Push(Event{g_global_timer + cycles_into_future, 0, userdata, event_type});
+s_ts_queue.Push(Event{g.global_timer + cycles_into_future, 0, userdata, event_type});
}
}
@@ -297,8 +293,8 @@ void ForceExceptionCheck(s64 cycles)
if (DowncountToCycles(PowerPC::ppcState.downcount) > cycles)
{
// downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int here.
-// Account for cycles already executed by adjusting the g_slice_length
-g_slice_length -= DowncountToCycles(PowerPC::ppcState.downcount) - static_cast<int>(cycles);
+// Account for cycles already executed by adjusting the g.slice_length
+g.slice_length -= DowncountToCycles(PowerPC::ppcState.downcount) - static_cast<int>(cycles);
PowerPC::ppcState.downcount = CyclesToDowncount(static_cast<int>(cycles));
}
}
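The slice adjustment keeps Advance()'s bookkeeping consistent: Advance() computes cycles executed as slice_length minus the remaining downcount, so when the downcount is forcibly shortened, slice_length must shrink by the same number of cycles that were cut. A numeric sketch under assumed values:

```cpp
#include <cstdio>

int main()
{
  // Assumed state: a 20000-cycle slice with 15000 cycles still pending,
  // and an exception check requested 100 cycles from now.
  int slice_length = 20000;
  int downcount = 15000;  // in cycles, i.e. DowncountToCycles(ppcState.downcount)
  const int cycles = 100;

  const int executed_before = slice_length - downcount;  // 5000

  // Mirror of ForceExceptionCheck(): trim the slice by the cycles being cut.
  slice_length -= downcount - cycles;  // 20000 - 14900 = 5100
  downcount = cycles;

  // The "cycles executed" figure is unchanged, so Advance() stays correct.
  std::printf("before=%d after=%d\n", executed_before, slice_length - downcount);  // 5000 5000
  return 0;
}
```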
@@ -317,22 +313,22 @@ void Advance()
{
MoveEvents();
-int cyclesExecuted = g_slice_length - DowncountToCycles(PowerPC::ppcState.downcount);
-g_global_timer += cyclesExecuted;
+int cyclesExecuted = g.slice_length - DowncountToCycles(PowerPC::ppcState.downcount);
+g.global_timer += cyclesExecuted;
s_last_OC_factor = SConfig::GetInstance().m_OCEnable ? SConfig::GetInstance().m_OCFactor : 1.0f;
-g_last_OC_factor_inverted = 1.0f / s_last_OC_factor;
-g_slice_length = MAX_SLICE_LENGTH;
+g.last_OC_factor_inverted = 1.0f / s_last_OC_factor;
+g.slice_length = MAX_SLICE_LENGTH;
s_is_global_timer_sane = true;
-while (!s_event_queue.empty() && s_event_queue.front().time <= g_global_timer)
+while (!s_event_queue.empty() && s_event_queue.front().time <= g.global_timer)
{
Event evt = std::move(s_event_queue.front());
std::pop_heap(s_event_queue.begin(), s_event_queue.end(), std::greater<Event>());
s_event_queue.pop_back();
// NOTICE_LOG(POWERPC, "[Scheduler] %-20s (%lld, %lld)", evt.type->name->c_str(),
-// g_global_timer, evt.time);
-evt.type->callback(evt.userdata, g_global_timer - evt.time);
+// g.global_timer, evt.time);
+evt.type->callback(evt.userdata, g.global_timer - evt.time);
}
s_is_global_timer_sane = false;
@@ -340,11 +336,11 @@ void Advance()
// Still events left (scheduled in the future)
if (!s_event_queue.empty())
{
-g_slice_length = static_cast<int>(
-std::min<s64>(s_event_queue.front().time - g_global_timer, MAX_SLICE_LENGTH));
+g.slice_length = static_cast<int>(
+std::min<s64>(s_event_queue.front().time - g.global_timer, MAX_SLICE_LENGTH));
}
-PowerPC::ppcState.downcount = CyclesToDowncount(g_slice_length);
+PowerPC::ppcState.downcount = CyclesToDowncount(g.slice_length);
// Check for any external exceptions.
// It's important to do this after processing events otherwise any exceptions will be delayed
@@ -359,7 +355,7 @@ void LogPendingEvents()
std::sort(clone.begin(), clone.end());
for (const Event& ev : clone)
{
-INFO_LOG(POWERPC, "PENDING: Now: %" PRId64 " Pending: %" PRId64 " Type: %s", g_global_timer,
+INFO_LOG(POWERPC, "PENDING: Now: %" PRId64 " Pending: %" PRId64 " Type: %s", g.global_timer,
ev.time, ev.type->name->c_str());
}
}
@@ -369,8 +365,8 @@ void AdjustEventQueueTimes(u32 new_ppc_clock, u32 old_ppc_clock)
{
for (Event& ev : s_event_queue)
{
-const s64 ticks = (ev.time - g_global_timer) * new_ppc_clock / old_ppc_clock;
-ev.time = g_global_timer + ticks;
+const s64 ticks = (ev.time - g.global_timer) * new_ppc_clock / old_ppc_clock;
+ev.time = g.global_timer + ticks;
}
}
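AdjustEventQueueTimes() rescales every pending deadline proportionally around the current time, so an event scheduled N cycles ahead at the old clock stays the same wall-clock distance away at the new clock. A sketch with assumed clock rates:

```cpp
#include <cstdint>
#include <cstdio>

int main()
{
  // Assumed values: an event 729000 cycles ahead, clock dropping 729 -> 486 MHz.
  const int64_t global_timer = 1000000;
  int64_t ev_time = global_timer + 729000;
  const uint32_t old_ppc_clock = 729000000;
  const uint32_t new_ppc_clock = 486000000;

  // Same proportional rescale as the loop body above.
  const int64_t ticks = (ev_time - global_timer) * new_ppc_clock / old_ppc_clock;
  ev_time = global_timer + ticks;

  // 729000 * 486/729 = 486000 cycles ahead: the same wall-clock distance.
  std::printf("delta=%lld\n", static_cast<long long>(ev_time - global_timer));
  return 0;
}
```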
@@ -425,22 +421,22 @@ void SetFakeDecStartTicks(u64 val)
u64 GetFakeTBStartValue()
{
-return g_fake_TB_start_value;
+return g.fake_TB_start_value;
}
void SetFakeTBStartValue(u64 val)
{
-g_fake_TB_start_value = val;
+g.fake_TB_start_value = val;
}
u64 GetFakeTBStartTicks()
{
-return g_fake_TB_start_ticks;
+return g.fake_TB_start_ticks;
}
void SetFakeTBStartTicks(u64 val)
{
-g_fake_TB_start_ticks = val;
+g.fake_TB_start_ticks = val;
}
} // namespace

Source/Core/Core/CoreTiming.h

@@ -25,11 +25,15 @@ class PointerWrap;
namespace CoreTiming
{
// These really shouldn't be global, but jit64 accesses them directly
-extern s64 g_global_timer;
-extern u64 g_fake_TB_start_value;
-extern u64 g_fake_TB_start_ticks;
-extern int g_slice_length;
-extern float g_last_OC_factor_inverted;
+struct Globals
+{
+s64 global_timer;
+u64 fake_TB_start_value;
+u64 fake_TB_start_ticks;
+int slice_length;
+float last_OC_factor_inverted;
+};
+extern Globals g;
// CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is
// required to end slice -1 and start slice 0 before the first cycle of code is executed.
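This struct is the heart of the PIE fix: in a position-independent executable the absolute address of each global is not a link-time constant, so the JIT cannot embed `M(&g_global_timer)`-style absolute operands in the code it emits. With one `Globals` blob, the emitter loads the address of `CoreTiming::g` into a register once at runtime and reaches every field through a constant `offsetof` displacement. A minimal sketch of that addressing pattern in ordinary C++ (not Dolphin's emitter):

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Stand-in for CoreTiming::Globals above.
struct Globals
{
  int64_t global_timer;
  uint64_t fake_TB_start_value;
  uint64_t fake_TB_start_ticks;
  int slice_length;
  float last_OC_factor_inverted;
};
static Globals g{};

int main()
{
  g.slice_length = 20000;

  // One runtime base pointer plus compile-time displacements: the same shape as
  // MOV(64, R(RCX), ImmPtr(&CoreTiming::g)) followed by MDisp(RCX, offsetof(...)).
  char* base = reinterpret_cast<char*>(&g);
  int* slice = reinterpret_cast<int*>(base + offsetof(Globals, slice_length));
  std::printf("slice_length=%d\n", *slice);  // 20000, read via base + displacement
  return 0;
}
```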

Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp

@@ -203,7 +203,7 @@ void Interpreter::SingleStep()
SingleStepInner();
// The interpreter ignores instruction timing information outside the 'fast runloop'.
-CoreTiming::g_slice_length = 1;
+CoreTiming::g.slice_length = 1;
PowerPC::ppcState.downcount = 0;
if (PowerPC::ppcState.Exceptions)

Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp

@@ -282,18 +282,21 @@ void Jit64::mfspr(UGeckoInstruction inst)
// no register choice
gpr.FlushLockX(RDX, RAX);
+gpr.FlushLockX(RCX);
+MOV(64, R(RCX), ImmPtr(&CoreTiming::g));
// An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the
// cost of calling out to C for this is actually significant.
// Scale downcount by the CPU overclocking factor.
CVTSI2SS(XMM0, PPCSTATE(downcount));
-MULSS(XMM0, M(&CoreTiming::g_last_OC_factor_inverted));
+MULSS(XMM0, MDisp(RCX, offsetof(CoreTiming::Globals, last_OC_factor_inverted)));
CVTSS2SI(RDX, R(XMM0)); // RDX is downcount scaled by the overclocking factor
-MOV(32, R(RAX), M(&CoreTiming::g_slice_length));
+MOV(32, R(RAX), MDisp(RCX, offsetof(CoreTiming::Globals, slice_length)));
SUB(64, R(RAX), R(RDX)); // cycles since the last CoreTiming::Advance() event is (slicelength -
// Scaled_downcount)
-ADD(64, R(RAX), M(&CoreTiming::g_global_timer));
-SUB(64, R(RAX), M(&CoreTiming::g_fake_TB_start_ticks));
+ADD(64, R(RAX), MDisp(RCX, offsetof(CoreTiming::Globals, global_timer)));
+SUB(64, R(RAX), MDisp(RCX, offsetof(CoreTiming::Globals, fake_TB_start_ticks)));
// It might seem convenient to correct the timer for the block position here for even more
// accurate timing, but currently this can break games. If we end up reading a time *after* the
@@ -309,7 +312,7 @@ void Jit64::mfspr(UGeckoInstruction inst)
// a / 12 = (a * 0xAAAAAAAAAAAAAAAB) >> 67
MOV(64, R(RDX), Imm64(0xAAAAAAAAAAAAAAABULL));
MUL(64, R(RDX));
-MOV(64, R(RAX), M(&CoreTiming::g_fake_TB_start_value));
+MOV(64, R(RAX), MDisp(RCX, offsetof(CoreTiming::Globals, fake_TB_start_value)));
SHR(64, R(RDX), Imm8(3));
ADD(64, R(RAX), R(RDX));
MOV(64, PPCSTATE(spr[SPR_TL]), R(RAX));
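The division by 12 (the ratio between core clock and time-base ticks) uses the fixed-point reciprocal from the comment above: a / 12 = (a * 0xAAAAAAAAAAAAAAAB) >> 67. The 64x64 MUL leaves the high 64 bits of the product in RDX, and the SHR by 3 supplies the remaining shift (64 + 3 = 67). A sketch verifying the identity, using the GCC/Clang `__int128` extension (an assumption of this example) for the wide multiply:

```cpp
#include <cstdint>
#include <cstdio>

// Reproduce the JIT's divide-by-12: full 64x64 -> 128-bit multiply, keep the
// high half (what MUL leaves in RDX), then shift right by the remaining 3 bits.
static uint64_t DivideBy12(uint64_t a)
{
  const unsigned __int128 product =
      static_cast<unsigned __int128>(a) * 0xAAAAAAAAAAAAAAABULL;
  const uint64_t high = static_cast<uint64_t>(product >> 64);  // RDX after MUL
  return high >> 3;                                            // total shift: 67
}

int main()
{
  const uint64_t tests[] = {0, 11, 12, 13, 486000000, 0x123456789ABCULL};
  for (uint64_t a : tests)
    std::printf("%llu / 12 = %llu (expected %llu)\n",
                static_cast<unsigned long long>(a),
                static_cast<unsigned long long>(DivideBy12(a)),
                static_cast<unsigned long long>(a / 12));
  return 0;
}
```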

Source/UnitTests/Core/CoreTimingTest.cpp

@@ -254,9 +254,9 @@ TEST(CoreTiming, ScheduleIntoPast)
// the stale value, i.e. effectively half-way through the previous slice.
// NOTE: We're only testing that the scheduler doesn't break, not whether this makes sense.
Core::UndeclareAsCPUThread();
-CoreTiming::g_global_timer -= 1000;
+CoreTiming::g.global_timer -= 1000;
CoreTiming::ScheduleEvent(0, cb_b, CB_IDS[1], CoreTiming::FromThread::NON_CPU);
-CoreTiming::g_global_timer += 1000;
+CoreTiming::g.global_timer += 1000;
Core::DeclareAsCPUThread();
AdvanceAndCheck(1, MAX_SLICE_LENGTH, MAX_SLICE_LENGTH + 1000);