forked from ShuriZma/suyu
1
0
Fork 0

Merge pull request #2965 from FernandoS27/fair-core-timing

Core Timing: Rework Core Timing to run all cores evenly.
This commit is contained in:
bunnei 2019-10-15 11:48:30 -04:00 committed by GitHub
commit cab2619aeb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 146 additions and 133 deletions

View File

@ -116,7 +116,7 @@ public:
num_interpreted_instructions = 0;
}
u64 GetTicksRemaining() override {
return std::max(parent.system.CoreTiming().GetDowncount(), 0);
return std::max(parent.system.CoreTiming().GetDowncount(), s64{0});
}
u64 GetCNTPCT() override {
return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks());

View File

@ -156,7 +156,7 @@ void ARM_Unicorn::Run() {
if (GDBStub::IsServerEnabled()) {
ExecuteInstructions(std::max(4000000, 0));
} else {
ExecuteInstructions(std::max(system.CoreTiming().GetDowncount(), 0));
ExecuteInstructions(std::max(system.CoreTiming().GetDowncount(), s64{0}));
}
}

View File

@ -85,24 +85,16 @@ void Cpu::RunLoop(bool tight_loop) {
// instead advance to the next event and try to yield to the next thread
if (Kernel::GetCurrentThread() == nullptr) {
LOG_TRACE(Core, "Core-{} idling", core_index);
if (IsMainCore()) {
// TODO(Subv): Only let CoreTiming idle if all 4 cores are idling.
core_timing.Idle();
core_timing.Advance();
}
core_timing.Idle();
core_timing.Advance();
PrepareReschedule();
} else {
if (IsMainCore()) {
core_timing.Advance();
}
if (tight_loop) {
arm_interface->Run();
} else {
arm_interface->Step();
}
core_timing.Advance();
}
Reschedule();

View File

@ -15,7 +15,7 @@
namespace Core::Timing {
constexpr int MAX_SLICE_LENGTH = 20000;
constexpr int MAX_SLICE_LENGTH = 10000;
struct CoreTiming::Event {
s64 time;
@ -38,10 +38,12 @@ CoreTiming::CoreTiming() = default;
CoreTiming::~CoreTiming() = default;
void CoreTiming::Initialize() {
downcount = MAX_SLICE_LENGTH;
downcounts.fill(MAX_SLICE_LENGTH);
time_slice.fill(MAX_SLICE_LENGTH);
slice_length = MAX_SLICE_LENGTH;
global_timer = 0;
idled_cycles = 0;
current_context = 0;
// The time between CoreTiming being initialized and the first call to Advance() is considered
// the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before
@ -110,7 +112,7 @@ void CoreTiming::UnscheduleEvent(const EventType* event_type, u64 userdata) {
u64 CoreTiming::GetTicks() const {
u64 ticks = static_cast<u64>(global_timer);
if (!is_global_timer_sane) {
ticks += slice_length - downcount;
ticks += accumulated_ticks;
}
return ticks;
}
@ -120,7 +122,8 @@ u64 CoreTiming::GetIdleTicks() const {
}
void CoreTiming::AddTicks(u64 ticks) {
downcount -= static_cast<int>(ticks);
accumulated_ticks += ticks;
downcounts[current_context] -= static_cast<s64>(ticks);
}
void CoreTiming::ClearPendingEvents() {
@ -141,22 +144,35 @@ void CoreTiming::RemoveEvent(const EventType* event_type) {
void CoreTiming::ForceExceptionCheck(s64 cycles) {
cycles = std::max<s64>(0, cycles);
if (downcount <= cycles) {
if (downcounts[current_context] <= cycles) {
return;
}
// downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int
// here. Account for cycles already executed by adjusting the g.slice_length
slice_length -= downcount - static_cast<int>(cycles);
downcount = static_cast<int>(cycles);
downcounts[current_context] = static_cast<int>(cycles);
}
std::optional<u64> CoreTiming::NextAvailableCore(const s64 needed_ticks) const {
const u64 original_context = current_context;
u64 next_context = (original_context + 1) % num_cpu_cores;
while (next_context != original_context) {
if (time_slice[next_context] >= needed_ticks) {
return {next_context};
} else if (time_slice[next_context] >= 0) {
return std::nullopt;
}
next_context = (next_context + 1) % num_cpu_cores;
}
return std::nullopt;
}
void CoreTiming::Advance() {
std::unique_lock<std::mutex> guard(inner_mutex);
const int cycles_executed = slice_length - downcount;
const u64 cycles_executed = accumulated_ticks;
time_slice[current_context] = std::max<s64>(0, time_slice[current_context] - accumulated_ticks);
global_timer += cycles_executed;
slice_length = MAX_SLICE_LENGTH;
is_global_timer_sane = true;
@ -173,24 +189,46 @@ void CoreTiming::Advance() {
// Still events left (scheduled in the future)
if (!event_queue.empty()) {
slice_length = static_cast<int>(
std::min<s64>(event_queue.front().time - global_timer, MAX_SLICE_LENGTH));
const s64 needed_ticks =
std::min<s64>(event_queue.front().time - global_timer, MAX_SLICE_LENGTH);
const auto next_core = NextAvailableCore(needed_ticks);
if (next_core) {
downcounts[*next_core] = needed_ticks;
}
}
downcount = slice_length;
accumulated_ticks = 0;
downcounts[current_context] = time_slice[current_context];
}
void CoreTiming::ResetRun() {
downcounts.fill(MAX_SLICE_LENGTH);
time_slice.fill(MAX_SLICE_LENGTH);
current_context = 0;
// Still events left (scheduled in the future)
if (!event_queue.empty()) {
const s64 needed_ticks =
std::min<s64>(event_queue.front().time - global_timer, MAX_SLICE_LENGTH);
downcounts[current_context] = needed_ticks;
}
is_global_timer_sane = false;
accumulated_ticks = 0;
}
void CoreTiming::Idle() {
idled_cycles += downcount;
downcount = 0;
accumulated_ticks += downcounts[current_context];
idled_cycles += downcounts[current_context];
downcounts[current_context] = 0;
}
std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
return std::chrono::microseconds{GetTicks() * 1000000 / BASE_CLOCK_RATE};
}
int CoreTiming::GetDowncount() const {
return downcount;
s64 CoreTiming::GetDowncount() const {
return downcounts[current_context];
}
} // namespace Core::Timing

View File

@ -7,6 +7,7 @@
#include <chrono>
#include <functional>
#include <mutex>
#include <optional>
#include <string>
#include <unordered_map>
#include <vector>
@ -104,7 +105,19 @@ public:
std::chrono::microseconds GetGlobalTimeUs() const;
int GetDowncount() const;
void ResetRun();
s64 GetDowncount() const;
void SwitchContext(u64 new_context) {
current_context = new_context;
}
bool CanCurrentContextRun() const {
return time_slice[current_context] > 0;
}
std::optional<u64> NextAvailableCore(const s64 needed_ticks) const;
private:
struct Event;
@ -112,10 +125,16 @@ private:
/// Clear all pending events. This should ONLY be done on exit.
void ClearPendingEvents();
static constexpr u64 num_cpu_cores = 4;
s64 global_timer = 0;
s64 idled_cycles = 0;
int slice_length = 0;
int downcount = 0;
s64 slice_length = 0;
u64 accumulated_ticks = 0;
std::array<s64, num_cpu_cores> downcounts{};
// Slice of time assigned to each core per run.
std::array<s64, num_cpu_cores> time_slice{};
u64 current_context = 0;
// Are we in a function that has been called from Advance()
// If events are scheduled from a function that gets called from Advance(),

View File

@ -6,6 +6,7 @@
#include "core/arm/exclusive_monitor.h"
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/core_timing.h"
#include "core/cpu_core_manager.h"
#include "core/gdbstub/gdbstub.h"
#include "core/settings.h"
@ -122,13 +123,19 @@ void CpuCoreManager::RunLoop(bool tight_loop) {
}
}
for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) {
cores[active_core]->RunLoop(tight_loop);
if (Settings::values.use_multi_core) {
// Cores 1-3 are run on other threads in this mode
break;
auto& core_timing = system.CoreTiming();
core_timing.ResetRun();
bool keep_running{};
do {
keep_running = false;
for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) {
core_timing.SwitchContext(active_core);
if (core_timing.CanCurrentContextRun()) {
cores[active_core]->RunLoop(tight_loop);
}
keep_running |= core_timing.CanCurrentContextRun();
}
}
} while (keep_running);
if (GDBStub::IsServerEnabled()) {
GDBStub::SetCpuStepFlag(false);

View File

@ -6,6 +6,7 @@
#include <array>
#include <bitset>
#include <cstdlib>
#include <string>
#include "common/file_util.h"
#include "core/core.h"
@ -13,7 +14,7 @@
// Numbers are chosen randomly to make sure the correct one is given.
static constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}};
static constexpr int MAX_SLICE_LENGTH = 20000; // Copied from CoreTiming internals
static constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals
static std::bitset<CB_IDS.size()> callbacks_ran_flags;
static u64 expected_callback = 0;
@ -28,6 +29,12 @@ void CallbackTemplate(u64 userdata, s64 cycles_late) {
REQUIRE(lateness == cycles_late);
}
static u64 callbacks_done = 0;
void EmptyCallback(u64 userdata, s64 cycles_late) {
++callbacks_done;
}
struct ScopeInit final {
ScopeInit() {
core_timing.Initialize();
@ -39,18 +46,19 @@ struct ScopeInit final {
Core::Timing::CoreTiming core_timing;
};
static void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, int downcount,
static void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, u32 context = 0,
int expected_lateness = 0, int cpu_downcount = 0) {
callbacks_ran_flags = 0;
expected_callback = CB_IDS[idx];
lateness = expected_lateness;
// Pretend we executed X cycles of instructions.
core_timing.SwitchContext(context);
core_timing.AddTicks(core_timing.GetDowncount() - cpu_downcount);
core_timing.Advance();
core_timing.SwitchContext((context + 1) % 4);
REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags);
REQUIRE(downcount == core_timing.GetDowncount());
}
TEST_CASE("CoreTiming[BasicOrder]", "[core]") {
@ -64,9 +72,10 @@ TEST_CASE("CoreTiming[BasicOrder]", "[core]") {
Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", CallbackTemplate<4>);
// Enter slice 0
core_timing.Advance();
core_timing.ResetRun();
// D -> B -> C -> A -> E
core_timing.SwitchContext(0);
core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]);
REQUIRE(1000 == core_timing.GetDowncount());
core_timing.ScheduleEvent(500, cb_b, CB_IDS[1]);
@ -78,98 +87,46 @@ TEST_CASE("CoreTiming[BasicOrder]", "[core]") {
core_timing.ScheduleEvent(1200, cb_e, CB_IDS[4]);
REQUIRE(100 == core_timing.GetDowncount());
AdvanceAndCheck(core_timing, 3, 400);
AdvanceAndCheck(core_timing, 1, 300);
AdvanceAndCheck(core_timing, 2, 200);
AdvanceAndCheck(core_timing, 0, 200);
AdvanceAndCheck(core_timing, 4, MAX_SLICE_LENGTH);
AdvanceAndCheck(core_timing, 3, 0);
AdvanceAndCheck(core_timing, 1, 1);
AdvanceAndCheck(core_timing, 2, 2);
AdvanceAndCheck(core_timing, 0, 3);
AdvanceAndCheck(core_timing, 4, 0);
}
TEST_CASE("CoreTiming[Threadsave]", "[core]") {
ScopeInit guard;
auto& core_timing = guard.core_timing;
Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>);
Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", CallbackTemplate<3>);
Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", CallbackTemplate<4>);
// Enter slice 0
core_timing.Advance();
// D -> B -> C -> A -> E
core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]);
// Manually force since ScheduleEvent doesn't call it
core_timing.ForceExceptionCheck(1000);
REQUIRE(1000 == core_timing.GetDowncount());
core_timing.ScheduleEvent(500, cb_b, CB_IDS[1]);
// Manually force since ScheduleEvent doesn't call it
core_timing.ForceExceptionCheck(500);
REQUIRE(500 == core_timing.GetDowncount());
core_timing.ScheduleEvent(800, cb_c, CB_IDS[2]);
// Manually force since ScheduleEvent doesn't call it
core_timing.ForceExceptionCheck(800);
REQUIRE(500 == core_timing.GetDowncount());
core_timing.ScheduleEvent(100, cb_d, CB_IDS[3]);
// Manually force since ScheduleEvent doesn't call it
core_timing.ForceExceptionCheck(100);
REQUIRE(100 == core_timing.GetDowncount());
core_timing.ScheduleEvent(1200, cb_e, CB_IDS[4]);
// Manually force since ScheduleEvent doesn't call it
core_timing.ForceExceptionCheck(1200);
REQUIRE(100 == core_timing.GetDowncount());
AdvanceAndCheck(core_timing, 3, 400);
AdvanceAndCheck(core_timing, 1, 300);
AdvanceAndCheck(core_timing, 2, 200);
AdvanceAndCheck(core_timing, 0, 200);
AdvanceAndCheck(core_timing, 4, MAX_SLICE_LENGTH);
}
namespace SharedSlotTest {
static unsigned int counter = 0;
template <unsigned int ID>
void FifoCallback(u64 userdata, s64 cycles_late) {
static_assert(ID < CB_IDS.size(), "ID out of range");
callbacks_ran_flags.set(ID);
REQUIRE(CB_IDS[ID] == userdata);
REQUIRE(ID == counter);
REQUIRE(lateness == cycles_late);
++counter;
}
} // namespace SharedSlotTest
TEST_CASE("CoreTiming[SharedSlot]", "[core]") {
using namespace SharedSlotTest;
TEST_CASE("CoreTiming[FairSharing]", "[core]") {
ScopeInit guard;
auto& core_timing = guard.core_timing;
Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", FifoCallback<0>);
Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", FifoCallback<1>);
Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", FifoCallback<2>);
Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", FifoCallback<3>);
Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", FifoCallback<4>);
Core::Timing::EventType* empty_callback =
core_timing.RegisterEvent("empty_callback", EmptyCallback);
core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]);
core_timing.ScheduleEvent(1000, cb_b, CB_IDS[1]);
core_timing.ScheduleEvent(1000, cb_c, CB_IDS[2]);
core_timing.ScheduleEvent(1000, cb_d, CB_IDS[3]);
core_timing.ScheduleEvent(1000, cb_e, CB_IDS[4]);
callbacks_done = 0;
u64 MAX_CALLBACKS = 10;
for (std::size_t i = 0; i < 10; i++) {
core_timing.ScheduleEvent(i * 3333U, empty_callback, 0);
}
// Enter slice 0
core_timing.Advance();
REQUIRE(1000 == core_timing.GetDowncount());
const s64 advances = MAX_SLICE_LENGTH / 10;
core_timing.ResetRun();
u64 current_time = core_timing.GetTicks();
bool keep_running{};
do {
keep_running = false;
for (u32 active_core = 0; active_core < 4; ++active_core) {
core_timing.SwitchContext(active_core);
if (core_timing.CanCurrentContextRun()) {
core_timing.AddTicks(std::min<s64>(advances, core_timing.GetDowncount()));
core_timing.Advance();
}
keep_running |= core_timing.CanCurrentContextRun();
}
} while (keep_running);
u64 current_time_2 = core_timing.GetTicks();
callbacks_ran_flags = 0;
counter = 0;
lateness = 0;
core_timing.AddTicks(core_timing.GetDowncount());
core_timing.Advance();
REQUIRE(MAX_SLICE_LENGTH == core_timing.GetDowncount());
REQUIRE(0x1FULL == callbacks_ran_flags.to_ullong());
REQUIRE(MAX_CALLBACKS == callbacks_done);
REQUIRE(current_time_2 == current_time + MAX_SLICE_LENGTH * 4);
}
TEST_CASE("Core::Timing[PredictableLateness]", "[core]") {
@ -180,13 +137,13 @@ TEST_CASE("Core::Timing[PredictableLateness]", "[core]") {
Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
// Enter slice 0
core_timing.Advance();
core_timing.ResetRun();
core_timing.ScheduleEvent(100, cb_a, CB_IDS[0]);
core_timing.ScheduleEvent(200, cb_b, CB_IDS[1]);
AdvanceAndCheck(core_timing, 0, 90, 10, -10); // (100 - 10)
AdvanceAndCheck(core_timing, 1, MAX_SLICE_LENGTH, 50, -50);
AdvanceAndCheck(core_timing, 0, 0, 10, -10); // (100 - 10)
AdvanceAndCheck(core_timing, 1, 1, 50, -50);
}
namespace ChainSchedulingTest {
@ -220,7 +177,7 @@ TEST_CASE("CoreTiming[ChainScheduling]", "[core]") {
});
// Enter slice 0
core_timing.Advance();
core_timing.ResetRun();
core_timing.ScheduleEvent(800, cb_a, CB_IDS[0]);
core_timing.ScheduleEvent(1000, cb_b, CB_IDS[1]);
@ -229,19 +186,19 @@ TEST_CASE("CoreTiming[ChainScheduling]", "[core]") {
REQUIRE(800 == core_timing.GetDowncount());
reschedules = 3;
AdvanceAndCheck(core_timing, 0, 200); // cb_a
AdvanceAndCheck(core_timing, 1, 1000); // cb_b, cb_rs
AdvanceAndCheck(core_timing, 0, 0); // cb_a
AdvanceAndCheck(core_timing, 1, 1); // cb_b, cb_rs
REQUIRE(2 == reschedules);
core_timing.AddTicks(core_timing.GetDowncount());
core_timing.Advance(); // cb_rs
core_timing.SwitchContext(3);
REQUIRE(1 == reschedules);
REQUIRE(200 == core_timing.GetDowncount());
AdvanceAndCheck(core_timing, 2, 800); // cb_c
AdvanceAndCheck(core_timing, 2, 3); // cb_c
core_timing.AddTicks(core_timing.GetDowncount());
core_timing.Advance(); // cb_rs
REQUIRE(0 == reschedules);
REQUIRE(MAX_SLICE_LENGTH == core_timing.GetDowncount());
}