initial multithreaded rendering

This commit is contained in:
Anthony Pesch 2016-01-16 19:42:32 -08:00
parent 19a188d9c2
commit e1db917a4e
9 changed files with 241 additions and 167 deletions

View File

@ -1,7 +1,9 @@
#include <cstring>
#include <thread>
#include "core/core.h"
#include "emu/emulator.h"
#include "emu/profiler.h"
#include "hw/gdrom/gdrom.h"
#include "hw/holly/texture_cache.h"
#include "hw/holly/tile_renderer.h"
#include "hw/maple/maple.h"
#include "hw/dreamcast.h"
@ -18,17 +20,14 @@ using namespace dvm::renderer;
using namespace dvm::sys;
using namespace dvm::trace;
// scheduler is ticked at 1000hz, this is fairly arbitrary, but seems to be a
// good balance of executing cycles / handling interrupts
static const std::chrono::nanoseconds SCHEDULER_STEP = HZ_TO_NANO(1000);
// process input and render frames at 60hz
static const std::chrono::nanoseconds FRAME_STEP = HZ_TO_NANO(60);
DEFINE_string(bios, "dc_bios.bin", "Path to BIOS");
DEFINE_string(flash, "dc_flash.bin", "Path to flash ROM");
Emulator::Emulator() : trace_writer_(nullptr), deltas_(), delta_seq_(0) {
Emulator::Emulator()
: tile_renderer_(*dc_.texcache()),
trace_writer_(nullptr),
core_events_(MAX_EVENTS),
speed_() {
rb_ = new GLBackend(window_);
dc_.set_rb(rb_);
}
@ -69,40 +68,17 @@ void Emulator::Run(const char *path) {
}
}
auto current_time = std::chrono::high_resolution_clock::now();
auto last_time = current_time;
auto delta_time = std::chrono::nanoseconds(0);
auto scheduler_remaining = std::chrono::nanoseconds(0);
auto frame_remaining = std::chrono::nanoseconds(0);
// start running
running_ = true;
while (running_) {
current_time = std::chrono::high_resolution_clock::now();
delta_time = current_time - last_time;
last_time = current_time;
// run core emulator in a separate thread
std::thread cpu_thread(&Emulator::CoreThread, this);
scheduler_remaining += delta_time;
if (scheduler_remaining >= SCHEDULER_STEP) {
scheduler_remaining -= SCHEDULER_STEP;
// run graphics in the current thread
GraphicsThread();
auto start = current_time;
dc_.scheduler()->Tick(SCHEDULER_STEP);
auto end = std::chrono::high_resolution_clock::now();
// save off delta for speed stats
deltas_[delta_seq_++ % MAX_SCHEDULER_DELTAS] = end - start;
}
frame_remaining += delta_time;
if (frame_remaining >= FRAME_STEP) {
frame_remaining -= FRAME_STEP;
PumpEvents();
RenderFrame();
}
}
// wait until cpu thread finishes
cpu_thread.join();
}
bool Emulator::LoadBios(const char *path) {
@ -203,44 +179,6 @@ bool Emulator::LaunchGDI(const char *path) {
return true;
}
// Drains the window's event queue, dispatching each event to the profiler,
// the tracing toggle, maple, the render backend or the run flag.
void Emulator::PumpEvents() {
  WindowEvent ev;

  window_.PumpEvents();

  while (window_.PollEvent(&ev)) {
    switch (ev.type) {
      case WE_KEY:
        // the profiler gets first refusal on every key event
        if (Profiler::instance().HandleInput(ev.key.code, ev.key.value)) {
          break;
        }

        if (ev.key.code != K_F2) {
          // anything that isn't the tracing hotkey goes to maple
          dc_.maple()->HandleInput(0, ev.key.code, ev.key.value);
        } else if (ev.key.value) {
          // F2 toggles debug tracing, only when its value is non-zero
          ToggleTracing();
        }
        break;

      case WE_MOUSEMOVE:
        Profiler::instance().HandleMouseMove(ev.mousemove.x, ev.mousemove.y);
        break;

      case WE_RESIZE:
        rb_->ResizeVideo(ev.resize.width, ev.resize.height);
        break;

      case WE_QUIT:
        running_ = false;
        break;
    }
  }
}
void Emulator::ToggleTracing() {
if (!trace_writer_) {
char filename[PATH_MAX];
@ -268,27 +206,54 @@ void Emulator::ToggleTracing() {
dc_.set_trace_writer(trace_writer_);
}
void Emulator::RenderFrame() {
// Graphics loop: alternates between pumping window events and rendering a
// frame until running_ is observed to be false.
void Emulator::GraphicsThread() {
  for (;;) {
    if (!running_.load(std::memory_order_relaxed)) {
      return;
    }

    PumpGraphicsEvents();
    RenderGraphics();
  }
}
// Drains the window's event queue. Key events the profiler does not consume
// are queued for the core thread; mouse moves go to the profiler, resizes to
// the render backend, and a quit event clears running_.
void Emulator::PumpGraphicsEvents() {
  WindowEvent ev;

  window_.PumpEvents();

  while (window_.PollEvent(&ev)) {
    if (ev.type == WE_KEY) {
      // the profiler gets first refusal on every key event
      const bool consumed =
          Profiler::instance().HandleInput(ev.key.code, ev.key.value);

      // whatever it didn't want is forwarded to the CPU thread
      if (!consumed) {
        QueueCoreEvent(ev);
      }
    } else if (ev.type == WE_MOUSEMOVE) {
      Profiler::instance().HandleMouseMove(ev.mousemove.x, ev.mousemove.y);
    } else if (ev.type == WE_RESIZE) {
      rb_->ResizeVideo(ev.resize.width, ev.resize.height);
    } else if (ev.type == WE_QUIT) {
      running_.store(false, std::memory_order_relaxed);
    }
  }
}
void Emulator::RenderGraphics() {
rb_->BeginFrame();
// render the last tile context
TileContext *last_context = dc_.ta()->GetLastContext();
if (last_context) {
dc_.tile_renderer()->RenderContext(last_context, rb_);
// render the latest tile context
if (TileContext *tactx = dc_.ta()->GetLastContext()) {
tile_renderer_.RenderContext(tactx, rb_);
}
// calculate scheduler speed
auto total_delta = std::chrono::nanoseconds(0);
for (unsigned i = 0; i < MAX_SCHEDULER_DELTAS; i++) {
total_delta += deltas_[(delta_seq_ + i) % MAX_SCHEDULER_DELTAS];
}
float speed = ((SCHEDULER_STEP.count() * MAX_SCHEDULER_DELTAS) /
(float)total_delta.count()) *
100.0f;
// render stats
char stats[512];
float speed = *reinterpret_cast<float *>(&speed_);
snprintf(stats, sizeof(stats), "%.2f%%, %.2f rps", speed, dc_.pvr()->rps());
rb_->RenderText2D(0, 0, 12.0f, 0xffffffff, stats);
@ -297,3 +262,94 @@ void Emulator::RenderFrame() {
rb_->EndFrame();
}
// Body of the core (emulation) thread. Until running_ is cleared it
// repeatedly:
//   - handles input events forwarded by the graphics thread,
//   - ticks the scheduler in fixed STEP increments to catch up with the time
//     elapsed on the host clock,
//   - every SAMPLE_PERIOD publishes a speed percentage through speed_, which
//     the graphics thread reads when rendering stats.
void Emulator::CoreThread() {
  static const std::chrono::nanoseconds STEP = HZ_TO_NANO(1000);
  static const std::chrono::nanoseconds SAMPLE_PERIOD = HZ_TO_NANO(10);

  // speed_ carries the raw bits of a float, the sizes have to agree
  static_assert(sizeof(float) == sizeof(uint32_t),
                "speed_ stores the bit pattern of a 32-bit float");

  auto current_time = std::chrono::high_resolution_clock::now();
  auto delta_time = std::chrono::nanoseconds(0);
  auto last_time = current_time;
  auto remaining_time = std::chrono::nanoseconds(0);
  auto next_sample_time = current_time + SAMPLE_PERIOD;
  auto run_time = std::chrono::nanoseconds(0);
  bool ran = false;

  while (running_.load(std::memory_order_relaxed)) {
    current_time = std::chrono::high_resolution_clock::now();
    delta_time = current_time - last_time;
    last_time = current_time;
    remaining_time += delta_time;

    // handle events the graphics thread forwarded on
    PumpCoreEvents();

    // track run time in this backwards way, with a flag set by the previous
    // iteration, to avoid polling the clock too much: if the previous
    // iteration ticked the scheduler, the time that elapsed since then is
    // counted as run time
    if (ran) {
      run_time += delta_time;
      ran = false;
    }

    // run scheduler every STEP nanoseconds
    while (remaining_time >= STEP) {
      remaining_time -= STEP;

      dc_.scheduler()->Tick(STEP);

      ran = true;
    }

    // update speed every SAMPLE_PERIOD nanoseconds
    if (current_time > next_sample_time) {
      // actual time elapsed since the previous sample was taken
      auto since = SAMPLE_PERIOD + (current_time - next_sample_time);

      // skip the sample if no run time was accumulated, rather than dividing
      // by zero and publishing an infinite speed
      if (run_time.count() > 0) {
        float speed =
            (since.count() / static_cast<float>(run_time.count())) * 100.0f;

        // copy the float's bits into the atomic with memcpy; reading the
        // float through a uint32_t pointer breaks the strict aliasing rules
        uint32_t speed_bits = 0;
        std::memcpy(&speed_bits, &speed, sizeof(speed_bits));
        speed_ = speed_bits;
      }

      next_sample_time = current_time + SAMPLE_PERIOD;
      run_time = std::chrono::nanoseconds(0);
    }
  }
}
// Appends ev to the core event buffer while holding core_events_mutex_. If
// the buffer is already full the event is dropped and a warning is logged.
void Emulator::QueueCoreEvent(const WindowEvent &ev) {
  std::lock_guard<std::mutex> lock(core_events_mutex_);

  if (!core_events_.Full()) {
    core_events_.PushBack(ev);
    return;
  }

  LOG_WARNING("Core event overflow");
}
// Removes the oldest queued core event, if any, while holding
// core_events_mutex_. Returns true and fills *ev when an event was available,
// or false (leaving *ev untouched) when the buffer was empty.
bool Emulator::PollCoreEvent(WindowEvent *ev) {
  std::lock_guard<std::mutex> lock(core_events_mutex_);

  const bool has_event = !core_events_.Empty();

  if (has_event) {
    *ev = core_events_.front();
    core_events_.PopFront();
  }

  return has_event;
}
// Drains the core event queue. Only key events are expected: F2 toggles
// tracing when its value is non-zero, every other key is passed to maple.
void Emulator::PumpCoreEvents() {
  WindowEvent ev;

  while (PollCoreEvent(&ev)) {
    // any event type other than a key event is treated as a failed check
    if (ev.type != WE_KEY) {
      CHECK(false, "Unexpected event type");
      continue;
    }

    const bool is_trace_key = ev.key.code == K_F2;

    if (!is_trace_key) {
      dc_.maple()->HandleInput(0, ev.key.code, ev.key.value);
    } else if (ev.key.value) {
      ToggleTracing();
    }
  }
}

View File

@ -1,6 +1,9 @@
#ifndef EMULATOR_H
#define EMULATOR_H
#include <atomic>
#include <mutex>
#include "hw/holly/tile_renderer.h"
#include "hw/dreamcast.h"
#include "renderer/backend.h"
#include "sys/window.h"
@ -8,11 +11,6 @@
namespace dvm {
namespace emu {
enum {
// number of deltas to use for speed stats
MAX_SCHEDULER_DELTAS = 1000
};
class Emulator {
public:
Emulator();
@ -25,18 +23,32 @@ class Emulator {
bool LoadFlash(const char *path);
bool LaunchBIN(const char *path);
bool LaunchGDI(const char *path);
void PumpEvents();
void ToggleTracing();
void RenderFrame();
// runs on the main thread; the graphics thread processes the TA output and
// renders it along with various stats and debug menus
void GraphicsThread();
void PumpGraphicsEvents();
void RenderGraphics();
// runs on a separate thread; the core thread actually runs the emulator,
// ultimately producing output for the graphics thread
void CoreThread();
void QueueCoreEvent(const sys::WindowEvent &ev);
bool PollCoreEvent(sys::WindowEvent *ev);
void PumpCoreEvents();
sys::Window window_;
hw::Dreamcast dc_;
hw::holly::TileRenderer tile_renderer_;
renderer::Backend *rb_;
trace::TraceWriter *trace_writer_;
std::chrono::nanoseconds deltas_[MAX_SCHEDULER_DELTAS];
unsigned delta_seq_;
bool running_;
// variables accessed by both the graphics and core thread
RingBuffer<sys::WindowEvent> core_events_;
std::mutex core_events_mutex_;
std::atomic<uint32_t> speed_;
std::atomic<bool> running_;
};
}
}

View File

@ -5,7 +5,6 @@
#include "hw/holly/pvr2.h"
#include "hw/holly/texture_cache.h"
#include "hw/holly/tile_accelerator.h"
#include "hw/holly/tile_renderer.h"
#include "hw/maple/maple.h"
#include "hw/sh4/sh4.h"
#include "hw/dreamcast.h"
@ -61,7 +60,6 @@ Dreamcast::Dreamcast()
sh4_ = new SH4(*memory_);
ta_ = new TileAccelerator(this);
texcache_ = new TextureCache(this);
tile_renderer_ = new TileRenderer(*texcache_);
}
Dreamcast::~Dreamcast() {
@ -78,7 +76,6 @@ Dreamcast::~Dreamcast() {
delete sh4_;
delete ta_;
delete texcache_;
delete tile_renderer_;
}
bool Dreamcast::Init() {

View File

@ -159,7 +159,6 @@ class Dreamcast {
hw::sh4::SH4 *sh4() { return sh4_; }
hw::holly::TileAccelerator *ta() { return ta_; }
hw::holly::TextureCache *texcache() { return texcache_; }
hw::holly::TileRenderer *tile_renderer() { return tile_renderer_; }
renderer::Backend *rb() { return rb_; }
void set_rb(renderer::Backend *rb) { rb_ = rb; }
@ -211,7 +210,6 @@ class Dreamcast {
hw::sh4::SH4 *sh4_;
hw::holly::TileAccelerator *ta_;
hw::holly::TextureCache *texcache_;
hw::holly::TileRenderer *tile_renderer_;
// not owned by us
renderer::Backend *rb_;

View File

@ -83,7 +83,7 @@ void PVR2::WriteRegister(void *ctx, uint32_t addr, uint32_t value) {
self->rps_ = 1000000000.0f / delta.count();
}
self->ta_->SwapContext(self->dc_->PARAM_BASE.base_address);
self->ta_->FinalizeContext(self->dc_->PARAM_BASE.base_address);
} break;
case SPG_LOAD_OFFSET:

View File

@ -204,17 +204,10 @@ int TileAccelerator::GetVertexType(const PCW &pcw) {
pcw.para_type * TA_NUM_LISTS + pcw.list_type];
}
TileAccelerator::TileAccelerator(Dreamcast *dc)
: dc_(dc), last_context_(nullptr) {}
TileAccelerator::~TileAccelerator() {
while (contexts_.size()) {
const auto &it = contexts_.begin();
TileContext *tactx = it->second;
delete tactx;
contexts_.erase(it);
TileAccelerator::TileAccelerator(Dreamcast *dc) : dc_(dc), contexts_() {
// initialize context queue
for (int i = 0; i < MAX_CONTEXTS; i++) {
free_contexts_.push(&contexts_[i]);
}
}
@ -232,8 +225,26 @@ void TileAccelerator::SoftReset() {
}
void TileAccelerator::InitContext(uint32_t addr) {
TileContext *tactx = GetContext(addr);
// try to reuse an existing live context
auto it = live_contexts_.find(addr);
if (it == live_contexts_.end()) {
// else, allocate a new context from the free queue
std::lock_guard<std::mutex> guard(context_mutex_);
CHECK(free_contexts_.size());
TileContext *tactx = free_contexts_.front();
free_contexts_.pop();
auto res = live_contexts_.insert(std::make_pair(addr, tactx));
CHECK(res.second);
it = res.first;
}
TileContext *tactx = it->second;
memset(tactx, 0, sizeof(*tactx));
tactx->addr = addr;
tactx->cursor = 0;
tactx->size = 0;
tactx->last_poly = nullptr;
@ -243,7 +254,9 @@ void TileAccelerator::InitContext(uint32_t addr) {
}
void TileAccelerator::WriteContext(uint32_t addr, uint32_t value) {
TileContext *tactx = GetContext(addr);
auto it = live_contexts_.find(addr);
CHECK_NE(it, live_contexts_.end());
TileContext *tactx = it->second;
CHECK_LT(tactx->size + 4, (int)sizeof(tactx->data));
*(uint32_t *)&tactx->data[tactx->size] = value;
@ -289,33 +302,50 @@ void TileAccelerator::WriteContext(uint32_t addr, uint32_t value) {
}
}
void TileAccelerator::SwapContext(uint32_t addr) {
if (!last_context_) {
last_context_ = &scratch_context_;
}
// swap context with last context to be delayed rendered
auto it = FindContext(addr);
TileContext *tmp = last_context_;
last_context_ = it->second;
it->second = tmp;
void TileAccelerator::FinalizeContext(uint32_t addr) {
auto it = live_contexts_.find(addr);
CHECK_NE(it, live_contexts_.end());
TileContext *tactx = it->second;
// save PVR state to context
WritePVRState(last_context_);
WriteBackgroundState(last_context_);
WritePVRState(tactx);
WriteBackgroundState(tactx);
// add context to trace
if (dc_->trace_writer()) {
dc_->trace_writer()->WriteRenderContext(last_context_);
dc_->trace_writer()->WriteRenderContext(tactx);
}
// tell holly that rendering is complete
holly_->RequestInterrupt(HOLLY_INTC_PCEOVINT);
holly_->RequestInterrupt(HOLLY_INTC_PCEOIINT);
holly_->RequestInterrupt(HOLLY_INTC_PCEOTINT);
// erase from the live map
live_contexts_.erase(it);
// append to the pending queue
std::lock_guard<std::mutex> guard(context_mutex_);
pending_contexts_.push(tactx);
}
// returns the stored last_context_ pointer without modifying it
TileContext *TileAccelerator::GetLastContext() { return last_context_; }
// Returns the most recently queued pending context, or nullptr if none is
// pending. Every older pending context is moved back to the free queue; the
// returned context stays at the front of the pending queue. The queues are
// only touched while context_mutex_ is held.
TileContext *TileAccelerator::GetLastContext() {
  std::lock_guard<std::mutex> lock(context_mutex_);

  if (pending_contexts_.empty()) {
    return nullptr;
  }

  // recycle everything queued ahead of the newest context
  for (auto stale = pending_contexts_.size() - 1; stale > 0; --stale) {
    free_contexts_.push(pending_contexts_.front());
    pending_contexts_.pop();
  }

  // the single remaining entry is the latest context
  return pending_contexts_.front();
}
void TileAccelerator::WriteCommand(void *ctx, uint32_t addr, uint32_t value) {
TileAccelerator *self = reinterpret_cast<TileAccelerator *>(ctx);
@ -331,27 +361,6 @@ void TileAccelerator::WriteTexture(void *ctx, uint32_t addr, uint32_t value) {
dvm::store(&self->video_ram_[addr], value);
}
// Looks up the context registered for addr, allocating and inserting a new
// TileContext when there is none yet. The context's addr field is
// (re)assigned on every call. Returns the map iterator for the entry.
TileContextIterator TileAccelerator::FindContext(uint32_t addr) {
  auto found = contexts_.find(addr);

  // no entry for this address yet, create one
  if (found == contexts_.end()) {
    found = contexts_.insert(std::make_pair(addr, new TileContext())).first;
  }

  // stamp the context with its address
  TileContext *tactx = found->second;
  tactx->addr = addr;

  return found;
}
// Convenience wrapper around FindContext that returns the context pointer
// instead of the map iterator.
TileContext *TileAccelerator::GetContext(uint32_t addr) {
  return FindContext(addr)->second;
}
void TileAccelerator::WritePVRState(TileContext *tactx) {
// autosort
if (!dc_->FPU_PARAM_CFG.region_header_type) {

View File

@ -2,6 +2,7 @@
#define TILE_ACCELERATOR_H
#include <memory>
#include <queue>
#include <unordered_map>
#include "hw/holly/tile_renderer.h"
#include "renderer/backend.h"
@ -444,6 +445,7 @@ union VertexParam {
};
enum {
// number of TileContext slots preallocated by the TileAccelerator
MAX_CONTEXTS = 8,
// worst case background vertex size, see ISP_BACKGND_T field
BG_VERTEX_SIZE = (0b111 * 2 + 3) * 4 * 3
};
@ -476,7 +478,7 @@ struct TileContext {
};
typedef std::unordered_map<TextureKey, TileContext *> TileContextMap;
typedef TileContextMap::iterator TileContextIterator;
typedef std::queue<TileContext *> TileContextQueue;
class TileAccelerator {
public:
@ -485,22 +487,20 @@ class TileAccelerator {
static int GetVertexType(const PCW &pcw);
TileAccelerator(hw::Dreamcast *dc);
~TileAccelerator();
bool Init();
void SoftReset();
void InitContext(uint32_t addr);
void WriteContext(uint32_t addr, uint32_t value);
void SwapContext(uint32_t addr);
void FinalizeContext(uint32_t addr);
TileContext *GetLastContext();
static void WriteCommand(void *ctx, uint32_t addr, uint32_t value);
static void WriteTexture(void *ctx, uint32_t addr, uint32_t value);
private:
TileContextIterator FindContext(uint32_t addr);
TileContext *GetContext(uint32_t addr);
void WritePVRState(TileContext *tactx);
void WriteBackgroundState(TileContext *tactx);
@ -510,9 +510,11 @@ class TileAccelerator {
hw::holly::TextureCache *texcache_;
uint8_t *video_ram_;
TileContextMap contexts_;
TileContext scratch_context_;
TileContext *last_context_;
std::mutex context_mutex_;
TileContext contexts_[MAX_CONTEXTS];
TileContextMap live_contexts_;
TileContextQueue free_contexts_;
TileContextQueue pending_contexts_;
};
}
}

View File

@ -376,8 +376,8 @@ bool GLBackend::InitContext() {
return false;
}
// disable vsync
SDL_GL_SetSwapInterval(0);
// enable vsync
SDL_GL_SetSwapInterval(1);
// set default width / height
state_.video_width = window_.width();

View File

@ -11,7 +11,7 @@ namespace sys {
enum {
MAX_EVENTS = 1024,
NUM_JOYSTICK_AXES = (K_AXIS15 - K_AXIS0) + 1,
NUM_JOYSTICK_KEYS = (K_JOY31 - K_JOY0) + 1
NUM_JOYSTICK_KEYS = (K_JOY31 - K_JOY0) + 1,
};
enum WindowEventType {