Swapping.

This commit is contained in:
Ben Vanik 2014-12-24 22:35:03 -08:00
parent fa0dfa4dce
commit f438ae1bfd
11 changed files with 380 additions and 127 deletions

View File

@ -33,6 +33,10 @@ Win32Loop::Win32Loop() : thread_id_(0) {
poly::threading::set_name("Win32 Loop"); poly::threading::set_name("Win32 Loop");
thread_id_ = GetCurrentThreadId(); thread_id_ = GetCurrentThreadId();
// Make a Win32 call to enable the thread queue.
MSG msg;
PeekMessage(&msg, NULL, WM_USER, WM_USER, PM_NOREMOVE);
init_fence.Signal(); init_fence.Signal();
ThreadMain(); ThreadMain();
@ -68,9 +72,11 @@ void Win32Loop::ThreadMain() {
void Win32Loop::Post(std::function<void()> fn) { void Win32Loop::Post(std::function<void()> fn) {
assert_true(thread_id_ != 0); assert_true(thread_id_ != 0);
PostThreadMessage(thread_id_, kWmWin32LoopPost, if (!PostThreadMessage(
reinterpret_cast<WPARAM>(this), thread_id_, kWmWin32LoopPost, reinterpret_cast<WPARAM>(this),
reinterpret_cast<LPARAM>(new PostedFn(std::move(fn)))); reinterpret_cast<LPARAM>(new PostedFn(std::move(fn))))) {
assert_always("Unable to post message to thread queue");
}
} }
void Win32Loop::Quit() { void Win32Loop::Quit() {
@ -79,9 +85,7 @@ void Win32Loop::Quit() {
reinterpret_cast<WPARAM>(this), 0); reinterpret_cast<WPARAM>(this), 0);
} }
// Waits on quit_fence_ and returns once that wait completes.
void Win32Loop::AwaitQuit() {
  quit_fence_.Wait();
}
} // namespace win32 } // namespace win32
} // namespace ui } // namespace ui

View File

@ -13,6 +13,7 @@
#include <poly/logging.h> #include <poly/logging.h>
#include <poly/math.h> #include <poly/math.h>
#include <xenia/gpu/gl4/gl4_gpu-private.h>
#include <xenia/gpu/gl4/gl4_graphics_system.h> #include <xenia/gpu/gl4/gl4_graphics_system.h>
#include <xenia/gpu/gpu-private.h> #include <xenia/gpu/gpu-private.h>
#include <xenia/gpu/xenos.h> #include <xenia/gpu/xenos.h>
@ -28,7 +29,9 @@ namespace gl4 {
using namespace xe::gpu::xenos; using namespace xe::gpu::xenos;
extern "C" extern "C" GLEWContext* glewGetContext(); extern "C" GLEWContext* glewGetContext();
const GLuint kAnyTarget = UINT_MAX;
CommandProcessor::CommandProcessor(GL4GraphicsSystem* graphics_system) CommandProcessor::CommandProcessor(GL4GraphicsSystem* graphics_system)
: memory_(graphics_system->memory()), : memory_(graphics_system->memory()),
@ -48,7 +51,8 @@ CommandProcessor::CommandProcessor(GL4GraphicsSystem* graphics_system)
bin_select_(0xFFFFFFFFull), bin_select_(0xFFFFFFFFull),
bin_mask_(0xFFFFFFFFull), bin_mask_(0xFFFFFFFFull),
active_vertex_shader_(nullptr), active_vertex_shader_(nullptr),
active_pixel_shader_(nullptr) { active_pixel_shader_(nullptr),
active_framebuffer_(nullptr) {
std::memset(&draw_command_, 0, sizeof(draw_command_)); std::memset(&draw_command_, 0, sizeof(draw_command_));
LARGE_INTEGER perf_counter; LARGE_INTEGER perf_counter;
QueryPerformanceCounter(&perf_counter); QueryPerformanceCounter(&perf_counter);
@ -70,7 +74,6 @@ bool CommandProcessor::Initialize(std::unique_ptr<GLContext> context) {
worker_thread_ = std::thread([this]() { worker_thread_ = std::thread([this]() {
poly::threading::set_name("GL4 Worker"); poly::threading::set_name("GL4 Worker");
xe::Profiler::ThreadEnter("GL4 Worker"); xe::Profiler::ThreadEnter("GL4 Worker");
context_->MakeCurrent();
WorkerMain(); WorkerMain();
xe::Profiler::ThreadExit(); xe::Profiler::ThreadExit();
}); });
@ -89,6 +92,7 @@ void CommandProcessor::Shutdown() {
} }
void CommandProcessor::WorkerMain() { void CommandProcessor::WorkerMain() {
context_->MakeCurrent();
if (!SetupGL()) { if (!SetupGL()) {
PFATAL("Unable to setup command processor GL state"); PFATAL("Unable to setup command processor GL state");
return; return;
@ -106,9 +110,11 @@ void CommandProcessor::WorkerMain() {
const int wait_time_ms = 5; const int wait_time_ms = 5;
if (WaitForSingleObject(write_ptr_index_event_, wait_time_ms) == if (WaitForSingleObject(write_ptr_index_event_, wait_time_ms) ==
WAIT_TIMEOUT) { WAIT_TIMEOUT) {
ReturnFromWait();
write_ptr_index = write_ptr_index_.load(); write_ptr_index = write_ptr_index_.load();
continue; continue;
} }
ReturnFromWait();
} }
assert_true(read_ptr_index_ != write_ptr_index); assert_true(read_ptr_index_ != write_ptr_index);
@ -128,6 +134,7 @@ void CommandProcessor::WorkerMain() {
} }
ShutdownGL(); ShutdownGL();
context_->ClearCurrent();
} }
bool CommandProcessor::SetupGL() { bool CommandProcessor::SetupGL() {
@ -232,6 +239,16 @@ void CommandProcessor::PrepareForWait() {
// synchronize here. // synchronize here.
// glFlush(); // glFlush();
glFinish(); glFinish();
if (FLAGS_thread_safe_gl) {
context_->ClearCurrent();
}
}
// Counterpart to PrepareForWait(): when the thread_safe_gl flag is set, makes
// context_ current on the calling thread again. Does nothing when the flag is
// clear.
void CommandProcessor::ReturnFromWait() {
  if (!FLAGS_thread_safe_gl) {
    return;
  }
  context_->MakeCurrent();
}
class CommandProcessor::RingbufferReader { class CommandProcessor::RingbufferReader {
@ -588,6 +605,10 @@ bool CommandProcessor::ExecutePacketType3_XE_SWAP(RingbufferReader* reader,
uint32_t packet_ptr, uint32_t packet_ptr,
uint32_t packet, uint32_t packet,
uint32_t count) { uint32_t count) {
auto& regs = *register_file_;
PLOGI("XE_SWAP");
// Xenia-specific VdSwap hook. // Xenia-specific VdSwap hook.
// VdSwap will post this to tell us we need to swap the screen/fire an // VdSwap will post this to tell us we need to swap the screen/fire an
// interrupt. // interrupt.
@ -595,10 +616,36 @@ bool CommandProcessor::ExecutePacketType3_XE_SWAP(RingbufferReader* reader,
// 63 words here, but only the first has any data. // 63 words here, but only the first has any data.
reader->TraceData(1); reader->TraceData(1);
uint32_t frontbuffer_ptr = reader->Read(); uint32_t frontbuffer_ptr = reader->Read();
// TODO(benvanik): something with the frontbuffer ptr.
reader->Advance(count - 1); reader->Advance(count - 1);
if (swap_handler_) {
  SwapParameters swap_params;

  // Lookup the framebuffer in the recently-resolved list.
  // TODO(benvanik): make this much more sophisticated.
  // TODO(benvanik): handle not found cases.
  // TODO(benvanik): handle dirty cases (resolved to sysmem, touched).
  // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  // HACK: just use whatever our current framebuffer is.
  if (active_framebuffer_) {
    swap_params.framebuffer = active_framebuffer_->framebuffer;
    // TODO(benvanik): pick the right one?
    swap_params.attachment = GL_COLOR_ATTACHMENT0;
  } else {
    swap_params.framebuffer = 0;
    // Keep the field defined even when there is nothing to source from.
    swap_params.attachment = GL_NONE;
  }

  // Guess frontbuffer dimensions.
  // Command buffer seems to set these right before the XE_SWAP.
  uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32;
  uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32;
  swap_params.x = window_scissor_tl & 0x7FFF;
  swap_params.y = (window_scissor_tl >> 16) & 0x7FFF;
  // Binary '-' binds tighter than '&', so the mask must be parenthesized;
  // otherwise the origin is subtracted from the mask instead of from the
  // masked bottom-right coordinate.
  swap_params.width = (window_scissor_br & 0x7FFF) - swap_params.x;
  swap_params.height = ((window_scissor_br >> 16) & 0x7FFF) - swap_params.y;

  // Bracket the handler with PrepareForWait/ReturnFromWait, the same pair
  // used around the other blocking waits on this thread.
  PrepareForWait();
  swap_handler_(swap_params);
  ReturnFromWait();
}
return true; return true;
} }
@ -677,6 +724,7 @@ bool CommandProcessor::ExecutePacketType3_WAIT_REG_MEM(RingbufferReader* reader,
if (wait >= 0x100) { if (wait >= 0x100) {
PrepareForWait(); PrepareForWait();
Sleep(wait / 0x100); Sleep(wait / 0x100);
ReturnFromWait();
} else { } else {
SwitchToThread(); SwitchToThread();
} }
@ -1077,18 +1125,21 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
if (enable_mode == ModeControl::kIgnore) { if (enable_mode == ModeControl::kIgnore) {
// Ignored. // Ignored.
return true; return true;
} else if (enable_mode == ModeControl::kCopy) {
// Special copy handling.
return IssueCopy(draw_command);
} }
if (!UpdateRenderTargets(draw_command)) { if (!UpdateRenderTargets(draw_command)) {
PLOGE("Unable to setup render targets"); PLOGE("Unable to setup render targets");
return false; return false;
} }
if (!active_framebuffer_) {
if (enable_mode == ModeControl::kCopy) { // No framebuffer, so nothing we do will actually have an effect.
// Special copy handling. // Treat it as a no-op.
return IssueCopy(draw_command); XETRACECP("No-op draw (no framebuffer set)");
return true;
} }
if (!UpdateState(draw_command)) { if (!UpdateState(draw_command)) {
PLOGE("Unable to setup render state"); PLOGE("Unable to setup render state");
return false; return false;
@ -1301,7 +1352,6 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
/* 3 */ GL_MAX, /* 3 */ GL_MAX,
/* 4 */ GL_FUNC_REVERSE_SUBTRACT, /* 4 */ GL_FUNC_REVERSE_SUBTRACT,
}; };
uint32_t color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
uint32_t blend_control[4] = { uint32_t blend_control[4] = {
regs[XE_GPU_REG_RB_BLENDCONTROL_0].u32, regs[XE_GPU_REG_RB_BLENDCONTROL_0].u32,
regs[XE_GPU_REG_RB_BLENDCONTROL_1].u32, regs[XE_GPU_REG_RB_BLENDCONTROL_1].u32,
@ -1321,8 +1371,6 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) {
auto dest_blend_alpha = blend_map[(blend_control[n] & 0x1F000000) >> 24]; auto dest_blend_alpha = blend_map[(blend_control[n] & 0x1F000000) >> 24];
// A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN // A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN
auto blend_op_alpha = blend_op_map[(blend_control[n] & 0x00E00000) >> 21]; auto blend_op_alpha = blend_op_map[(blend_control[n] & 0x00E00000) >> 21];
// A2XX_RB_COLOR_MASK_WRITE_*
uint32_t write_mask = (color_mask >> (n * 4)) & 0xF;
// A2XX_RB_COLORCONTROL_BLEND_DISABLE ?? Can't find this! // A2XX_RB_COLORCONTROL_BLEND_DISABLE ?? Can't find this!
// Just guess based on actions. // Just guess based on actions.
bool blend_enable = bool blend_enable =
@ -1443,34 +1491,78 @@ bool CommandProcessor::UpdateRenderTargets(DrawCommand* draw_command) {
// Get/create all color render targets, if we are using them. // Get/create all color render targets, if we are using them.
// In depth-only mode we don't need them. // In depth-only mode we don't need them.
GLuint color_targets[4] = {0, 0, 0, 0}; GLenum draw_buffers[4] = {GL_NONE, GL_NONE, GL_NONE, GL_NONE};
GLuint color_targets[4] = {kAnyTarget, kAnyTarget, kAnyTarget, kAnyTarget};
if (enable_mode == ModeControl::kColorDepth) { if (enable_mode == ModeControl::kColorDepth) {
uint32_t color_info[4] = { uint32_t color_info[4] = {
regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs[XE_GPU_REG_RB_COLOR1_INFO].u32, regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs[XE_GPU_REG_RB_COLOR1_INFO].u32,
regs[XE_GPU_REG_RB_COLOR2_INFO].u32, regs[XE_GPU_REG_RB_COLOR2_INFO].u32,
regs[XE_GPU_REG_RB_COLOR3_INFO].u32, regs[XE_GPU_REG_RB_COLOR3_INFO].u32,
}; };
// A2XX_RB_COLOR_MASK_WRITE_* == D3DRS_COLORWRITEENABLE
uint32_t color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
for (int n = 0; n < poly::countof(color_info); n++) { for (int n = 0; n < poly::countof(color_info); n++) {
uint32_t write_mask = (color_mask >> (n * 4)) & 0xF;
if (!write_mask) {
// Unused, so keep disabled and set to wildcard so we'll take any
// framebuffer that has it.
continue;
}
uint32_t color_base = color_info[n] & 0xFFF; uint32_t color_base = color_info[n] & 0xFFF;
auto color_format = auto color_format =
static_cast<ColorRenderTargetFormat>((color_info[n] >> 16) & 0xF); static_cast<ColorRenderTargetFormat>((color_info[n] >> 16) & 0xF);
color_targets[n] = GetColorRenderTarget(surface_pitch, surface_msaa, color_targets[n] = GetColorRenderTarget(surface_pitch, surface_msaa,
color_base, color_format); color_base, color_format);
draw_buffers[n] = GL_COLOR_ATTACHMENT0 + n;
glColorMaski(n, !!(write_mask & 0x1), !!(write_mask & 0x2),
!!(write_mask & 0x4), !!(write_mask & 0x8));
} }
} }
uint32_t depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32; // Get/create depth buffer, but only if we are going to use it.
uint32_t depth_base = depth_info & 0xFFF; uint32_t depth_control = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32;
auto depth_format = uint32_t stencil_ref_mask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32;
static_cast<DepthRenderTargetFormat>((depth_info >> 16) & 0x1); bool uses_depth =
GLuint depth_target = GetDepthRenderTarget(surface_pitch, surface_msaa, (depth_control & 0x00000002) || (depth_control & 0x00000004);
depth_base, depth_format); uint32_t stencil_write_mask = (stencil_ref_mask & 0x00FF0000) >> 16;
// TODO(benvanik): when a game switches does it expect to keep the same bool uses_stencil = (depth_control & 0x00000001) || (stencil_write_mask != 0);
// depth buffer contents? GLuint depth_target = kAnyTarget;
if (uses_depth && uses_stencil) {
uint32_t depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32;
uint32_t depth_base = depth_info & 0xFFF;
auto depth_format =
static_cast<DepthRenderTargetFormat>((depth_info >> 16) & 0x1);
depth_target = GetDepthRenderTarget(surface_pitch, surface_msaa, depth_base,
depth_format);
// TODO(benvanik): when a game switches does it expect to keep the same
// depth buffer contents?
}
// Get/create a framebuffer with the required targets. // Get/create a framebuffer with the required targets.
GLuint framebuffer = GetFramebuffer(color_targets, depth_target); // Note that none may be returned if we really don't need one.
glBindFramebuffer(GL_FRAMEBUFFER, framebuffer); auto cached_framebuffer = GetFramebuffer(color_targets, depth_target);
active_framebuffer_ = cached_framebuffer;
if (!active_framebuffer_) {
// Nothing to do.
return true;
}
// Setup just the targets we want.
glNamedFramebufferDrawBuffers(cached_framebuffer->framebuffer, 4,
draw_buffers);
// Make active.
// TODO(benvanik): can we do this all named?
// TODO(benvanik): do we want this on READ too?
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, cached_framebuffer->framebuffer);
// TEST TEST TEST TEST TEST TEST TEST TEST TEST TEST
// Pretend we are drawing.
glEnable(GL_SCISSOR_TEST);
glScissor(100, 100, 100, 100);
float red[] = {rand() / (float)RAND_MAX, 0, 0, 1.0f};
glClearNamedFramebufferfv(active_framebuffer_->framebuffer, GL_COLOR, 0, red);
glDisable(GL_SCISSOR_TEST);
return true; return true;
} }
@ -1519,6 +1611,45 @@ bool CommandProcessor::IssueCopy(DrawCommand* draw_command) {
uint32_t copy_mask = regs[XE_GPU_REG_RB_COPY_MASK].u32; uint32_t copy_mask = regs[XE_GPU_REG_RB_COPY_MASK].u32;
assert_true(copy_mask == 0); assert_true(copy_mask == 0);
// RB_SURFACE_INFO
// http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html
uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
uint32_t surface_pitch = surface_info & 0x3FFF;
auto surface_msaa = static_cast<MsaaSamples>((surface_info >> 16) & 0x3);
// Depending on the source, pick the buffer we'll be sourcing.
// We then query for a cached framebuffer setup with that buffer active.
GLuint color_targets[4] = {kAnyTarget, kAnyTarget, kAnyTarget, kAnyTarget};
GLuint depth_target = kAnyTarget;
if (copy_src_select <= 3) {
// Source from a color target.
uint32_t color_info[4] = {
regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs[XE_GPU_REG_RB_COLOR1_INFO].u32,
regs[XE_GPU_REG_RB_COLOR2_INFO].u32,
regs[XE_GPU_REG_RB_COLOR3_INFO].u32,
};
uint32_t color_base = color_info[copy_src_select] & 0xFFF;
auto color_format = static_cast<ColorRenderTargetFormat>(
(color_info[copy_src_select] >> 16) & 0xF);
color_targets[copy_src_select] = GetColorRenderTarget(
surface_pitch, surface_msaa, color_base, color_format);
} else {
// Source from depth/stencil.
uint32_t depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32;
uint32_t depth_base = depth_info & 0xFFF;
auto depth_format =
static_cast<DepthRenderTargetFormat>((depth_info >> 16) & 0x1);
depth_target = GetDepthRenderTarget(surface_pitch, surface_msaa, depth_base,
depth_format);
}
auto source_framebuffer = GetFramebuffer(color_targets, depth_target);
if (!source_framebuffer) {
// If we get here we are likely missing some state checks.
assert_always("No framebuffer for copy source? no-op copy?");
PLOGE("No framebuffer for copy source");
return false;
}
GLenum read_format; GLenum read_format;
GLenum read_type; GLenum read_type;
switch (copy_dest_format) { switch (copy_dest_format) {
@ -1553,15 +1684,25 @@ bool CommandProcessor::IssueCopy(DrawCommand* draw_command) {
// TODO(benvanik): copy to staging texture then PBO back? // TODO(benvanik): copy to staging texture then PBO back?
void* ptr = membase_ + GpuToCpu(copy_dest_base); void* ptr = membase_ + GpuToCpu(copy_dest_base);
// TODO(benvanik): any way to scissor this? a200 has:
// REG_A2XX_RB_COPY_DEST_OFFSET = A2XX_RB_COPY_DEST_OFFSET_X(tile->xoff) |
// A2XX_RB_COPY_DEST_OFFSET_Y(tile->yoff);
// but I can't seem to find something similar.
// Maybe scissor rect/window offset?
uint32_t x = 0; uint32_t x = 0;
uint32_t y = 0; uint32_t y = 0;
uint32_t w = copy_dest_pitch; uint32_t w = copy_dest_pitch;
uint32_t h = copy_dest_height; uint32_t h = copy_dest_height;
// Make active so glReadPixels reads from us.
glBindFramebuffer(GL_READ_FRAMEBUFFER, source_framebuffer->framebuffer);
switch (copy_command) { switch (copy_command) {
case CopyCommand::kConvert: case CopyCommand::kConvert:
if (copy_src_select <= 3) { if (copy_src_select <= 3) {
// Source from a bound render target. // Source from a bound render target.
glReadBuffer(GL_COLOR_ATTACHMENT0 + copy_src_select); // glBindBuffer(GL_READ_FRAMEBUFFER, framebuffer)
glNamedFramebufferReadBuffer(source_framebuffer->framebuffer,
GL_COLOR_ATTACHMENT0 + copy_src_select);
glReadPixels(x, y, w, h, read_format, read_type, ptr); glReadPixels(x, y, w, h, read_format, read_type, ptr);
} else { } else {
// Source from the bound depth/stencil target. // Source from the bound depth/stencil target.
@ -1575,22 +1716,36 @@ bool CommandProcessor::IssueCopy(DrawCommand* draw_command) {
assert_unhandled_case(copy_command); assert_unhandled_case(copy_command);
return false; return false;
} }
glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
if (color_clear_enabled || depth_clear_enabled) { // Perform any requested clears.
// Clear requested, so let's setup for that. uint32_t copy_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32;
uint32_t copy_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32; uint32_t copy_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32;
uint32_t copy_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32; uint32_t copy_color_clear_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LOW].u32;
uint32_t copy_color_clear_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LOW].u32; assert_true(copy_color_clear == copy_color_clear_low);
assert_true(copy_color_clear == copy_color_clear_low);
if (color_clear_enabled) { if (color_clear_enabled) {
// Clear the render target we selected for copy. // Clear the render target we selected for copy.
assert_true(copy_src_select < 3); assert_true(copy_src_select < 3);
} // TODO(benvanik): verify color order.
float color[] = {(copy_color_clear & 0xFF) / 255.0f,
((copy_color_clear >> 8) & 0xFF) / 255.0f,
((copy_color_clear >> 16) & 0xFF) / 255.0f,
((copy_color_clear >> 24) & 0xFF) / 255.0f};
glClearNamedFramebufferfv(source_framebuffer->framebuffer, GL_COLOR,
copy_src_select, color);
}
if (depth_clear_enabled) { if (depth_clear_enabled) {
// Clear the current depth buffer. // Clear the current depth buffer.
} // TODO(benvanik): verify format.
union {
uint32_t uint_value;
GLfloat float_value;
} depth = {copy_depth_clear & 0xFFFFFF00};
GLint stencil = copy_depth_clear & 0xFF;
glClearNamedFramebufferfi(source_framebuffer->framebuffer, GL_DEPTH_STENCIL,
depth.float_value, stencil);
} }
return true; return true;
@ -1610,7 +1765,6 @@ GLuint CommandProcessor::GetColorRenderTarget(uint32_t pitch,
format = ColorRenderTargetFormat::k8888; format = ColorRenderTargetFormat::k8888;
} }
CachedColorRenderTarget* cached = nullptr;
for (auto& it = cached_color_render_targets_.begin(); for (auto& it = cached_color_render_targets_.begin();
it != cached_color_render_targets_.end(); ++it) { it != cached_color_render_targets_.end(); ++it) {
if (it->base == base && it->width == width && it->height == height && if (it->base == base && it->width == width && it->height == height &&
@ -1619,7 +1773,7 @@ GLuint CommandProcessor::GetColorRenderTarget(uint32_t pitch,
} }
} }
cached_color_render_targets_.push_back(CachedColorRenderTarget()); cached_color_render_targets_.push_back(CachedColorRenderTarget());
cached = &cached_color_render_targets_.back(); auto cached = &cached_color_render_targets_.back();
cached->base = base; cached->base = base;
cached->width = width; cached->width = width;
cached->height = height; cached->height = height;
@ -1649,7 +1803,6 @@ GLuint CommandProcessor::GetDepthRenderTarget(uint32_t pitch,
uint32_t width = 2560; uint32_t width = 2560;
uint32_t height = 2560; uint32_t height = 2560;
CachedDepthRenderTarget* cached = nullptr;
for (auto& it = cached_depth_render_targets_.begin(); for (auto& it = cached_depth_render_targets_.begin();
it != cached_depth_render_targets_.end(); ++it) { it != cached_depth_render_targets_.end(); ++it) {
if (it->base == base && it->width == width && it->height == height && if (it->base == base && it->width == width && it->height == height &&
@ -1658,7 +1811,7 @@ GLuint CommandProcessor::GetDepthRenderTarget(uint32_t pitch,
} }
} }
cached_depth_render_targets_.push_back(CachedDepthRenderTarget()); cached_depth_render_targets_.push_back(CachedDepthRenderTarget());
cached = &cached_depth_render_targets_.back(); auto cached = &cached_depth_render_targets_.back();
cached->base = base; cached->base = base;
cached->width = width; cached->width = width;
cached->height = height; cached->height = height;
@ -1682,38 +1835,58 @@ GLuint CommandProcessor::GetDepthRenderTarget(uint32_t pitch,
return cached->texture; return cached->texture;
} }
GLuint CommandProcessor::GetFramebuffer(GLuint color_targets[4], CommandProcessor::CachedFramebuffer* CommandProcessor::GetFramebuffer(
GLuint depth_target) { GLuint color_targets[4], GLuint depth_target) {
CachedFramebuffer* cached = nullptr;
for (auto& it = cached_framebuffers_.begin(); for (auto& it = cached_framebuffers_.begin();
it != cached_framebuffers_.end(); ++it) { it != cached_framebuffers_.end(); ++it) {
if ((depth_target == -1u || it->depth_target == depth_target) && if ((depth_target == kAnyTarget || it->depth_target == depth_target) &&
(color_targets[0] == -1u || it->color_targets[0] == color_targets[0]) && (color_targets[0] == kAnyTarget ||
(color_targets[1] == -1u || it->color_targets[1] == color_targets[1]) && it->color_targets[0] == color_targets[0]) &&
(color_targets[2] == -1u || it->color_targets[2] == color_targets[2]) && (color_targets[1] == kAnyTarget ||
(color_targets[3] == -1u || it->color_targets[3] == color_targets[3])) { it->color_targets[1] == color_targets[1]) &&
return it->framebuffer; (color_targets[2] == kAnyTarget ||
it->color_targets[2] == color_targets[2]) &&
(color_targets[3] == kAnyTarget ||
it->color_targets[3] == color_targets[3])) {
return &*it;
} }
} }
GLuint real_color_targets[4];
bool any_set = false;
for (int i = 0; i < 4; ++i) {
if (color_targets[i] == kAnyTarget) {
real_color_targets[i] = 0;
} else {
any_set = true;
real_color_targets[i] = color_targets[i];
}
}
GLuint real_depth_target;
if (depth_target == kAnyTarget) {
real_depth_target = 0;
} else {
any_set = true;
real_depth_target = depth_target;
}
if (!any_set) {
// No framebuffer required.
return nullptr;
}
cached_framebuffers_.push_back(CachedFramebuffer()); cached_framebuffers_.push_back(CachedFramebuffer());
cached = &cached_framebuffers_.back(); auto cached = &cached_framebuffers_.back();
glCreateFramebuffers(1, &cached->framebuffer); glCreateFramebuffers(1, &cached->framebuffer);
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
uint32_t color_target = color_targets[i]; cached->color_targets[i] = real_color_targets[i];
if (color_target == -1u) {
color_target = 0;
}
cached->color_targets[i] = color_target;
glNamedFramebufferTexture(cached->framebuffer, GL_COLOR_ATTACHMENT0 + i, glNamedFramebufferTexture(cached->framebuffer, GL_COLOR_ATTACHMENT0 + i,
color_target, 0); real_color_targets[i], 0);
} }
if (depth_target == -1u) { cached->depth_target = real_depth_target;
depth_target = 0;
}
cached->depth_target = depth_target;
glNamedFramebufferTexture(cached->framebuffer, GL_DEPTH_STENCIL_ATTACHMENT, glNamedFramebufferTexture(cached->framebuffer, GL_DEPTH_STENCIL_ATTACHMENT,
depth_target, 0); real_depth_target, 0);
return cached->framebuffer;
return cached;
} }
} // namespace gl4 } // namespace gl4

View File

@ -29,6 +29,16 @@ namespace gl4 {
class GL4GraphicsSystem; class GL4GraphicsSystem;
// Describes what the swap handler should present when an XE_SWAP packet is
// executed. All members default to zero, so a default-constructed instance
// never carries indeterminate values into the handler.
struct SwapParameters {
  // Origin of the source rectangle, taken from the low 15 bits of each half
  // of PA_SC_WINDOW_SCISSOR_TL.
  uint32_t x = 0;
  uint32_t y = 0;
  // Extent of the source rectangle; the handler blits from (x, y) to
  // (x + width, y + height).
  uint32_t width = 0;
  uint32_t height = 0;

  // GL framebuffer object to read from; 0 when no framebuffer is active.
  GLuint framebuffer = 0;
  // Color attachment of |framebuffer| holding the image to present.
  GLenum attachment = 0;
};
// TODO(benvanik): move more of the enums in here? // TODO(benvanik): move more of the enums in here?
struct DrawCommand { struct DrawCommand {
PrimitiveType prim_type; PrimitiveType prim_type;
@ -74,7 +84,8 @@ class CommandProcessor {
CommandProcessor(GL4GraphicsSystem* graphics_system); CommandProcessor(GL4GraphicsSystem* graphics_system);
~CommandProcessor(); ~CommandProcessor();
void set_swap_handler(std::function<void()> fn) { swap_handler_ = fn; } typedef std::function<void(const SwapParameters& params)> SwapHandler;
// Installs the callback that is invoked with the computed SwapParameters when
// an XE_SWAP packet is executed. Replaces any handler set earlier.
void set_swap_handler(SwapHandler fn) { swap_handler_ = fn; }
uint64_t QueryTime(); uint64_t QueryTime();
// Returns the current value of counter_.
uint32_t counter() const {
  return counter_;
}
@ -91,6 +102,26 @@ class CommandProcessor {
private: private:
class RingbufferReader; class RingbufferReader;
// One entry of the framebuffer cache: a GL framebuffer object plus the texture
// names attached to it, which GetFramebuffer compares against when searching.
struct CachedFramebuffer {
// Texture attached at GL_COLOR_ATTACHMENT0 + i; 0 where nothing is attached.
GLuint color_targets[4];
// Texture attached at GL_DEPTH_STENCIL_ATTACHMENT; 0 where nothing is attached.
GLuint depth_target;
// Framebuffer object name filled in by glCreateFramebuffers.
GLuint framebuffer;
};
// One entry of cached_color_render_targets_. GetColorRenderTarget searches the
// cache by base, width and height (presumably format as well; the rest of the
// comparison is not visible here -- confirm).
struct CachedColorRenderTarget {
// Callers pass the low 12 bits of an RB_COLOR*_INFO register.
uint32_t base;
uint32_t width;
uint32_t height;
xenos::ColorRenderTargetFormat format;
// GL texture name backing this target (presumably what GetColorRenderTarget
// returns -- confirm).
GLuint texture;
};
// One entry of cached_depth_render_targets_. GetDepthRenderTarget searches the
// cache by base, width and height (presumably format as well; the rest of the
// comparison is not visible here -- confirm).
struct CachedDepthRenderTarget {
// Callers pass the low 12 bits of the RB_DEPTH_INFO register.
uint32_t base;
uint32_t width;
uint32_t height;
xenos::DepthRenderTargetFormat format;
// GL texture name backing this target; GetDepthRenderTarget returns it.
GLuint texture;
};
void WorkerMain(); void WorkerMain();
bool SetupGL(); bool SetupGL();
void ShutdownGL(); void ShutdownGL();
@ -98,6 +129,7 @@ class CommandProcessor {
void WriteRegister(uint32_t packet_ptr, uint32_t index, uint32_t value); void WriteRegister(uint32_t packet_ptr, uint32_t index, uint32_t value);
void MakeCoherent(); void MakeCoherent();
void PrepareForWait(); void PrepareForWait();
void ReturnFromWait();
void ExecutePrimaryBuffer(uint32_t start_index, uint32_t end_index); void ExecutePrimaryBuffer(uint32_t start_index, uint32_t end_index);
void ExecuteIndirectBuffer(uint32_t ptr, uint32_t length); void ExecuteIndirectBuffer(uint32_t ptr, uint32_t length);
@ -168,7 +200,8 @@ class CommandProcessor {
// bool PopulateVertexBuffers(DrawCommand* draw_command); // bool PopulateVertexBuffers(DrawCommand* draw_command);
bool IssueCopy(DrawCommand* draw_command); bool IssueCopy(DrawCommand* draw_command);
GLuint GetFramebuffer(GLuint color_targets[4], GLuint depth_target); CachedFramebuffer* GetFramebuffer(GLuint color_targets[4],
GLuint depth_target);
GLuint GetColorRenderTarget(uint32_t pitch, xenos::MsaaSamples samples, GLuint GetColorRenderTarget(uint32_t pitch, xenos::MsaaSamples samples,
uint32_t base, uint32_t base,
xenos::ColorRenderTargetFormat format); xenos::ColorRenderTargetFormat format);
@ -184,7 +217,7 @@ class CommandProcessor {
std::thread worker_thread_; std::thread worker_thread_;
std::atomic<bool> worker_running_; std::atomic<bool> worker_running_;
std::unique_ptr<GLContext> context_; std::unique_ptr<GLContext> context_;
std::function<void()> swap_handler_; SwapHandler swap_handler_;
uint64_t time_base_; uint64_t time_base_;
uint32_t counter_; uint32_t counter_;
@ -202,37 +235,20 @@ class CommandProcessor {
uint64_t bin_select_; uint64_t bin_select_;
uint64_t bin_mask_; uint64_t bin_mask_;
GLuint uniform_data_buffer_;
std::vector<std::unique_ptr<GL4Shader>> all_shaders_; std::vector<std::unique_ptr<GL4Shader>> all_shaders_;
std::unordered_map<uint64_t, GL4Shader*> shader_cache_; std::unordered_map<uint64_t, GL4Shader*> shader_cache_;
GL4Shader* active_vertex_shader_; GL4Shader* active_vertex_shader_;
GL4Shader* active_pixel_shader_; GL4Shader* active_pixel_shader_;
GLuint uniform_data_buffer_; CachedFramebuffer* active_framebuffer_;
std::vector<CachedFramebuffer> cached_framebuffers_;
std::vector<CachedColorRenderTarget> cached_color_render_targets_;
std::vector<CachedDepthRenderTarget> cached_depth_render_targets_;
DrawCommand draw_command_; DrawCommand draw_command_;
struct CachedFramebuffer {
GLuint color_targets[4];
GLuint depth_target;
GLuint framebuffer;
};
std::vector<CachedFramebuffer> cached_framebuffers_;
struct CachedColorRenderTarget {
uint32_t base;
uint32_t width;
uint32_t height;
xenos::ColorRenderTargetFormat format;
GLuint texture;
};
std::vector<CachedColorRenderTarget> cached_color_render_targets_;
struct CachedDepthRenderTarget {
uint32_t base;
uint32_t width;
uint32_t height;
xenos::DepthRenderTargetFormat format;
GLuint texture;
};
std::vector<CachedDepthRenderTarget> cached_depth_render_targets_;
}; };
} // namespace gl4 } // namespace gl4

View File

@ -10,11 +10,13 @@
#ifndef XENIA_GPU_GL4_GL4_GPU_PRIVATE_H_ #ifndef XENIA_GPU_GL4_GL4_GPU_PRIVATE_H_
#define XENIA_GPU_GL4_GL4_GPU_PRIVATE_H_ #define XENIA_GPU_GL4_GL4_GPU_PRIVATE_H_
// GL headers #include <gflags/gflags.h>
#include <xenia/common.h> #include <xenia/common.h>
#include <xenia/gpu/gl4/gl4_gpu.h> #include <xenia/gpu/gl4/gl4_gpu.h>
DECLARE_bool(thread_safe_gl);
namespace xe { namespace xe {
namespace gpu { namespace gpu {
namespace gl4 { namespace gl4 {

View File

@ -9,8 +9,12 @@
#include <xenia/gpu/gl4/gl4_gpu.h> #include <xenia/gpu/gl4/gl4_gpu.h>
#include <xenia/gpu/gl4/gl4_gpu-private.h>
#include <xenia/gpu/gl4/gl4_graphics_system.h> #include <xenia/gpu/gl4/gl4_graphics_system.h>
DEFINE_bool(thread_safe_gl, false,
"Only allow one GL context to be active at a time.");
namespace xe { namespace xe {
namespace gpu { namespace gpu {
namespace gl4 { namespace gl4 {

View File

@ -17,6 +17,8 @@ namespace xe {
namespace gpu { namespace gpu {
namespace gl4 { namespace gl4 {
extern "C" GLEWContext* glewGetContext();
GL4GraphicsSystem::GL4GraphicsSystem(Emulator* emulator) GL4GraphicsSystem::GL4GraphicsSystem(Emulator* emulator)
: GraphicsSystem(emulator), timer_queue_(nullptr), vsync_timer_(nullptr) {} : GraphicsSystem(emulator), timer_queue_(nullptr), vsync_timer_(nullptr) {}
@ -57,7 +59,7 @@ X_STATUS GL4GraphicsSystem::Setup() {
return X_STATUS_UNSUCCESSFUL; return X_STATUS_UNSUCCESSFUL;
} }
command_processor_->set_swap_handler( command_processor_->set_swap_handler(
std::bind(&GL4GraphicsSystem::SwapHandler, this)); [this](const SwapParameters& swap_params) { SwapHandler(swap_params); });
// Let the processor know we want register access callbacks. // Let the processor know we want register access callbacks.
emulator_->memory()->AddMappedRange( emulator_->memory()->AddMappedRange(
@ -115,12 +117,18 @@ void GL4GraphicsSystem::MarkVblank() {
DispatchInterruptCallback(0, 2); DispatchInterruptCallback(0, 2);
} }
void GL4GraphicsSystem::SwapHandler() { void GL4GraphicsSystem::SwapHandler(const SwapParameters& swap_params) {
SCOPE_profile_cpu_f("gpu"); SCOPE_profile_cpu_f("gpu");
// Swap requested. Synchronously post a request to the loop so that // Swap requested. Synchronously post a request to the loop so that
// we do the swap in the right thread. // we do the swap in the right thread.
control_->SynchronousRepaint(); control_->SynchronousRepaint([&]() {
glBlitNamedFramebuffer(swap_params.framebuffer, 0, swap_params.x,
swap_params.y, swap_params.x + swap_params.width,
swap_params.y + swap_params.height, 0, 0,
control_->width(), control_->height(),
GL_COLOR_BUFFER_BIT, GL_LINEAR);
});
// Roll over vblank. // Roll over vblank.
MarkVblank(); MarkVblank();

View File

@ -37,7 +37,7 @@ class GL4GraphicsSystem : public GraphicsSystem {
private: private:
void MarkVblank(); void MarkVblank();
void SwapHandler(); void SwapHandler(const SwapParameters& swap_params);
uint64_t ReadRegister(uint64_t addr); uint64_t ReadRegister(uint64_t addr);
void WriteRegister(uint64_t addr, uint64_t value); void WriteRegister(uint64_t addr, uint64_t value);

View File

@ -9,13 +9,18 @@
#include <xenia/gpu/gl4/gl_context.h> #include <xenia/gpu/gl4/gl_context.h>
#include <mutex>
#include <poly/assert.h> #include <poly/assert.h>
#include <poly/logging.h> #include <poly/logging.h>
#include <xenia/gpu/gl4/gl4_gpu-private.h>
namespace xe { namespace xe {
namespace gpu { namespace gpu {
namespace gl4 { namespace gl4 {
static std::recursive_mutex global_gl_mutex_;
thread_local GLEWContext* tls_glew_context_ = nullptr; thread_local GLEWContext* tls_glew_context_ = nullptr;
thread_local WGLEWContext* tls_wglew_context_ = nullptr; thread_local WGLEWContext* tls_wglew_context_ = nullptr;
extern "C" GLEWContext* glewGetContext() { return tls_glew_context_; } extern "C" GLEWContext* glewGetContext() { return tls_glew_context_; }
@ -110,24 +115,31 @@ bool GLContext::Initialize(HWND hwnd) {
// Clearing errors. // Clearing errors.
} }
ClearCurrent();
return true; return true;
} }
std::unique_ptr<GLContext> GLContext::CreateShared() { std::unique_ptr<GLContext> GLContext::CreateShared() {
assert_not_null(glrc_); assert_not_null(glrc_);
int context_flags = 0; HGLRC new_glrc = nullptr;
{
GLContextLock context_lock(this);
int context_flags = 0;
#if DEBUG #if DEBUG
context_flags |= WGL_CONTEXT_DEBUG_BIT_ARB; context_flags |= WGL_CONTEXT_DEBUG_BIT_ARB;
#endif // DEBUG #endif // DEBUG
int attrib_list[] = {WGL_CONTEXT_MAJOR_VERSION_ARB, 4, // int attrib_list[] = {WGL_CONTEXT_MAJOR_VERSION_ARB, 4, //
WGL_CONTEXT_MINOR_VERSION_ARB, 5, // WGL_CONTEXT_MINOR_VERSION_ARB, 5, //
WGL_CONTEXT_FLAGS_ARB, context_flags, // WGL_CONTEXT_FLAGS_ARB, context_flags, //
0}; 0};
auto new_glrc = wglCreateContextAttribsARB(dc_, glrc_, attrib_list); new_glrc = wglCreateContextAttribsARB(dc_, glrc_, attrib_list);
if (!new_glrc) { if (!new_glrc) {
PLOGE("Could not create shared context"); PLOGE("Could not create shared context");
return nullptr; return nullptr;
}
} }
auto new_context = std::make_unique<GLContext>(hwnd_, new_glrc); auto new_context = std::make_unique<GLContext>(hwnd_, new_glrc);
@ -138,26 +150,31 @@ std::unique_ptr<GLContext> GLContext::CreateShared() {
glewExperimental = GL_TRUE; glewExperimental = GL_TRUE;
if (glewInit() != GLEW_OK) { if (glewInit() != GLEW_OK) {
new_context->ClearCurrent();
PLOGE("Unable to initialize GLEW"); PLOGE("Unable to initialize GLEW");
return nullptr; return nullptr;
} }
if (wglewInit() != GLEW_OK) { if (wglewInit() != GLEW_OK) {
new_context->ClearCurrent();
PLOGE("Unable to initialize WGLEW"); PLOGE("Unable to initialize WGLEW");
return nullptr; return nullptr;
} }
new_context->ClearCurrent(); new_context->ClearCurrent();
MakeCurrent();
while (glGetError()) {
// Clearing errors.
}
return new_context; return new_context;
} }
bool GLContext::MakeCurrent() { bool GLContext::MakeCurrent() {
if (FLAGS_thread_safe_gl) {
global_gl_mutex_.lock();
}
if (!wglMakeCurrent(dc_, glrc_)) { if (!wglMakeCurrent(dc_, glrc_)) {
if (FLAGS_thread_safe_gl) {
global_gl_mutex_.unlock();
}
PLOGE("Unable to make GL context current");
return false; return false;
} }
tls_glew_context_ = &glew_context_; tls_glew_context_ = &glew_context_;
@ -169,6 +186,10 @@ void GLContext::ClearCurrent() {
wglMakeCurrent(nullptr, nullptr); wglMakeCurrent(nullptr, nullptr);
tls_glew_context_ = nullptr; tls_glew_context_ = nullptr;
tls_wglew_context_ = nullptr; tls_wglew_context_ = nullptr;
if (FLAGS_thread_safe_gl) {
global_gl_mutex_.unlock();
}
} }
} // namespace gl4 } // namespace gl4

View File

@ -43,6 +43,16 @@ class GLContext {
WGLEWContext wglew_context_; WGLEWContext wglew_context_;
}; };
// Scoped guard that makes a GLContext current on the calling thread for the
// lifetime of the guard and clears it again on destruction.
//
// GLContext::MakeCurrent() reports failure by returning false, and on that
// path it has already released the global GL mutex it took (when
// FLAGS_thread_safe_gl is set). GLContext::ClearCurrent() unconditionally
// releases that mutex, so it must only be called to balance a MakeCurrent()
// that actually succeeded; otherwise the mutex would be unlocked twice.
struct GLContextLock {
  // context must be non-null and must outlive the guard.
  GLContextLock(GLContext* context)
      : context_(context), is_current_(context_->MakeCurrent()) {}

  ~GLContextLock() {
    // Only undo a successful acquire; a failed MakeCurrent() left nothing
    // (neither the context nor the mutex) for us to release.
    if (is_current_) {
      context_->ClearCurrent();
    }
  }

  // Copying would run ClearCurrent() once per copy for a single
  // MakeCurrent(), so the guard is non-copyable.
  GLContextLock(const GLContextLock&) = delete;
  GLContextLock& operator=(const GLContextLock&) = delete;

 private:
  GLContext* context_;
  // True when the constructor's MakeCurrent() call succeeded.
  bool is_current_;
};
} // namespace gl4 } // namespace gl4
} // namespace gpu } // namespace gpu
} // namespace xe } // namespace xe

View File

@ -11,15 +11,18 @@
#include <poly/assert.h> #include <poly/assert.h>
#include <poly/logging.h> #include <poly/logging.h>
#include <xenia/gpu/gl4/gl4_gpu-private.h>
#include <xenia/profiling.h> #include <xenia/profiling.h>
namespace xe { namespace xe {
namespace gpu { namespace gpu {
namespace gl4 { namespace gl4 {
extern "C" GLEWContext* glewGetContext();
extern "C" WGLEWContext* wglewGetContext();
WGLControl::WGLControl(poly::ui::Loop* loop) WGLControl::WGLControl(poly::ui::Loop* loop)
: poly::ui::win32::Win32Control(Flags::kFlagOwnPaint), : poly::ui::win32::Win32Control(Flags::kFlagOwnPaint), loop_(loop) {}
loop_(loop) {}
WGLControl::~WGLControl() = default; WGLControl::~WGLControl() = default;
@ -70,21 +73,30 @@ void WGLControl::OnLayout(poly::ui::UIEvent& e) { Control::ResizeToFill(); }
LRESULT WGLControl::WndProc(HWND hWnd, UINT message, WPARAM wParam, LRESULT WGLControl::WndProc(HWND hWnd, UINT message, WPARAM wParam,
LPARAM lParam) { LPARAM lParam) {
switch (message) { switch (message) {
case WM_PAINT: case WM_PAINT: {
context_.MakeCurrent(); GLContextLock context_lock(&context_);
// TODO(benvanik): is viewport needed?
glViewport(0, 0, width_, height_); glViewport(0, 0, width_, height_);
glClearColor(rand() / (float)RAND_MAX, 1.0f, 0, 1.0f); float clear_color[] = {rand() / (float)RAND_MAX, 1.0f, 0, 1.0f};
glClear(GL_COLOR_BUFFER_BIT); glClearNamedFramebufferfv(0, GL_COLOR, 0, clear_color);
if (current_paint_callback_) {
current_paint_callback_();
current_paint_callback_ = nullptr;
}
// TODO(benvanik): profiler present. // TODO(benvanik): profiler present.
// Profiler::Present(); // Profiler::Present();
SwapBuffers(context_.dc()); SwapBuffers(context_.dc());
break; } break;
} }
return Win32Control::WndProc(hWnd, message, wParam, lParam); return Win32Control::WndProc(hWnd, message, wParam, lParam);
} }
void WGLControl::SynchronousRepaint() { void WGLControl::SynchronousRepaint(std::function<void()> paint_callback) {
SCOPE_profile_cpu_f("gpu"); SCOPE_profile_cpu_f("gpu");
assert_null(current_paint_callback_);
current_paint_callback_ = std::move(paint_callback);
// This will not return until the WM_PAINT has completed. // This will not return until the WM_PAINT has completed.
RedrawWindow(hwnd(), nullptr, nullptr, RedrawWindow(hwnd(), nullptr, nullptr,
RDW_INTERNALPAINT | RDW_UPDATENOW | RDW_ALLCHILDREN); RDW_INTERNALPAINT | RDW_UPDATENOW | RDW_ALLCHILDREN);

View File

@ -10,6 +10,8 @@
#ifndef XENIA_GPU_GL4_WGL_CONTROL_H_ #ifndef XENIA_GPU_GL4_WGL_CONTROL_H_
#define XENIA_GPU_GL4_WGL_CONTROL_H_ #define XENIA_GPU_GL4_WGL_CONTROL_H_
#include <functional>
#include <poly/threading.h> #include <poly/threading.h>
#include <poly/ui/loop.h> #include <poly/ui/loop.h>
#include <poly/ui/win32/win32_control.h> #include <poly/ui/win32/win32_control.h>
@ -26,7 +28,7 @@ class WGLControl : public poly::ui::win32::Win32Control {
GLContext* context() { return &context_; } GLContext* context() { return &context_; }
void SynchronousRepaint(); void SynchronousRepaint(std::function<void()> paint_callback);
protected: protected:
bool Create() override; bool Create() override;
@ -39,6 +41,7 @@ class WGLControl : public poly::ui::win32::Win32Control {
private: private:
poly::ui::Loop* loop_; poly::ui::Loop* loop_;
GLContext context_; GLContext context_;
std::function<void()> current_paint_callback_;
}; };
} // namespace gl4 } // namespace gl4