From f438ae1bfd96479615c4d7f3c66d55cac36f5926 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Wed, 24 Dec 2014 22:35:03 -0800 Subject: [PATCH] Swapping. --- src/poly/ui/win32/win32_loop.cc | 16 +- src/xenia/gpu/gl4/command_processor.cc | 301 ++++++++++++++++++----- src/xenia/gpu/gl4/command_processor.h | 70 ++++-- src/xenia/gpu/gl4/gl4_gpu-private.h | 4 +- src/xenia/gpu/gl4/gl4_gpu.cc | 4 + src/xenia/gpu/gl4/gl4_graphics_system.cc | 14 +- src/xenia/gpu/gl4/gl4_graphics_system.h | 2 +- src/xenia/gpu/gl4/gl_context.cc | 53 ++-- src/xenia/gpu/gl4/gl_context.h | 10 + src/xenia/gpu/gl4/wgl_control.cc | 28 ++- src/xenia/gpu/gl4/wgl_control.h | 5 +- 11 files changed, 380 insertions(+), 127 deletions(-) diff --git a/src/poly/ui/win32/win32_loop.cc b/src/poly/ui/win32/win32_loop.cc index 426fa3f53..dad966528 100644 --- a/src/poly/ui/win32/win32_loop.cc +++ b/src/poly/ui/win32/win32_loop.cc @@ -33,6 +33,10 @@ Win32Loop::Win32Loop() : thread_id_(0) { poly::threading::set_name("Win32 Loop"); thread_id_ = GetCurrentThreadId(); + // Make a Win32 call to enable the thread queue. + MSG msg; + PeekMessage(&msg, NULL, WM_USER, WM_USER, PM_NOREMOVE); + init_fence.Signal(); ThreadMain(); @@ -68,9 +72,11 @@ void Win32Loop::ThreadMain() { void Win32Loop::Post(std::function fn) { assert_true(thread_id_ != 0); - PostThreadMessage(thread_id_, kWmWin32LoopPost, - reinterpret_cast(this), - reinterpret_cast(new PostedFn(std::move(fn)))); + if (!PostThreadMessage( + thread_id_, kWmWin32LoopPost, reinterpret_cast(this), + reinterpret_cast(new PostedFn(std::move(fn))))) { + assert_always("Unable to post message to thread queue"); + } } void Win32Loop::Quit() { @@ -79,9 +85,7 @@ void Win32Loop::Quit() { reinterpret_cast(this), 0); } -void Win32Loop::AwaitQuit() { - quit_fence_.Wait(); -} +void Win32Loop::AwaitQuit() { quit_fence_.Wait(); } } // namespace win32 } // namespace ui diff --git a/src/xenia/gpu/gl4/command_processor.cc b/src/xenia/gpu/gl4/command_processor.cc index 27247288a..4f6fa56f8 100644 --- a/src/xenia/gpu/gl4/command_processor.cc +++ b/src/xenia/gpu/gl4/command_processor.cc @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -28,7 +29,9 @@ namespace gl4 { using namespace xe::gpu::xenos; -extern "C" extern "C" GLEWContext* glewGetContext(); +extern "C" GLEWContext* glewGetContext(); + +const GLuint kAnyTarget = UINT_MAX; CommandProcessor::CommandProcessor(GL4GraphicsSystem* graphics_system) : memory_(graphics_system->memory()), @@ -48,7 +51,8 @@ CommandProcessor::CommandProcessor(GL4GraphicsSystem* graphics_system) bin_select_(0xFFFFFFFFull), bin_mask_(0xFFFFFFFFull), active_vertex_shader_(nullptr), - active_pixel_shader_(nullptr) { + active_pixel_shader_(nullptr), + active_framebuffer_(nullptr) { std::memset(&draw_command_, 0, sizeof(draw_command_)); LARGE_INTEGER perf_counter; QueryPerformanceCounter(&perf_counter); @@ -70,7 +74,6 @@ bool CommandProcessor::Initialize(std::unique_ptr context) { worker_thread_ = std::thread([this]() { poly::threading::set_name("GL4 Worker"); xe::Profiler::ThreadEnter("GL4 Worker"); - context_->MakeCurrent(); WorkerMain(); xe::Profiler::ThreadExit(); }); @@ -89,6 +92,7 @@ void CommandProcessor::Shutdown() { } void CommandProcessor::WorkerMain() { + context_->MakeCurrent(); if (!SetupGL()) { PFATAL("Unable to setup command processor GL state"); return; @@ -106,9 +110,11 @@ void CommandProcessor::WorkerMain() { const int wait_time_ms = 5; if (WaitForSingleObject(write_ptr_index_event_, wait_time_ms) == WAIT_TIMEOUT) { + ReturnFromWait(); write_ptr_index = write_ptr_index_.load(); continue; } + ReturnFromWait(); } assert_true(read_ptr_index_ != write_ptr_index); @@ -128,6 +134,7 @@ void CommandProcessor::WorkerMain() { } ShutdownGL(); + context_->ClearCurrent(); } bool CommandProcessor::SetupGL() { @@ -232,6 +239,16 @@ void CommandProcessor::PrepareForWait() { // synchronize here. // glFlush(); glFinish(); + + if (FLAGS_thread_safe_gl) { + context_->ClearCurrent(); + } +} + +void CommandProcessor::ReturnFromWait() { + if (FLAGS_thread_safe_gl) { + context_->MakeCurrent(); + } } class CommandProcessor::RingbufferReader { @@ -588,6 +605,10 @@ bool CommandProcessor::ExecutePacketType3_XE_SWAP(RingbufferReader* reader, uint32_t packet_ptr, uint32_t packet, uint32_t count) { + auto& regs = *register_file_; + + PLOGI("XE_SWAP"); + // Xenia-specific VdSwap hook. // VdSwap will post this to tell us we need to swap the screen/fire an // interrupt. @@ -595,10 +616,36 @@ bool CommandProcessor::ExecutePacketType3_XE_SWAP(RingbufferReader* reader, // 63 words here, but only the first has any data. reader->TraceData(1); uint32_t frontbuffer_ptr = reader->Read(); - // TODO(benvanik): something with the frontbuffer ptr. reader->Advance(count - 1); if (swap_handler_) { - swap_handler_(); + SwapParameters swap_params; + + // Lookup the framebuffer in the recently-resolved list. + // TODO(benvanik): make this much more sophisticated. + // TODO(benvanik): handle not found cases. + // TODO(benvanik): handle dirty cases (resolved to sysmem, touched). + // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + // HACK: just use whatever our current framebuffer is. + if (active_framebuffer_) { + swap_params.framebuffer = active_framebuffer_->framebuffer; + // TODO(benvanik): pick the right one? + swap_params.attachment = GL_COLOR_ATTACHMENT0; + } else { + swap_params.framebuffer = 0; + } + + // Guess frontbuffer dimensions. + // Command buffer seems to set these right before the XE_SWAP. + uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32; + uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32; + swap_params.x = window_scissor_tl & 0x7FFF; + swap_params.y = (window_scissor_tl >> 16) & 0x7FFF; + swap_params.width = window_scissor_br & 0x7FFF - swap_params.x; + swap_params.height = (window_scissor_br >> 16) & 0x7FFF - swap_params.y; + + PrepareForWait(); + swap_handler_(swap_params); + ReturnFromWait(); } return true; } @@ -677,6 +724,7 @@ bool CommandProcessor::ExecutePacketType3_WAIT_REG_MEM(RingbufferReader* reader, if (wait >= 0x100) { PrepareForWait(); Sleep(wait / 0x100); + ReturnFromWait(); } else { SwitchToThread(); } @@ -1077,18 +1125,21 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) { if (enable_mode == ModeControl::kIgnore) { // Ignored. return true; + } else if (enable_mode == ModeControl::kCopy) { + // Special copy handling. + return IssueCopy(draw_command); } if (!UpdateRenderTargets(draw_command)) { PLOGE("Unable to setup render targets"); return false; } - - if (enable_mode == ModeControl::kCopy) { - // Special copy handling. - return IssueCopy(draw_command); + if (!active_framebuffer_) { + // No framebuffer, so nothing we do will actually have an effect. + // Treat it as a no-op. + XETRACECP("No-op draw (no framebuffer set)"); + return true; } - if (!UpdateState(draw_command)) { PLOGE("Unable to setup render state"); return false; @@ -1301,7 +1352,6 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) { /* 3 */ GL_MAX, /* 4 */ GL_FUNC_REVERSE_SUBTRACT, }; - uint32_t color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32; uint32_t blend_control[4] = { regs[XE_GPU_REG_RB_BLENDCONTROL_0].u32, regs[XE_GPU_REG_RB_BLENDCONTROL_1].u32, @@ -1321,8 +1371,6 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) { auto dest_blend_alpha = blend_map[(blend_control[n] & 0x1F000000) >> 24]; // A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN auto blend_op_alpha = blend_op_map[(blend_control[n] & 0x00E00000) >> 21]; - // A2XX_RB_COLOR_MASK_WRITE_* - uint32_t write_mask = (color_mask >> (n * 4)) & 0xF; // A2XX_RB_COLORCONTROL_BLEND_DISABLE ?? Can't find this! // Just guess based on actions. bool blend_enable = @@ -1443,34 +1491,78 @@ bool CommandProcessor::UpdateRenderTargets(DrawCommand* draw_command) { // Get/create all color render targets, if we are using them. // In depth-only mode we don't need them. - GLuint color_targets[4] = {0, 0, 0, 0}; + GLenum draw_buffers[4] = {GL_NONE, GL_NONE, GL_NONE, GL_NONE}; + GLuint color_targets[4] = {kAnyTarget, kAnyTarget, kAnyTarget, kAnyTarget}; if (enable_mode == ModeControl::kColorDepth) { uint32_t color_info[4] = { regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs[XE_GPU_REG_RB_COLOR1_INFO].u32, regs[XE_GPU_REG_RB_COLOR2_INFO].u32, regs[XE_GPU_REG_RB_COLOR3_INFO].u32, }; + // A2XX_RB_COLOR_MASK_WRITE_* == D3DRS_COLORWRITEENABLE + uint32_t color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32; for (int n = 0; n < poly::countof(color_info); n++) { + uint32_t write_mask = (color_mask >> (n * 4)) & 0xF; + if (!write_mask) { + // Unused, so keep disabled and set to wildcard so we'll take any + // framebuffer that has it. + continue; + } uint32_t color_base = color_info[n] & 0xFFF; auto color_format = static_cast((color_info[n] >> 16) & 0xF); color_targets[n] = GetColorRenderTarget(surface_pitch, surface_msaa, color_base, color_format); + draw_buffers[n] = GL_COLOR_ATTACHMENT0 + n; + glColorMaski(n, !!(write_mask & 0x1), !!(write_mask & 0x2), + !!(write_mask & 0x4), !!(write_mask & 0x8)); } } - uint32_t depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32; - uint32_t depth_base = depth_info & 0xFFF; - auto depth_format = - static_cast((depth_info >> 16) & 0x1); - GLuint depth_target = GetDepthRenderTarget(surface_pitch, surface_msaa, - depth_base, depth_format); - // TODO(benvanik): when a game switches does it expect to keep the same - // depth buffer contents? + // Get/create depth buffer, but only if we are going to use it. + uint32_t depth_control = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; + uint32_t stencil_ref_mask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32; + bool uses_depth = + (depth_control & 0x00000002) || (depth_control & 0x00000004); + uint32_t stencil_write_mask = (stencil_ref_mask & 0x00FF0000) >> 16; + bool uses_stencil = (depth_control & 0x00000001) || (stencil_write_mask != 0); + GLuint depth_target = kAnyTarget; + if (uses_depth && uses_stencil) { + uint32_t depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32; + uint32_t depth_base = depth_info & 0xFFF; + auto depth_format = + static_cast((depth_info >> 16) & 0x1); + depth_target = GetDepthRenderTarget(surface_pitch, surface_msaa, depth_base, + depth_format); + // TODO(benvanik): when a game switches does it expect to keep the same + // depth buffer contents? + } // Get/create a framebuffer with the required targets. - GLuint framebuffer = GetFramebuffer(color_targets, depth_target); - glBindFramebuffer(GL_FRAMEBUFFER, framebuffer); + // Note that none may be returned if we really don't need one. + auto cached_framebuffer = GetFramebuffer(color_targets, depth_target); + active_framebuffer_ = cached_framebuffer; + if (!active_framebuffer_) { + // Nothing to do. + return true; + } + + // Setup just the targets we want. + glNamedFramebufferDrawBuffers(cached_framebuffer->framebuffer, 4, + draw_buffers); + + // Make active. + // TODO(benvanik): can we do this all named? + // TODO(benvanik): do we want this on READ too? + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, cached_framebuffer->framebuffer); + + // TEST TEST TEST TEST TEST TEST TEST TEST TEST TEST + // Pretend we are drawing. + glEnable(GL_SCISSOR_TEST); + glScissor(100, 100, 100, 100); + float red[] = {rand() / (float)RAND_MAX, 0, 0, 1.0f}; + glClearNamedFramebufferfv(active_framebuffer_->framebuffer, GL_COLOR, 0, red); + glDisable(GL_SCISSOR_TEST); return true; } @@ -1519,6 +1611,45 @@ bool CommandProcessor::IssueCopy(DrawCommand* draw_command) { uint32_t copy_mask = regs[XE_GPU_REG_RB_COPY_MASK].u32; assert_true(copy_mask == 0); + // RB_SURFACE_INFO + // http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html + uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; + uint32_t surface_pitch = surface_info & 0x3FFF; + auto surface_msaa = static_cast((surface_info >> 16) & 0x3); + + // Depending on the source, pick the buffer we'll be sourcing. + // We then query for a cached framebuffer setup with that buffer active. + GLuint color_targets[4] = {kAnyTarget, kAnyTarget, kAnyTarget, kAnyTarget}; + GLuint depth_target = kAnyTarget; + if (copy_src_select <= 3) { + // Source from a color target. + uint32_t color_info[4] = { + regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs[XE_GPU_REG_RB_COLOR1_INFO].u32, + regs[XE_GPU_REG_RB_COLOR2_INFO].u32, + regs[XE_GPU_REG_RB_COLOR3_INFO].u32, + }; + uint32_t color_base = color_info[copy_src_select] & 0xFFF; + auto color_format = static_cast( + (color_info[copy_src_select] >> 16) & 0xF); + color_targets[copy_src_select] = GetColorRenderTarget( + surface_pitch, surface_msaa, color_base, color_format); + } else { + // Source from depth/stencil. + uint32_t depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32; + uint32_t depth_base = depth_info & 0xFFF; + auto depth_format = + static_cast((depth_info >> 16) & 0x1); + depth_target = GetDepthRenderTarget(surface_pitch, surface_msaa, depth_base, + depth_format); + } + auto source_framebuffer = GetFramebuffer(color_targets, depth_target); + if (!source_framebuffer) { + // If we get here we are likely missing some state checks. + assert_always("No framebuffer for copy source? no-op copy?"); + PLOGE("No framebuffer for copy source"); + return false; + } + GLenum read_format; GLenum read_type; switch (copy_dest_format) { @@ -1553,15 +1684,25 @@ bool CommandProcessor::IssueCopy(DrawCommand* draw_command) { // TODO(benvanik): copy to staging texture then PBO back? void* ptr = membase_ + GpuToCpu(copy_dest_base); + // TODO(benvanik): any way to scissor this? a200 has: + // REG_A2XX_RB_COPY_DEST_OFFSET = A2XX_RB_COPY_DEST_OFFSET_X(tile->xoff) | + // A2XX_RB_COPY_DEST_OFFSET_Y(tile->yoff); + // but I can't seem to find something similar. + // Maybe scissor rect/window offset? uint32_t x = 0; uint32_t y = 0; uint32_t w = copy_dest_pitch; uint32_t h = copy_dest_height; + + // Make active so glReadPixels reads from us. + glBindFramebuffer(GL_READ_FRAMEBUFFER, source_framebuffer->framebuffer); switch (copy_command) { case CopyCommand::kConvert: if (copy_src_select <= 3) { // Source from a bound render target. - glReadBuffer(GL_COLOR_ATTACHMENT0 + copy_src_select); + // glBindBuffer(GL_READ_FRAMEBUFFER, framebuffer) + glNamedFramebufferReadBuffer(source_framebuffer->framebuffer, + GL_COLOR_ATTACHMENT0 + copy_src_select); glReadPixels(x, y, w, h, read_format, read_type, ptr); } else { // Source from the bound depth/stencil target. @@ -1575,22 +1716,36 @@ bool CommandProcessor::IssueCopy(DrawCommand* draw_command) { assert_unhandled_case(copy_command); return false; } + glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); - if (color_clear_enabled || depth_clear_enabled) { - // Clear requested, so let's setup for that. - uint32_t copy_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32; - uint32_t copy_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32; - uint32_t copy_color_clear_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LOW].u32; - assert_true(copy_color_clear == copy_color_clear_low); + // Perform any requested clears. + uint32_t copy_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32; + uint32_t copy_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32; + uint32_t copy_color_clear_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LOW].u32; + assert_true(copy_color_clear == copy_color_clear_low); - if (color_clear_enabled) { - // Clear the render target we selected for copy. - assert_true(copy_src_select < 3); - } + if (color_clear_enabled) { + // Clear the render target we selected for copy. + assert_true(copy_src_select < 3); + // TODO(benvanik): verify color order. + float color[] = {(copy_color_clear & 0xFF) / 255.0f, + ((copy_color_clear >> 8) & 0xFF) / 255.0f, + ((copy_color_clear >> 16) & 0xFF) / 255.0f, + ((copy_color_clear >> 24) & 0xFF) / 255.0f}; + glClearNamedFramebufferfv(source_framebuffer->framebuffer, GL_COLOR, + copy_src_select, color); + } - if (depth_clear_enabled) { - // Clear the current depth buffer. - } + if (depth_clear_enabled) { + // Clear the current depth buffer. + // TODO(benvanik): verify format. + union { + uint32_t uint_value; + GLfloat float_value; + } depth = {copy_depth_clear & 0xFFFFFF00}; + GLint stencil = copy_depth_clear & 0xFF; + glClearNamedFramebufferfi(source_framebuffer->framebuffer, GL_DEPTH_STENCIL, + depth.float_value, stencil); } return true; @@ -1610,7 +1765,6 @@ GLuint CommandProcessor::GetColorRenderTarget(uint32_t pitch, format = ColorRenderTargetFormat::k8888; } - CachedColorRenderTarget* cached = nullptr; for (auto& it = cached_color_render_targets_.begin(); it != cached_color_render_targets_.end(); ++it) { if (it->base == base && it->width == width && it->height == height && @@ -1619,7 +1773,7 @@ GLuint CommandProcessor::GetColorRenderTarget(uint32_t pitch, } } cached_color_render_targets_.push_back(CachedColorRenderTarget()); - cached = &cached_color_render_targets_.back(); + auto cached = &cached_color_render_targets_.back(); cached->base = base; cached->width = width; cached->height = height; @@ -1649,7 +1803,6 @@ GLuint CommandProcessor::GetDepthRenderTarget(uint32_t pitch, uint32_t width = 2560; uint32_t height = 2560; - CachedDepthRenderTarget* cached = nullptr; for (auto& it = cached_depth_render_targets_.begin(); it != cached_depth_render_targets_.end(); ++it) { if (it->base == base && it->width == width && it->height == height && @@ -1658,7 +1811,7 @@ GLuint CommandProcessor::GetDepthRenderTarget(uint32_t pitch, } } cached_depth_render_targets_.push_back(CachedDepthRenderTarget()); - cached = &cached_depth_render_targets_.back(); + auto cached = &cached_depth_render_targets_.back(); cached->base = base; cached->width = width; cached->height = height; @@ -1682,38 +1835,58 @@ GLuint CommandProcessor::GetDepthRenderTarget(uint32_t pitch, return cached->texture; } -GLuint CommandProcessor::GetFramebuffer(GLuint color_targets[4], - GLuint depth_target) { - CachedFramebuffer* cached = nullptr; +CommandProcessor::CachedFramebuffer* CommandProcessor::GetFramebuffer( + GLuint color_targets[4], GLuint depth_target) { for (auto& it = cached_framebuffers_.begin(); it != cached_framebuffers_.end(); ++it) { - if ((depth_target == -1u || it->depth_target == depth_target) && - (color_targets[0] == -1u || it->color_targets[0] == color_targets[0]) && - (color_targets[1] == -1u || it->color_targets[1] == color_targets[1]) && - (color_targets[2] == -1u || it->color_targets[2] == color_targets[2]) && - (color_targets[3] == -1u || it->color_targets[3] == color_targets[3])) { - return it->framebuffer; + if ((depth_target == kAnyTarget || it->depth_target == depth_target) && + (color_targets[0] == kAnyTarget || + it->color_targets[0] == color_targets[0]) && + (color_targets[1] == kAnyTarget || + it->color_targets[1] == color_targets[1]) && + (color_targets[2] == kAnyTarget || + it->color_targets[2] == color_targets[2]) && + (color_targets[3] == kAnyTarget || + it->color_targets[3] == color_targets[3])) { + return &*it; } } + + GLuint real_color_targets[4]; + bool any_set = false; + for (int i = 0; i < 4; ++i) { + if (color_targets[i] == kAnyTarget) { + real_color_targets[i] = 0; + } else { + any_set = true; + real_color_targets[i] = color_targets[i]; + } + } + GLuint real_depth_target; + if (depth_target == kAnyTarget) { + real_depth_target = 0; + } else { + any_set = true; + real_depth_target = depth_target; + } + if (!any_set) { + // No framebuffer required. + return nullptr; + } + cached_framebuffers_.push_back(CachedFramebuffer()); - cached = &cached_framebuffers_.back(); + auto cached = &cached_framebuffers_.back(); glCreateFramebuffers(1, &cached->framebuffer); for (int i = 0; i < 4; ++i) { - uint32_t color_target = color_targets[i]; - if (color_target == -1u) { - color_target = 0; - } - cached->color_targets[i] = color_target; + cached->color_targets[i] = real_color_targets[i]; glNamedFramebufferTexture(cached->framebuffer, GL_COLOR_ATTACHMENT0 + i, - color_target, 0); + real_color_targets[i], 0); } - if (depth_target == -1u) { - depth_target = 0; - } - cached->depth_target = depth_target; + cached->depth_target = real_depth_target; glNamedFramebufferTexture(cached->framebuffer, GL_DEPTH_STENCIL_ATTACHMENT, - depth_target, 0); - return cached->framebuffer; + real_depth_target, 0); + + return cached; } } // namespace gl4 diff --git a/src/xenia/gpu/gl4/command_processor.h b/src/xenia/gpu/gl4/command_processor.h index de71b15b0..2f57bd9ac 100644 --- a/src/xenia/gpu/gl4/command_processor.h +++ b/src/xenia/gpu/gl4/command_processor.h @@ -29,6 +29,16 @@ namespace gl4 { class GL4GraphicsSystem; +struct SwapParameters { + uint32_t x; + uint32_t y; + uint32_t width; + uint32_t height; + + GLuint framebuffer; + GLenum attachment; +}; + // TODO(benvanik): move more of the enums in here? struct DrawCommand { PrimitiveType prim_type; @@ -74,7 +84,8 @@ class CommandProcessor { CommandProcessor(GL4GraphicsSystem* graphics_system); ~CommandProcessor(); - void set_swap_handler(std::function fn) { swap_handler_ = fn; } + typedef std::function SwapHandler; + void set_swap_handler(SwapHandler fn) { swap_handler_ = fn; } uint64_t QueryTime(); uint32_t counter() const { return counter_; } @@ -91,6 +102,26 @@ class CommandProcessor { private: class RingbufferReader; + struct CachedFramebuffer { + GLuint color_targets[4]; + GLuint depth_target; + GLuint framebuffer; + }; + struct CachedColorRenderTarget { + uint32_t base; + uint32_t width; + uint32_t height; + xenos::ColorRenderTargetFormat format; + GLuint texture; + }; + struct CachedDepthRenderTarget { + uint32_t base; + uint32_t width; + uint32_t height; + xenos::DepthRenderTargetFormat format; + GLuint texture; + }; + void WorkerMain(); bool SetupGL(); void ShutdownGL(); @@ -98,6 +129,7 @@ class CommandProcessor { void WriteRegister(uint32_t packet_ptr, uint32_t index, uint32_t value); void MakeCoherent(); void PrepareForWait(); + void ReturnFromWait(); void ExecutePrimaryBuffer(uint32_t start_index, uint32_t end_index); void ExecuteIndirectBuffer(uint32_t ptr, uint32_t length); @@ -168,7 +200,8 @@ class CommandProcessor { // bool PopulateVertexBuffers(DrawCommand* draw_command); bool IssueCopy(DrawCommand* draw_command); - GLuint GetFramebuffer(GLuint color_targets[4], GLuint depth_target); + CachedFramebuffer* GetFramebuffer(GLuint color_targets[4], + GLuint depth_target); GLuint GetColorRenderTarget(uint32_t pitch, xenos::MsaaSamples samples, uint32_t base, xenos::ColorRenderTargetFormat format); @@ -184,7 +217,7 @@ class CommandProcessor { std::thread worker_thread_; std::atomic worker_running_; std::unique_ptr context_; - std::function swap_handler_; + SwapHandler swap_handler_; uint64_t time_base_; uint32_t counter_; @@ -202,37 +235,20 @@ class CommandProcessor { uint64_t bin_select_; uint64_t bin_mask_; + GLuint uniform_data_buffer_; + std::vector> all_shaders_; std::unordered_map shader_cache_; GL4Shader* active_vertex_shader_; GL4Shader* active_pixel_shader_; - GLuint uniform_data_buffer_; + CachedFramebuffer* active_framebuffer_; + + std::vector cached_framebuffers_; + std::vector cached_color_render_targets_; + std::vector cached_depth_render_targets_; DrawCommand draw_command_; - - struct CachedFramebuffer { - GLuint color_targets[4]; - GLuint depth_target; - GLuint framebuffer; - }; - std::vector cached_framebuffers_; - struct CachedColorRenderTarget { - uint32_t base; - uint32_t width; - uint32_t height; - xenos::ColorRenderTargetFormat format; - GLuint texture; - }; - std::vector cached_color_render_targets_; - struct CachedDepthRenderTarget { - uint32_t base; - uint32_t width; - uint32_t height; - xenos::DepthRenderTargetFormat format; - GLuint texture; - }; - std::vector cached_depth_render_targets_; }; } // namespace gl4 diff --git a/src/xenia/gpu/gl4/gl4_gpu-private.h b/src/xenia/gpu/gl4/gl4_gpu-private.h index a19e7a092..da11370f5 100644 --- a/src/xenia/gpu/gl4/gl4_gpu-private.h +++ b/src/xenia/gpu/gl4/gl4_gpu-private.h @@ -10,11 +10,13 @@ #ifndef XENIA_GPU_GL4_GL4_GPU_PRIVATE_H_ #define XENIA_GPU_GL4_GL4_GPU_PRIVATE_H_ -// GL headers +#include #include #include +DECLARE_bool(thread_safe_gl); + namespace xe { namespace gpu { namespace gl4 { diff --git a/src/xenia/gpu/gl4/gl4_gpu.cc b/src/xenia/gpu/gl4/gl4_gpu.cc index 8e940a512..6a3fe49b8 100644 --- a/src/xenia/gpu/gl4/gl4_gpu.cc +++ b/src/xenia/gpu/gl4/gl4_gpu.cc @@ -9,8 +9,12 @@ #include +#include #include +DEFINE_bool(thread_safe_gl, false, + "Only allow one GL context to be active at a time."); + namespace xe { namespace gpu { namespace gl4 { diff --git a/src/xenia/gpu/gl4/gl4_graphics_system.cc b/src/xenia/gpu/gl4/gl4_graphics_system.cc index 239911dc2..563a987c2 100644 --- a/src/xenia/gpu/gl4/gl4_graphics_system.cc +++ b/src/xenia/gpu/gl4/gl4_graphics_system.cc @@ -17,6 +17,8 @@ namespace xe { namespace gpu { namespace gl4 { +extern "C" GLEWContext* glewGetContext(); + GL4GraphicsSystem::GL4GraphicsSystem(Emulator* emulator) : GraphicsSystem(emulator), timer_queue_(nullptr), vsync_timer_(nullptr) {} @@ -57,7 +59,7 @@ X_STATUS GL4GraphicsSystem::Setup() { return X_STATUS_UNSUCCESSFUL; } command_processor_->set_swap_handler( - std::bind(&GL4GraphicsSystem::SwapHandler, this)); + [this](const SwapParameters& swap_params) { SwapHandler(swap_params); }); // Let the processor know we want register access callbacks. emulator_->memory()->AddMappedRange( @@ -115,12 +117,18 @@ void GL4GraphicsSystem::MarkVblank() { DispatchInterruptCallback(0, 2); } -void GL4GraphicsSystem::SwapHandler() { +void GL4GraphicsSystem::SwapHandler(const SwapParameters& swap_params) { SCOPE_profile_cpu_f("gpu"); // Swap requested. Synchronously post a request to the loop so that // we do the swap in the right thread. - control_->SynchronousRepaint(); + control_->SynchronousRepaint([&]() { + glBlitNamedFramebuffer(swap_params.framebuffer, 0, swap_params.x, + swap_params.y, swap_params.x + swap_params.width, + swap_params.y + swap_params.height, 0, 0, + control_->width(), control_->height(), + GL_COLOR_BUFFER_BIT, GL_LINEAR); + }); // Roll over vblank. MarkVblank(); diff --git a/src/xenia/gpu/gl4/gl4_graphics_system.h b/src/xenia/gpu/gl4/gl4_graphics_system.h index 982048f24..5f53fbdaf 100644 --- a/src/xenia/gpu/gl4/gl4_graphics_system.h +++ b/src/xenia/gpu/gl4/gl4_graphics_system.h @@ -37,7 +37,7 @@ class GL4GraphicsSystem : public GraphicsSystem { private: void MarkVblank(); - void SwapHandler(); + void SwapHandler(const SwapParameters& swap_params); uint64_t ReadRegister(uint64_t addr); void WriteRegister(uint64_t addr, uint64_t value); diff --git a/src/xenia/gpu/gl4/gl_context.cc b/src/xenia/gpu/gl4/gl_context.cc index e20565557..bdfbced7a 100644 --- a/src/xenia/gpu/gl4/gl_context.cc +++ b/src/xenia/gpu/gl4/gl_context.cc @@ -9,13 +9,18 @@ #include +#include + #include #include +#include namespace xe { namespace gpu { namespace gl4 { +static std::recursive_mutex global_gl_mutex_; + thread_local GLEWContext* tls_glew_context_ = nullptr; thread_local WGLEWContext* tls_wglew_context_ = nullptr; extern "C" GLEWContext* glewGetContext() { return tls_glew_context_; } @@ -110,24 +115,31 @@ bool GLContext::Initialize(HWND hwnd) { // Clearing errors. } + ClearCurrent(); + return true; } std::unique_ptr GLContext::CreateShared() { assert_not_null(glrc_); - int context_flags = 0; + HGLRC new_glrc = nullptr; + { + GLContextLock context_lock(this); + + int context_flags = 0; #if DEBUG - context_flags |= WGL_CONTEXT_DEBUG_BIT_ARB; -#endif // DEBUG - int attrib_list[] = {WGL_CONTEXT_MAJOR_VERSION_ARB, 4, // - WGL_CONTEXT_MINOR_VERSION_ARB, 5, // - WGL_CONTEXT_FLAGS_ARB, context_flags, // - 0}; - auto new_glrc = wglCreateContextAttribsARB(dc_, glrc_, attrib_list); - if (!new_glrc) { - PLOGE("Could not create shared context"); - return nullptr; + context_flags |= WGL_CONTEXT_DEBUG_BIT_ARB; +#endif // DEBUG + int attrib_list[] = {WGL_CONTEXT_MAJOR_VERSION_ARB, 4, // + WGL_CONTEXT_MINOR_VERSION_ARB, 5, // + WGL_CONTEXT_FLAGS_ARB, context_flags, // + 0}; + new_glrc = wglCreateContextAttribsARB(dc_, glrc_, attrib_list); + if (!new_glrc) { + PLOGE("Could not create shared context"); + return nullptr; + } } auto new_context = std::make_unique(hwnd_, new_glrc); @@ -138,26 +150,31 @@ std::unique_ptr GLContext::CreateShared() { glewExperimental = GL_TRUE; if (glewInit() != GLEW_OK) { + new_context->ClearCurrent(); PLOGE("Unable to initialize GLEW"); return nullptr; } if (wglewInit() != GLEW_OK) { + new_context->ClearCurrent(); PLOGE("Unable to initialize WGLEW"); return nullptr; } new_context->ClearCurrent(); - MakeCurrent(); - - while (glGetError()) { - // Clearing errors. - } return new_context; } bool GLContext::MakeCurrent() { + if (FLAGS_thread_safe_gl) { + global_gl_mutex_.lock(); + } + if (!wglMakeCurrent(dc_, glrc_)) { + if (FLAGS_thread_safe_gl) { + global_gl_mutex_.unlock(); + } + PLOGE("Unable to make GL context current"); return false; } tls_glew_context_ = &glew_context_; @@ -169,6 +186,10 @@ void GLContext::ClearCurrent() { wglMakeCurrent(nullptr, nullptr); tls_glew_context_ = nullptr; tls_wglew_context_ = nullptr; + + if (FLAGS_thread_safe_gl) { + global_gl_mutex_.unlock(); + } } } // namespace gl4 diff --git a/src/xenia/gpu/gl4/gl_context.h b/src/xenia/gpu/gl4/gl_context.h index 05d3c5206..e6a9e93e7 100644 --- a/src/xenia/gpu/gl4/gl_context.h +++ b/src/xenia/gpu/gl4/gl_context.h @@ -43,6 +43,16 @@ class GLContext { WGLEWContext wglew_context_; }; +struct GLContextLock { + GLContextLock(GLContext* context) : context_(context) { + context_->MakeCurrent(); + } + ~GLContextLock() { context_->ClearCurrent(); } + + private: + GLContext* context_; +}; + } // namespace gl4 } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/gl4/wgl_control.cc b/src/xenia/gpu/gl4/wgl_control.cc index 527463fb7..bfd503b60 100644 --- a/src/xenia/gpu/gl4/wgl_control.cc +++ b/src/xenia/gpu/gl4/wgl_control.cc @@ -11,15 +11,18 @@ #include #include +#include #include namespace xe { namespace gpu { namespace gl4 { +extern "C" GLEWContext* glewGetContext(); +extern "C" WGLEWContext* wglewGetContext(); + WGLControl::WGLControl(poly::ui::Loop* loop) - : poly::ui::win32::Win32Control(Flags::kFlagOwnPaint), - loop_(loop) {} + : poly::ui::win32::Win32Control(Flags::kFlagOwnPaint), loop_(loop) {} WGLControl::~WGLControl() = default; @@ -70,21 +73,30 @@ void WGLControl::OnLayout(poly::ui::UIEvent& e) { Control::ResizeToFill(); } LRESULT WGLControl::WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam) { switch (message) { - case WM_PAINT: - context_.MakeCurrent(); + case WM_PAINT: { + GLContextLock context_lock(&context_); + // TODO(benvanik): is viewport needed? glViewport(0, 0, width_, height_); - glClearColor(rand() / (float)RAND_MAX, 1.0f, 0, 1.0f); - glClear(GL_COLOR_BUFFER_BIT); + float clear_color[] = {rand() / (float)RAND_MAX, 1.0f, 0, 1.0f}; + glClearNamedFramebufferfv(0, GL_COLOR, 0, clear_color); + if (current_paint_callback_) { + current_paint_callback_(); + current_paint_callback_ = nullptr; + } // TODO(benvanik): profiler present. // Profiler::Present(); SwapBuffers(context_.dc()); - break; + } break; } return Win32Control::WndProc(hWnd, message, wParam, lParam); } -void WGLControl::SynchronousRepaint() { +void WGLControl::SynchronousRepaint(std::function paint_callback) { SCOPE_profile_cpu_f("gpu"); + + assert_null(current_paint_callback_); + current_paint_callback_ = std::move(paint_callback); + // This will not return until the WM_PAINT has completed. RedrawWindow(hwnd(), nullptr, nullptr, RDW_INTERNALPAINT | RDW_UPDATENOW | RDW_ALLCHILDREN); diff --git a/src/xenia/gpu/gl4/wgl_control.h b/src/xenia/gpu/gl4/wgl_control.h index 34f7453e9..4c2f2d64d 100644 --- a/src/xenia/gpu/gl4/wgl_control.h +++ b/src/xenia/gpu/gl4/wgl_control.h @@ -10,6 +10,8 @@ #ifndef XENIA_GPU_GL4_WGL_CONTROL_H_ #define XENIA_GPU_GL4_WGL_CONTROL_H_ +#include + #include #include #include @@ -26,7 +28,7 @@ class WGLControl : public poly::ui::win32::Win32Control { GLContext* context() { return &context_; } - void SynchronousRepaint(); + void SynchronousRepaint(std::function paint_callback); protected: bool Create() override; @@ -39,6 +41,7 @@ class WGLControl : public poly::ui::win32::Win32Control { private: poly::ui::Loop* loop_; GLContext context_; + std::function current_paint_callback_; }; } // namespace gl4