From 3e61cc95cd5c8cf6caeb0893d1ccbc20c5976b7f Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sat, 3 Jan 2015 03:19:32 -0800 Subject: [PATCH] Tracking whether state changes are compatible for batching. --- src/xenia/gpu/gl4/command_processor.cc | 190 +++++++++++++++---------- src/xenia/gpu/gl4/command_processor.h | 32 +++-- 2 files changed, 132 insertions(+), 90 deletions(-) diff --git a/src/xenia/gpu/gl4/command_processor.cc b/src/xenia/gpu/gl4/command_processor.cc index 2113dac23..3b8730dab 100644 --- a/src/xenia/gpu/gl4/command_processor.cc +++ b/src/xenia/gpu/gl4/command_processor.cc @@ -1362,41 +1362,42 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) { return false; } - if (!UpdateShaders(draw_command)) { - PLOGE("Unable to prepare draw shaders"); - return false; - } - if (!UpdateRenderTargets(draw_command)) { - PLOGE("Unable to setup render targets"); - return false; +#define CHECK_ISSUE_UPDATE_STATUS(status, mismatch, error_message) \ + { \ + if (status == UpdateStatus::kError) { \ + PLOGE(error_message); \ + return false; \ + } else if (status == UpdateStatus::kMismatch) { \ + mismatch = true; \ + } \ } + + UpdateStatus status; + bool mismatch = false; + status = UpdateShaders(draw_command); + CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to prepare draw shaders"); + status = UpdateRenderTargets(draw_command); + CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup render targets"); if (!active_framebuffer_) { // No framebuffer, so nothing we do will actually have an effect. // Treat it as a no-op. 
XETRACECP("No-op draw (no framebuffer set)"); return true; } - if (!UpdateState(draw_command)) { - PLOGE("Unable to setup render state"); - return false; - } - if (!UpdateConstants(draw_command)) { - PLOGE("Unable to update shader constants"); - return false; - } - if (!PopulateIndexBuffer(draw_command)) { - PLOGE("Unable to setup index buffer"); - return false; - } - if (!PopulateVertexBuffers(draw_command)) { - PLOGE("Unable to setup vertex buffers"); - return false; - } - if (!PopulateSamplers(draw_command)) { - PLOGE("Unable to prepare draw samplers"); - return false; - } + status = UpdateState(draw_command); + CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup render state"); + status = UpdateConstants(draw_command); + CHECK_ISSUE_UPDATE_STATUS(status, mismatch, + "Unable to update shader constants"); + status = PopulateSamplers(draw_command); + CHECK_ISSUE_UPDATE_STATUS(status, mismatch, + "Unable to prepare draw samplers"); + + status = PopulateIndexBuffer(draw_command); + CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup index buffer"); + status = PopulateVertexBuffers(draw_command); + CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup vertex buffers"); GLenum prim_type = 0; switch (cmd.prim_type) { @@ -1481,7 +1482,8 @@ bool CommandProcessor::SetShadowRegister(float& dest, uint32_t register_name) { return true; } -bool CommandProcessor::UpdateRenderTargets(DrawCommand* draw_command) { +CommandProcessor::UpdateStatus CommandProcessor::UpdateRenderTargets( + DrawCommand* draw_command) { auto& regs = update_render_targets_regs_; bool dirty = false; @@ -1497,7 +1499,7 @@ bool CommandProcessor::UpdateRenderTargets(DrawCommand* draw_command) { SetShadowRegister(regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK); dirty |= SetShadowRegister(regs.rb_depth_info, XE_GPU_REG_RB_DEPTH_INFO); if (!dirty) { - return true; + return UpdateStatus::kCompatible; } SCOPE_profile_cpu_f("gpu"); @@ -1574,14 +1576,16 @@ bool 
CommandProcessor::UpdateRenderTargets(DrawCommand* draw_command) { glBindFramebuffer(GL_DRAW_FRAMEBUFFER, cached_framebuffer->framebuffer); } - return true; + return UpdateStatus::kMismatch; } -bool CommandProcessor::UpdateState(DrawCommand* draw_command) { - SCOPE_profile_cpu_f("gpu"); +CommandProcessor::UpdateStatus CommandProcessor::UpdateState( + DrawCommand* draw_command) { auto& regs = *register_file_; auto state_data = draw_command->state_data; + bool mismatch = false; + // Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE // Deprecated in GL, implemented in shader. // if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard; @@ -1591,19 +1595,35 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) { state_data->alpha_test.y = float(color_control & 0x3); // ALPHAFUNC state_data->alpha_test.z = regs[XE_GPU_REG_RB_ALPHA_REF].f32; - UpdateViewportState(draw_command); - UpdateRasterizerState(draw_command); - UpdateBlendState(draw_command); - UpdateDepthStencilState(draw_command); +#define CHECK_UPDATE_STATUS(status, mismatch, error_message) \ + { \ + if (status == UpdateStatus::kError) { \ + PLOGE(error_message); \ + return status; \ + } else if (status == UpdateStatus::kMismatch) { \ + mismatch = true; \ + } \ + } - return true; + UpdateStatus status; + status = UpdateViewportState(draw_command); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update viewport state"); + status = UpdateRasterizerState(draw_command); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update rasterizer state"); + status = UpdateBlendState(draw_command); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update blend state"); + status = UpdateDepthStencilState(draw_command); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update depth/stencil state"); + + return mismatch ? 
UpdateStatus::kMismatch : UpdateStatus::kCompatible; } -bool CommandProcessor::UpdateViewportState(DrawCommand* draw_command) { +CommandProcessor::UpdateStatus CommandProcessor::UpdateViewportState( + DrawCommand* draw_command) { auto& regs = *register_file_; auto state_data = draw_command->state_data; - SCOPE_profile_cpu_f("gpu"); + // NOTE: we don't track state here as this is all cheap to update (ish). // Much of this state machine is extracted from: // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c @@ -1701,10 +1721,11 @@ bool CommandProcessor::UpdateViewportState(DrawCommand* draw_command) { bool dx_clip = ((clip_control >> 20) & 0x1) == 0x1; // assert_true(dx_clip); - return true; + return UpdateStatus::kCompatible; } -bool CommandProcessor::UpdateRasterizerState(DrawCommand* draw_command) { +CommandProcessor::UpdateStatus CommandProcessor::UpdateRasterizerState( + DrawCommand* draw_command) { auto& regs = update_rasterizer_state_regs_; bool dirty = false; @@ -1715,7 +1736,7 @@ bool CommandProcessor::UpdateRasterizerState(DrawCommand* draw_command) { dirty |= SetShadowRegister(regs.pa_sc_screen_scissor_br, XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR); if (!dirty) { - return true; + return UpdateStatus::kCompatible; } SCOPE_profile_cpu_f("gpu"); @@ -1765,10 +1786,11 @@ bool CommandProcessor::UpdateRasterizerState(DrawCommand* draw_command) { // glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); - return true; + return UpdateStatus::kMismatch; } -bool CommandProcessor::UpdateBlendState(DrawCommand* draw_command) { +CommandProcessor::UpdateStatus CommandProcessor::UpdateBlendState( + DrawCommand* draw_command) { auto& regs = update_blend_state_regs_; bool dirty = false; @@ -1785,7 +1807,7 @@ bool CommandProcessor::UpdateBlendState(DrawCommand* draw_command) { dirty |= SetShadowRegister(regs.rb_blend_rgba[2], XE_GPU_REG_RB_BLEND_BLUE); dirty |= SetShadowRegister(regs.rb_blend_rgba[3], 
XE_GPU_REG_RB_BLEND_ALPHA); if (!dirty) { - return true; + return UpdateStatus::kCompatible; } SCOPE_profile_cpu_f("gpu"); @@ -1849,10 +1871,11 @@ bool CommandProcessor::UpdateBlendState(DrawCommand* draw_command) { glBlendColor(regs.rb_blend_rgba[0], regs.rb_blend_rgba[1], regs.rb_blend_rgba[2], regs.rb_blend_rgba[3]); - return true; + return UpdateStatus::kMismatch; } -bool CommandProcessor::UpdateDepthStencilState(DrawCommand* draw_command) { +CommandProcessor::UpdateStatus CommandProcessor::UpdateDepthStencilState( + DrawCommand* draw_command) { auto& regs = update_depth_stencil_state_regs_; bool dirty = false; @@ -1860,7 +1883,7 @@ bool CommandProcessor::UpdateDepthStencilState(DrawCommand* draw_command) { dirty |= SetShadowRegister(regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK); if (!dirty) { - return true; + return UpdateStatus::kCompatible; } SCOPE_profile_cpu_f("gpu"); @@ -1943,10 +1966,11 @@ bool CommandProcessor::UpdateDepthStencilState(DrawCommand* draw_command) { stencil_op_map[(regs.rb_depthcontrol & 0x0001C000) >> 14]); } - return true; + return UpdateStatus::kMismatch; } -bool CommandProcessor::UpdateConstants(DrawCommand* draw_command) { +CommandProcessor::UpdateStatus CommandProcessor::UpdateConstants( + DrawCommand* draw_command) { auto& regs = *register_file_; auto state_data = draw_command->state_data; @@ -1961,10 +1985,11 @@ bool CommandProcessor::UpdateConstants(DrawCommand* draw_command) { sizeof(state_data->float_consts) + sizeof(state_data->fetch_consts) + sizeof(state_data->loop_consts) + sizeof(state_data->bool_consts)); - return true; + return UpdateStatus::kCompatible; } -bool CommandProcessor::UpdateShaders(DrawCommand* draw_command) { +CommandProcessor::UpdateStatus CommandProcessor::UpdateShaders( + DrawCommand* draw_command) { auto& regs = update_shaders_regs_; auto& cmd = *draw_command; @@ -1974,7 +1999,7 @@ bool CommandProcessor::UpdateShaders(DrawCommand* draw_command) { dirty |= regs.pixel_shader != active_pixel_shader_; 
dirty |= regs.prim_type != cmd.prim_type; if (!dirty) { - return true; + return UpdateStatus::kCompatible; } regs.vertex_shader = active_vertex_shader_; regs.pixel_shader = active_pixel_shader_; @@ -1987,21 +2012,21 @@ bool CommandProcessor::UpdateShaders(DrawCommand* draw_command) { if (!active_vertex_shader_->has_prepared()) { if (!active_vertex_shader_->PrepareVertexShader(program_cntl)) { XELOGE("Unable to prepare vertex shader"); - return false; + return UpdateStatus::kError; } } else if (!active_vertex_shader_->is_valid()) { XELOGE("Vertex shader invalid"); - return false; + return UpdateStatus::kError; } if (!active_pixel_shader_->has_prepared()) { if (!active_pixel_shader_->PreparePixelShader(program_cntl)) { XELOGE("Unable to prepare pixel shader"); - return false; + return UpdateStatus::kError; } } else if (!active_pixel_shader_->is_valid()) { XELOGE("Pixel shader invalid"); - return false; + return UpdateStatus::kError; } GLuint vertex_program = active_vertex_shader_->program(); @@ -2065,16 +2090,17 @@ bool CommandProcessor::UpdateShaders(DrawCommand* draw_command) { } glBindProgramPipeline(pipeline); - return true; + return UpdateStatus::kMismatch; } -bool CommandProcessor::PopulateIndexBuffer(DrawCommand* draw_command) { +CommandProcessor::UpdateStatus CommandProcessor::PopulateIndexBuffer( + DrawCommand* draw_command) { auto& cmd = *draw_command; auto& info = cmd.index_buffer; if (!cmd.index_count || !info.address) { // No index buffer or auto draw. - return true; + return UpdateStatus::kMismatch; // ? 
} SCOPE_profile_cpu_f("gpu"); @@ -2109,10 +2135,11 @@ bool CommandProcessor::PopulateIndexBuffer(DrawCommand* draw_command) { } scratch_buffer_.Commit(std::move(allocation)); - return true; + return UpdateStatus::kMismatch; } -bool CommandProcessor::PopulateVertexBuffers(DrawCommand* draw_command) { +CommandProcessor::UpdateStatus CommandProcessor::PopulateVertexBuffers( + DrawCommand* draw_command) { SCOPE_profile_cpu_f("gpu"); auto& regs = *register_file_; auto& cmd = *draw_command; @@ -2235,13 +2262,16 @@ bool CommandProcessor::PopulateVertexBuffers(DrawCommand* draw_command) { scratch_buffer_.Commit(std::move(allocation)); } - return true; + return UpdateStatus::kMismatch; } -bool CommandProcessor::PopulateSamplers(DrawCommand* draw_command) { +CommandProcessor::UpdateStatus CommandProcessor::PopulateSamplers( + DrawCommand* draw_command) { SCOPE_profile_cpu_f("gpu"); auto& regs = *register_file_; + bool mismatch = false; + // VS and PS samplers are shared, but may be used exclusively. // We walk each and setup lazily. bool has_setup_sampler[32] = {false}; @@ -2254,8 +2284,11 @@ bool CommandProcessor::PopulateSamplers(DrawCommand* draw_command) { continue; } has_setup_sampler[desc.fetch_slot] = true; - if (!PopulateSampler(draw_command, desc)) { - return false; + auto status = PopulateSampler(draw_command, desc); + if (status == UpdateStatus::kError) { + return status; + } else if (status == UpdateStatus::kMismatch) { + mismatch = true; } } @@ -2267,16 +2300,19 @@ bool CommandProcessor::PopulateSamplers(DrawCommand* draw_command) { continue; } has_setup_sampler[desc.fetch_slot] = true; - if (!PopulateSampler(draw_command, desc)) { - return false; + auto status = PopulateSampler(draw_command, desc); + if (status == UpdateStatus::kError) { + return UpdateStatus::kError; + } else if (status == UpdateStatus::kMismatch) { + mismatch = true; } } - return true; + return mismatch ? 
UpdateStatus::kMismatch : UpdateStatus::kCompatible; } -bool CommandProcessor::PopulateSampler(DrawCommand* draw_command, - const Shader::SamplerDesc& desc) { +CommandProcessor::UpdateStatus CommandProcessor::PopulateSampler( + DrawCommand* draw_command, const Shader::SamplerDesc& desc) { auto& regs = *register_file_; int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + desc.fetch_slot * 6; auto group = reinterpret_cast(&regs.values[r]); @@ -2287,38 +2323,38 @@ bool CommandProcessor::PopulateSampler(DrawCommand* draw_command, draw_command->state_data->texture_samplers[desc.fetch_slot] = 0; if (FLAGS_disable_textures) { - return true; + return UpdateStatus::kCompatible; } // ? if (!fetch.type) { - return true; + return UpdateStatus::kCompatible; } assert_true(fetch.type == 0x2); TextureInfo texture_info; if (!TextureInfo::Prepare(fetch, &texture_info)) { XELOGE("Unable to parse texture fetcher info"); - return true; // invalid texture used + return UpdateStatus::kCompatible; // invalid texture used } SamplerInfo sampler_info; if (!SamplerInfo::Prepare(fetch, desc.tex_fetch, &sampler_info)) { XELOGE("Unable to parse sampler info"); - return true; // invalid texture used + return UpdateStatus::kCompatible; // invalid texture used } auto entry_view = texture_cache_.Demand(texture_info, sampler_info); if (!entry_view) { // Unable to create/fetch/etc. XELOGE("Failed to demand texture"); - return true; + return UpdateStatus::kCompatible; } // Shaders will use bindless to fetch right from it. 
draw_command->state_data->texture_samplers[desc.fetch_slot] = entry_view->texture_sampler_handle; - return true; + return UpdateStatus::kCompatible; } bool CommandProcessor::IssueCopy(DrawCommand* draw_command) { diff --git a/src/xenia/gpu/gl4/command_processor.h b/src/xenia/gpu/gl4/command_processor.h index 4f0fda8b8..2433e964e 100644 --- a/src/xenia/gpu/gl4/command_processor.h +++ b/src/xenia/gpu/gl4/command_processor.h @@ -129,6 +129,12 @@ class CommandProcessor { private: class RingbufferReader; + enum class UpdateStatus { + kCompatible, + kMismatch, + kError, + }; + struct CachedFramebuffer { GLuint color_targets[4]; GLuint depth_target; @@ -235,19 +241,19 @@ class CommandProcessor { void PrepareDraw(DrawCommand* draw_command); bool IssueDraw(DrawCommand* draw_command); - bool UpdateRenderTargets(DrawCommand* draw_command); - bool UpdateState(DrawCommand* draw_command); - bool UpdateViewportState(DrawCommand* draw_command); - bool UpdateRasterizerState(DrawCommand* draw_command); - bool UpdateBlendState(DrawCommand* draw_command); - bool UpdateDepthStencilState(DrawCommand* draw_command); - bool UpdateConstants(DrawCommand* draw_command); - bool UpdateShaders(DrawCommand* draw_command); - bool PopulateIndexBuffer(DrawCommand* draw_command); - bool PopulateVertexBuffers(DrawCommand* draw_command); - bool PopulateSamplers(DrawCommand* draw_command); - bool PopulateSampler(DrawCommand* draw_command, - const Shader::SamplerDesc& desc); + UpdateStatus UpdateRenderTargets(DrawCommand* draw_command); + UpdateStatus UpdateState(DrawCommand* draw_command); + UpdateStatus UpdateViewportState(DrawCommand* draw_command); + UpdateStatus UpdateRasterizerState(DrawCommand* draw_command); + UpdateStatus UpdateBlendState(DrawCommand* draw_command); + UpdateStatus UpdateDepthStencilState(DrawCommand* draw_command); + UpdateStatus UpdateConstants(DrawCommand* draw_command); + UpdateStatus UpdateShaders(DrawCommand* draw_command); + UpdateStatus PopulateIndexBuffer(DrawCommand* 
draw_command); + UpdateStatus PopulateVertexBuffers(DrawCommand* draw_command); + UpdateStatus PopulateSamplers(DrawCommand* draw_command); + UpdateStatus PopulateSampler(DrawCommand* draw_command, + const Shader::SamplerDesc& desc); bool IssueCopy(DrawCommand* draw_command); CachedFramebuffer* GetFramebuffer(GLuint color_targets[4],