diff --git a/src/xenia/gpu/gl4/draw_batcher.cc b/src/xenia/gpu/gl4/draw_batcher.cc index c5d0bdb39..304992a80 100644 --- a/src/xenia/gpu/gl4/draw_batcher.cc +++ b/src/xenia/gpu/gl4/draw_batcher.cc @@ -335,14 +335,33 @@ bool DrawBatcher::Flush(FlushMode mode) { batch_state_.command_stride, vertex_buffer_count); } } else { - if (batch_state_.indexed) { - glMultiDrawElementsIndirect(prim_type, batch_state_.index_type, - indirect_offset, batch_state_.draw_count, - batch_state_.command_stride); + if (batch_state_.draw_count == 1) { + // Fast path for one draw. Removes MDI overhead when not required. + if (batch_state_.indexed) { + auto& cmd = active_draw_.draw_elements_cmd; + glDrawElementsInstancedBaseVertexBaseInstance( + prim_type, cmd->count, batch_state_.index_type, + reinterpret_cast<void*>( + uintptr_t(cmd->first_index) * + (batch_state_.index_type == GL_UNSIGNED_SHORT ? 2 : 4)), + cmd->instance_count, cmd->base_vertex, cmd->base_instance); + } else { + auto& cmd = active_draw_.draw_arrays_cmd; + glDrawArraysInstancedBaseInstance(prim_type, cmd->first_index, + cmd->count, cmd->instance_count, + cmd->base_instance); + } } else { - glMultiDrawArraysIndirect(prim_type, indirect_offset, - batch_state_.draw_count, - batch_state_.command_stride); + // Full multi-draw. 
+ if (batch_state_.indexed) { + glMultiDrawElementsIndirect(prim_type, batch_state_.index_type, + indirect_offset, batch_state_.draw_count, + batch_state_.command_stride); + } else { + glMultiDrawArraysIndirect(prim_type, indirect_offset, + batch_state_.draw_count, + batch_state_.command_stride); + } } } diff --git a/src/xenia/kernel/xboxkrnl_threading.cc b/src/xenia/kernel/xboxkrnl_threading.cc index e63ca2ef0..4c404aa1b 100644 --- a/src/xenia/kernel/xboxkrnl_threading.cc +++ b/src/xenia/kernel/xboxkrnl_threading.cc @@ -287,7 +287,7 @@ SHIM_CALL KeDelayExecutionThread_shim(PPCContext* ppc_state, } SHIM_CALL NtYieldExecution_shim(PPCContext* ppc_state, KernelState* state) { - XELOGD("NtYieldExecution()"); + //XELOGD("NtYieldExecution()"); XThread* thread = XThread::GetCurrentThread(); X_STATUS result = thread->Delay(0, 0, 0); SHIM_SET_RETURN_64(0);