diff --git a/pcsx2/Frontend/MetalHostDisplay.h b/pcsx2/Frontend/MetalHostDisplay.h
index 4f6a306eb1..ec53ff4146 100644
--- a/pcsx2/Frontend/MetalHostDisplay.h
+++ b/pcsx2/Frontend/MetalHostDisplay.h
@@ -24,6 +24,7 @@
 #ifdef __APPLE__
 
 #include "GS/Renderers/Metal/GSMTLDeviceInfo.h"
+#include <mutex>
 #include <AppKit/AppKit.h>
 #include <Metal/Metal.h>
 #include <QuartzCore/QuartzCore.h>
@@ -38,6 +39,10 @@ class MetalHostDisplay final : public HostDisplay
	MRCOwned<id<CAMetalDrawable>> m_current_drawable;
	MRCOwned<MTLRenderPassDescriptor*> m_pass_desc;
	u32 m_capture_start_frame;
+	bool m_gpu_timing_enabled = false;
+	double m_accumulated_gpu_time = 0;
+	double m_last_gpu_time_end = 0;
+	std::mutex m_mtx;
 
	void AttachSurfaceOnMainThread();
	void DetachSurfaceOnMainThread();
@@ -78,6 +83,10 @@ public:
	bool UpdateImGuiFontTexture() override;
 
	bool GetHostRefreshRate(float* refresh_rate) override;
+
+	bool SetGPUTimingEnabled(bool enabled) override;
+	float GetAndResetAccumulatedGPUTime() override;
+	void AccumulateCommandBufferTime(id<MTLCommandBuffer> buffer);
 };
 
 #endif
diff --git a/pcsx2/Frontend/MetalHostDisplay.mm b/pcsx2/Frontend/MetalHostDisplay.mm
index 87751c349e..56f55686c0 100644
--- a/pcsx2/Frontend/MetalHostDisplay.mm
+++ b/pcsx2/Frontend/MetalHostDisplay.mm
@@ -406,4 +406,47 @@ bool MetalHostDisplay::GetHostRefreshRate(float* refresh_rate)
	return *refresh_rate != 0;
 }
 
+bool MetalHostDisplay::SetGPUTimingEnabled(bool enabled)
+{
+	if (enabled == m_gpu_timing_enabled)
+		return true;
+	if (@available(macOS 10.15, iOS 10.3, *))
+	{
+		std::lock_guard<std::mutex> l(m_mtx);
+		m_gpu_timing_enabled = enabled;
+		m_accumulated_gpu_time = 0;
+		m_last_gpu_time_end = 0;
+		return true;
+	}
+	return false;
+}
+
+float MetalHostDisplay::GetAndResetAccumulatedGPUTime()
+{
+	std::lock_guard<std::mutex> l(m_mtx);
+	float time = m_accumulated_gpu_time * 1000;
+	m_accumulated_gpu_time = 0;
+	return time;
+}
+
+void MetalHostDisplay::AccumulateCommandBufferTime(id<MTLCommandBuffer> buffer)
+{
+	std::lock_guard<std::mutex> l(m_mtx);
+	if (!m_gpu_timing_enabled)
+		return;
+	// We do the check before enabling m_gpu_timing_enabled
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunguarded-availability"
+	// It's unlikely, but command buffers can overlap or run out of order
+	// This doesn't handle every case (fully out of order), but it should at least handle overlapping
+	double begin = std::max(m_last_gpu_time_end, [buffer GPUStartTime]);
+	double end = [buffer GPUEndTime];
+	if (end > begin)
+	{
+		m_accumulated_gpu_time += end - begin;
+		m_last_gpu_time_end = end;
+	}
+#pragma clang diagnostic pop
+}
+
 #endif // __APPLE__
diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h
index dd9e47675f..5a159b1680 100644
--- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h
+++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h
@@ -324,6 +324,8 @@ public:
	id<MTLBlitCommandEncoder> GetVertexUploadEncoder();
	/// Get the render command buffer, creating a new one if it doesn't exist
	id<MTLCommandBuffer> GetRenderCmdBuf();
+	/// Called by command buffers when they finish
+	void DrawCommandBufferFinished(u64 draw, id<MTLCommandBuffer> buffer);
	/// Flush pending operations from all encoders to the GPU
	void FlushEncoders();
	/// End current render pass without flushing
diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm
index 3f1c3ac742..36fe0d86c0 100644
--- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm
+++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm
@@ -16,6 +16,8 @@
 #include "PrecompiledHeader.h"
 #include "GSMetalCPPAccessible.h"
 #include "GSDeviceMTL.h"
+
+#include "Frontend/MetalHostDisplay.h"
 #include "GSTextureMTL.h"
 #include "GS/GSPerfMon.h"
 #include "HostDisplay.h"
@@ -219,6 +221,14 @@ id<MTLCommandBuffer> GSDeviceMTL::GetRenderCmdBuf()
	return m_current_render_cmdbuf;
 }
 
+void GSDeviceMTL::DrawCommandBufferFinished(u64 draw, id<MTLCommandBuffer> buffer)
+{
+	// We can do the update non-atomically because we only ever update under the lock
+	u64 newval = std::max(draw, m_last_finished_draw.load(std::memory_order_relaxed));
+	m_last_finished_draw.store(newval, std::memory_order_release);
+	static_cast<MetalHostDisplay*>(m_display)->AccumulateCommandBufferTime(buffer);
+}
+
 void GSDeviceMTL::FlushEncoders()
 {
	if (!m_current_render_cmdbuf)
@@ -252,11 +262,7 @@ void GSDeviceMTL::FlushEncoders()
		{
			std::lock_guard<std::mutex> guard(backref->first);
			if (GSDeviceMTL* dev = backref->second)
-			{
-				// We can do the update non-atomically because we only ever update under the lock
-				u64 newval = std::max(draw, dev->m_last_finished_draw.load(std::memory_order_relaxed));
-				dev->m_last_finished_draw.store(newval, std::memory_order_release);
-			}
+				dev->DrawCommandBufferFinished(draw, buf);
		}];
	[m_current_render_cmdbuf commit];
	m_current_render_cmdbuf = nil;