diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp
index bc8c7c736e..69852e14a6 100644
--- a/pcsx2/x86/ix86-32/iR5900-32.cpp
+++ b/pcsx2/x86/ix86-32/iR5900-32.cpp
@@ -2120,7 +2120,7 @@ static void memory_protect_recompiled_code(u32 startpc, u32 size)
 }
 
 // Skip MPEG Game-Fix
-bool skipMPEG_By_Pattern(u32 sPC)
+static bool skipMPEG_By_Pattern(u32 sPC)
 {
 
 	if (!CHECK_SKIPMPEGHACK)
@@ -2149,6 +2149,56 @@ bool skipMPEG_By_Pattern(u32 sPC)
 	return 0;
 }
 
+static bool recSkipTimeoutLoop(s32 reg, bool is_timeout_loop)
+{
+	if (!EmuConfig.Speedhacks.WaitLoop || !is_timeout_loop)
+		return false;
+
+	DevCon.WriteLn("[EE] Skipping timeout loop at 0x%08X -> 0x%08X", s_pCurBlockEx->startpc, s_nEndBlock);
+
+	// basically, if the time it takes the loop to run is shorter than the
+	// time to the next event, then we want to skip ahead to the event, but
+	// update v0 to reflect how long the loop would have run for.
+
+	// if (cycle >= nextEventCycle) { jump to dispatcher, we're running late }
+	// new_cycles = min(v0 * 8, nextEventCycle)
+	// new_v0 = v0 - (new_cycles - cycle) / 8
+	// if new_v0 > 0 { jump to dispatcher because loop exited early }
+	// else new_v0 is 0, so exit loop
+
+	xMOV(ebx, ptr32[&cpuRegs.cycle]); // ebx = cycle
+	xMOV(ecx, ptr32[&cpuRegs.nextEventCycle]); // ecx = nextEventCycle
+	xCMP(ebx, ecx);
+	//xJAE((void*)DispatcherEvent); // jump to dispatcher if event immediately
+
+	// TODO: In the case where nextEventCycle < cycle because it's overflowed, tack 8
+	// cycles onto the event count, so hopefully it'll wrap around. This is pretty
+	// gross, but until we switch to 64-bit counters, not many better options.
+	xForwardJB8 not_dispatcher;
+	xADD(ebx, 8);
+	xMOV(ptr32[&cpuRegs.cycle], ebx);
+	xJMP((void*)DispatcherEvent);
+	not_dispatcher.SetTarget();
+
+	xMOV(edx, ptr32[&cpuRegs.GPR.r[reg].UL[0]]); // edx = v0
+	xLEA(rax, ptrNative[rdx * 8 + rbx]); // rax = v0 * 8 + cycle
+	xCMP(rcx, rax);
+	xCMOVB(rax, rcx); // rax = new_cycles = min(v0 * 8 + cycle, nextEventCycle)
+	xMOV(ptr32[&cpuRegs.cycle], eax); // writeback new_cycles
+	xSUB(eax, ebx); // new_cycles -= cycle
+	xSHR(eax, 3); // compute number of iterations covered
+	xSUB(edx, eax); // v0 -= iterations covered
+	xMOV(ptr32[&cpuRegs.GPR.r[reg].UL[0]], edx); // write back new value of v0
+	xJNZ((void*)DispatcherEvent); // jump to dispatcher if new v0 is not zero (i.e. an event)
+	xMOV(ptr32[&cpuRegs.pc], s_nEndBlock); // otherwise end of loop
+	recBlocks.Link(HWADDR(s_nEndBlock), xJcc32());
+
+	g_branch = 1;
+	pc = s_nEndBlock;
+
+	return true;
+}
+
 static void recRecompile(const u32 startpc)
 {
 	u32 i = 0;
@@ -2259,6 +2309,26 @@ static void recRecompile(const u32 startpc)
 	s_nEndBlock = 0xffffffff;
 	s_branchTo = -1;
 
+	// Timeout loop speedhack.
+	// God of War 2 and other games (e.g. NFS series) have these timeout loops which just spin for a few thousand
+	// iterations, usually after kicking something which results in an IRQ, but instead of cancelling the loop,
+	// they just let it finish anyway. Such loops look like:
+	//
+	//   00186D6C addiu v0,v0, -0x1
+	//   00186D70 nop
+	//   00186D74 nop
+	//   00186D78 nop
+	//   00186D7C nop
+	//   00186D80 bne v0, zero, ->$0x00186D6C
+	//   00186D84 nop
+	//
+	// Skipping them entirely seems to have no negative effects, but we skip cycles based on the incoming value
+	// of the register being decremented, which appears to vary. So far I haven't seen any which increment instead
+	// of decrementing, so we'll limit the test to that to be safe.
+	//
+	s32 timeout_reg = -1;
+	bool is_timeout_loop = true;
+
 	// compile breakpoints as individual blocks
 	int n1 = isBreakpointNeeded(i);
 	int n2 = isMemcheckNeeded(i);
@@ -2302,6 +2372,28 @@ static void recRecompile(const u32 startpc)
 		//HUH ? PSM ? whut ? THIS IS VIRTUAL ACCESS GOD DAMMIT
 		cpuRegs.code = *(int*)PSM(i);
 
+		if (is_timeout_loop)
+		{
+			if ((cpuRegs.code >> 26) == 8 || (cpuRegs.code >> 26) == 9)
+			{
+				// addi/addiu
+				if (timeout_reg >= 0 || _Rs_ != _Rt_ || _Imm_ >= 0)
+					is_timeout_loop = false;
+				else
+					timeout_reg = _Rs_;
+			}
+			else if ((cpuRegs.code >> 26) == 5)
+			{
+				// bne
+				if (timeout_reg != _Rs_ || _Rt_ != 0 || memRead32(i + 4) != 0)
+					is_timeout_loop = false;
+			}
+			else if (cpuRegs.code != 0)
+			{
+				is_timeout_loop = false;
+			}
+		}
+
 		switch (cpuRegs.code >> 26)
 		{
 			case 0: // special
@@ -2393,11 +2485,6 @@ StartRecomp:
 	// (excepting registers initialised with constants or memory loads) or use any instructions
 	// which alter the machine state apart from registers, it will do the same thing on every
 	// iteration.
-	// TODO: special handling for counting loops. God of war wastes time in a loop which just
-	// counts to some large number and does nothing else, many other games use a counter as a
-	// timeout on a register read. AFAICS the only way to optimise this for non-const cases
-	// without a significant loss in cycle accuracy is with a division, but games would probably
-	// be happy with time wasting loops completing in 0 cycles and timeouts waiting forever.
 	s_nBlockFF = false;
 	if (s_branchTo == startpc)
 	{
@@ -2476,6 +2563,10 @@ StartRecomp:
 			}
 		}
 	}
+	else
+	{
+		is_timeout_loop = false;
+	}
 
 	// rec info //
 	bool has_cop2_instructions = false;
@@ -2538,7 +2629,7 @@ StartRecomp:
 	memory_protect_recompiled_code(startpc, (s_nEndBlock - startpc) >> 2);
 
 	// Skip Recompilation if sceMpegIsEnd Pattern detected
-	bool doRecompilation = !skipMPEG_By_Pattern(startpc);
+	bool doRecompilation = !skipMPEG_By_Pattern(startpc) && !recSkipTimeoutLoop(timeout_reg, is_timeout_loop);
 
 	if (doRecompilation)
 	{
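Reviewer note: for clarity, here is a minimal standalone C++ sketch of the arithmetic the emitted recSkipTimeoutLoop() block performs. The helper name skipTimeoutLoopCycles and the SkipResult struct are illustrative only and not part of this patch; it assumes the patch's fixed cost of 8 EE cycles per loop iteration and mirrors cpuRegs.cycle, cpuRegs.nextEventCycle and the decremented GPR as plain integers.

// Hypothetical sketch (not in this patch) of the cycle-skipping math emitted above.
#include <algorithm>
#include <cstdint>

struct SkipResult
{
	uint32_t cycle;      // new value for cpuRegs.cycle
	uint32_t v0;         // new value for the counter register
	bool to_dispatcher;  // true -> branch to DispatcherEvent instead of falling out of the loop
};

static SkipResult skipTimeoutLoopCycles(uint32_t cycle, uint32_t nextEventCycle, uint32_t v0)
{
	if (cycle >= nextEventCycle)
		return {cycle + 8, v0, true}; // already past the event: nudge cycle forward and run the dispatcher

	// The loop costs 8 cycles per iteration; stop at whichever comes first, loop exit or next event.
	const uint64_t loop_end   = uint64_t(cycle) + uint64_t(v0) * 8;
	const uint64_t new_cycles = std::min<uint64_t>(loop_end, nextEventCycle);
	const uint32_t iterations = uint32_t((new_cycles - cycle) / 8); // iterations "executed" while skipping
	const uint32_t new_v0     = v0 - iterations;

	// A non-zero remaining counter means an event interrupted the loop, so control re-enters via the dispatcher.
	return {uint32_t(new_cycles), new_v0, new_v0 != 0};
}

The emitted x86 does the same with 32-bit writebacks into cpuRegs.cycle and the GPR, taking the DispatcherEvent path in the non-zero case and setting cpuRegs.pc to s_nEndBlock otherwise.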