diff --git a/include/mgba/internal/arm/emitter-inlines.h b/include/mgba/internal/arm/emitter-inlines.h
index 38bab70bd..ba4fb9951 100644
--- a/include/mgba/internal/arm/emitter-inlines.h
+++ b/include/mgba/internal/arm/emitter-inlines.h
@@ -29,4 +29,6 @@
 	LEFT, \
 	RIGHT
 
+#define MIN_V(PASS, FAIL, COND) ((COND) ? (PASS) : (FAIL)) /* Yields PASS when COND is nonzero and FAIL otherwise; every argument is parenthesized so a compound COND cannot re-associate around ?: */
+
 #endif
diff --git a/include/mgba/internal/arm/emitter-thumb.h b/include/mgba/internal/arm/emitter-thumb.h
index 7d27524b2..f2e69d0bb 100644
--- a/include/mgba/internal/arm/emitter-thumb.h
+++ b/include/mgba/internal/arm/emitter-thumb.h
@@ -17,7 +17,7 @@
 	DECLARE_INSTRUCTION_THUMB(EMITTER, NAME ## 10), \
 	DECLARE_INSTRUCTION_THUMB(EMITTER, NAME ## 11)
 
-#define DECLARE_THUMB_EMITTER_BLOCK(EMITTER) \
+#define DECLARE_THUMB_EMITTER_BLOCK(EMITTER, V) \
 	DO_8(DO_4(DECLARE_INSTRUCTION_THUMB(EMITTER, LSL1))), \
 	DO_8(DO_4(DECLARE_INSTRUCTION_THUMB(EMITTER, LSR1))), \
 	DO_8(DO_4(DECLARE_INSTRUCTION_THUMB(EMITTER, ASR1))), \
@@ -106,7 +106,7 @@
 	DO_4(DECLARE_INSTRUCTION_THUMB(EMITTER, ILL)), \
 	DO_4(DECLARE_INSTRUCTION_THUMB(EMITTER, SWI)), \
 	DO_8(DO_4(DECLARE_INSTRUCTION_THUMB(EMITTER, B))), \
-	DO_8(DO_4(DECLARE_INSTRUCTION_THUMB(EMITTER, ILL))), \
+	DO_8(DO_4(MIN_V(DECLARE_INSTRUCTION_THUMB(EMITTER, BLX1), DECLARE_INSTRUCTION_THUMB(EMITTER, ILL), V >= 5))), \
 	DO_8(DO_4(DECLARE_INSTRUCTION_THUMB(EMITTER, BL1))), \
 	DO_8(DO_4(DECLARE_INSTRUCTION_THUMB(EMITTER, BL2))) \
 
diff --git a/include/mgba/internal/arm/isa-thumb.h b/include/mgba/internal/arm/isa-thumb.h
index 330a4faf8..c85636bb2 100644
--- a/include/mgba/internal/arm/isa-thumb.h
+++ b/include/mgba/internal/arm/isa-thumb.h
@@ -13,7 +13,8 @@ CXX_GUARD_START
 struct ARMCore;
 
 typedef void (*ThumbInstruction)(struct ARMCore*, uint16_t opcode);
-extern const ThumbInstruction _thumbTable[0x400];
+extern const ThumbInstruction _thumbv4Table[0x400]; // Thumb handler table built with V = 4 (the BLX1 slot holds ILL)
+extern const ThumbInstruction _thumbv5Table[0x400]; // Thumb handler table built with V = 5 (the BLX1 slot holds BLX1)
 
 CXX_GUARD_END
 
diff --git a/src/arm/arm.c b/src/arm/arm.c
index dc6714681..7db39d7a9 100644
--- a/src/arm/arm.c
+++ b/src/arm/arm.c
@@ -223,15 +223,6 @@ void ARMRaiseUndefined(struct ARMCore* cpu) {
 	cpu->cycles += currentCycles;
 }
 
-static inline void ThumbStep(struct ARMCore* cpu) {
-	uint32_t opcode = cpu->prefetch[0];
-	cpu->prefetch[0] = cpu->prefetch[1];
-	cpu->gprs[ARM_PC] += WORD_SIZE_THUMB;
-	LOAD_16(cpu->prefetch[1], cpu->gprs[ARM_PC] & cpu->memory.activeMask, cpu->memory.activeRegion);
-	ThumbInstruction instruction = _thumbTable[opcode >> 6];
-	instruction(cpu, opcode);
-}
-
 #define ARM_IMPLEMENT(VERSION) \
 	static inline void ARM ## VERSION ## Step(struct ARMCore* cpu) { \
 		uint32_t opcode = cpu->prefetch[0]; \
@@ -297,9 +288,18 @@ static inline void ThumbStep(struct ARMCore* cpu) {
 		instruction(cpu, opcode); \
 	} \
 	\
-	void ARM  ## VERSION ## Run(struct ARMCore* cpu) { \
+	static inline void Thumb ## VERSION ## Step(struct ARMCore* cpu) { /* Executes one Thumb instruction using the handler table for VERSION */ \
+		uint32_t opcode = cpu->prefetch[0]; /* the instruction to run comes from prefetch slot 0 */ \
+		cpu->prefetch[0] = cpu->prefetch[1]; /* slot 1 moves down into slot 0 */ \
+		cpu->gprs[ARM_PC] += WORD_SIZE_THUMB; \
+		LOAD_16(cpu->prefetch[1], cpu->gprs[ARM_PC] & cpu->memory.activeMask, cpu->memory.activeRegion); /* refill slot 1 from the advanced PC */ \
+		ThumbInstruction instruction = _thumb ## VERSION ## Table[opcode >> 6]; /* opcode >> 6 indexes the 0x400-entry table */ \
+		instruction(cpu, opcode); \
+	} \
+	\
+	void ARM ## VERSION ## Run(struct ARMCore* cpu) { \
 		if (cpu->executionMode == MODE_THUMB) { \
-			ThumbStep(cpu); \
+			Thumb ## VERSION ## Step(cpu); \
 		} else { \
 			ARM ## VERSION ## Step(cpu); \
 		} \
@@ -311,7 +311,7 @@ static inline void ThumbStep(struct ARMCore* cpu) {
 	void ARM ## VERSION ## RunLoop(struct ARMCore* cpu) { \
 		if (cpu->executionMode == MODE_THUMB) { \
 			while (cpu->cycles < cpu->nextEvent) { \
-				ThumbStep(cpu); \
+				Thumb ## VERSION ## Step(cpu); \
 			} \
 		} else { \
 			while (cpu->cycles < cpu->nextEvent) { \
@@ -319,28 +319,6 @@ static inline void ThumbStep(struct ARMCore* cpu) {
 			} \
 		} \
 		cpu->irqh.processEvents(cpu); \
-	} \
-	\
-	int32_t ARM ## VERSION ## RunCycles(struct ARMCore* cpu, int32_t cycles) { \
-		int32_t startCycles = cpu->cycles; \
-		int32_t endCycles = startCycles + cycles; \
-		\
-		if (cpu->executionMode == MODE_THUMB) { \
-			while (cpu->cycles < cpu->nextEvent && cpu->cycles < endCycles) { \
-				ThumbStep(cpu); \
-			} \
-		} else { \
-			while (cpu->cycles < cpu->nextEvent && cpu->cycles < endCycles) { \
-				ARM ## VERSION ## Step(cpu); \
-			} \
-		} \
-		\
-		endCycles = cpu->cycles - startCycles; \
-		if (cpu->cycles >= cpu->nextEvent) { \
-			/* TODO: Handle HALT */ \
-			cpu->irqh.processEvents(cpu); \
-		} \
-		return endCycles; \
 	}
 
 ARM_IMPLEMENT(v4)
diff --git a/src/arm/decoder-thumb.c b/src/arm/decoder-thumb.c
index a53da00e9..bb89050a0 100644
--- a/src/arm/decoder-thumb.c
+++ b/src/arm/decoder-thumb.c
@@ -282,6 +282,14 @@ DEFINE_THUMB_DECODER(BL2, BL,
 		ARM_OPERAND_REGISTER_2 | ARM_OPERAND_IMMEDIATE_3;
 	info->branchType = ARM_BRANCH_LINKED;)
 
+DEFINE_THUMB_DECODER(BLX1, BLX,
+	/* Operands: op1 is PC and op2 is LR and op3 is the low 11 opcode bits shifted left by one. */
+	info->branchType = ARM_BRANCH_LINKED;
+	info->operandFormat = ARM_OPERAND_REGISTER_1 | ARM_OPERAND_AFFECTED_1 | ARM_OPERAND_REGISTER_2 | ARM_OPERAND_IMMEDIATE_3;
+	info->op3.immediate = (opcode & 0x07FF) << 1;
+	info->op2.reg = ARM_LR;
+	info->op1.reg = ARM_PC;)
+
 DEFINE_THUMB_DECODER(BX, BX,
 	info->op1.reg = (opcode >> 3) & 0xF;
 	info->operandFormat = ARM_OPERAND_REGISTER_1;
@@ -295,7 +303,7 @@ DEFINE_THUMB_DECODER(SWI, SWI,
 typedef void (*ThumbDecoder)(uint16_t opcode, struct ARMInstructionInfo* info);
 
 static const ThumbDecoder _thumbDecoderTable[0x400] = {
+	DECLARE_THUMB_EMITTER_BLOCK(_ThumbDecode, 5) // V = 5 so the V >= 5 test passes and the BLX1 slot gets the BLX1 decoder; NOTE(review): this one table also decodes for v4 cores, whose handler table has ILL in that slot - confirm this is intended
 };
 
 void ARMDecodeThumb(uint16_t opcode, struct ARMInstructionInfo* info) {
diff --git a/src/arm/decoder.c b/src/arm/decoder.c
index 32e92f23a..8c7d25540 100644
--- a/src/arm/decoder.c
+++ b/src/arm/decoder.c
@@ -389,6 +389,7 @@ int ARMDisassemble(struct ARMInstructionInfo* info, uint32_t pc, char* buffer, i
 		break;
 	case ARM_MN_B:
 	case ARM_MN_BL:
+	case ARM_MN_BLX:
 		if (info->operandFormat & ARM_OPERAND_IMMEDIATE_1) {
 			written = _decodePCRelative(info->op1.immediate, pc, buffer, blen);
 			ADVANCE(written);
diff --git a/src/arm/isa-thumb.c b/src/arm/isa-thumb.c
index 54faf6ddc..b65483d67 100644
--- a/src/arm/isa-thumb.c
+++ b/src/arm/isa-thumb.c
@@ -382,6 +382,14 @@ DEFINE_INSTRUCTION_THUMB(BL2,
 	cpu->gprs[ARM_LR] = pc - 1;
 	THUMB_WRITE_PC;)
 
+DEFINE_INSTRUCTION_THUMB(BLX1, // Thumb handler that branches through LR and continues in ARM mode
+	uint16_t immediate = (opcode & 0x07FF) << 1; // low 11 opcode bits shifted left by one
+	uint32_t pc = cpu->gprs[ARM_PC]; // saved now because PC is overwritten on the next line
+	cpu->gprs[ARM_PC] = (cpu->gprs[ARM_LR] + immediate) & 0xFFFFFFFC; // target is LR plus the offset with the low two bits cleared
+	cpu->gprs[ARM_LR] = pc - 1; // LR is replaced only after it was read for the target; same pc - 1 link value as BL2
+	_ARMSetMode(cpu, MODE_ARM); // mode is switched before the PC write macro runs
+	ARM_WRITE_PC;)
+
 DEFINE_INSTRUCTION_THUMB(BX,
 	int rm = (opcode >> 3) & 0xF;
 	_ARMSetMode(cpu, cpu->gprs[rm] & 0x00000001);
@@ -398,6 +406,10 @@ DEFINE_INSTRUCTION_THUMB(BX,
 
 DEFINE_INSTRUCTION_THUMB(SWI, cpu->irqh.swi16(cpu, opcode & 0xFF))
 
-const ThumbInstruction _thumbTable[0x400] = {
-	DECLARE_THUMB_EMITTER_BLOCK(_ThumbInstruction)
+const ThumbInstruction _thumbv4Table[0x400] = { // V = 4: the V >= 5 test fails, so the BLX1 slot is filled with ILL
+	DECLARE_THUMB_EMITTER_BLOCK(_ThumbInstruction, 4)
+};
+
+const ThumbInstruction _thumbv5Table[0x400] = { // V = 5: the V >= 5 test passes, so the BLX1 slot is filled with BLX1
+	DECLARE_THUMB_EMITTER_BLOCK(_ThumbInstruction, 5)
 };