From 3f27b09ec162439b252f30246746b16577b990b0 Mon Sep 17 00:00:00 2001
From: Vicki Pfau <vi@endrift.com>
Date: Mon, 27 Feb 2017 00:41:47 -0800
Subject: [PATCH] ARM9: Implement SMUL<x><y>

---
 include/mgba/internal/arm/decoder.h     |  4 +++
 include/mgba/internal/arm/emitter-arm.h |  8 ++---
 src/arm/decoder-arm.c                   |  4 +++
 src/arm/decoder.c                       |  4 +++
 src/arm/isa-arm.c                       | 43 ++++++++++++++-----------
 5 files changed, 41 insertions(+), 22 deletions(-)

diff --git a/include/mgba/internal/arm/decoder.h b/include/mgba/internal/arm/decoder.h
index b66dd337d..6f33cf748 100644
--- a/include/mgba/internal/arm/decoder.h
+++ b/include/mgba/internal/arm/decoder.h
@@ -192,6 +192,10 @@ enum ARMMnemonic {
 	ARM_MN_SMLABT,
 	ARM_MN_SMLATB,
 	ARM_MN_SMLATT,
+	ARM_MN_SMULBB,
+	ARM_MN_SMULBT,
+	ARM_MN_SMULTB,
+	ARM_MN_SMULTT,
 	ARM_MN_SMLAL,
 	ARM_MN_SMULL,
 	ARM_MN_STC,
diff --git a/include/mgba/internal/arm/emitter-arm.h b/include/mgba/internal/arm/emitter-arm.h
index 12c14767d..22492c380 100644
--- a/include/mgba/internal/arm/emitter-arm.h
+++ b/include/mgba/internal/arm/emitter-arm.h
@@ -155,13 +155,13 @@
 	DECLARE_INSTRUCTION_ARM(EMITTER, ILL), \
 	DECLARE_INSTRUCTION_ARM(EMITTER, ILL), \
 	DECLARE_INSTRUCTION_ARM(EMITTER, ILL), \
+	MIN_V(DECLARE_INSTRUCTION_ARM(EMITTER, SMULBB), DECLARE_INSTRUCTION_ARM(EMITTER, ILL), V >= 5), \
 	DECLARE_INSTRUCTION_ARM(EMITTER, ILL), \
-	DECLARE_INSTRUCTION_ARM(EMITTER, ILL), \
-	DECLARE_INSTRUCTION_ARM(EMITTER, ILL), \
+	MIN_V(DECLARE_INSTRUCTION_ARM(EMITTER, SMULTB), DECLARE_INSTRUCTION_ARM(EMITTER, ILL), V >= 5), \
 	DECLARE_INSTRUCTION_ARM(EMITTER, STRHIPW), \
+	MIN_V(DECLARE_INSTRUCTION_ARM(EMITTER, SMULBT), DECLARE_INSTRUCTION_ARM(EMITTER, ILL), V >= 5), \
 	DECLARE_INSTRUCTION_ARM(EMITTER, ILL), \
-	DECLARE_INSTRUCTION_ARM(EMITTER, ILL), \
-	DECLARE_INSTRUCTION_ARM(EMITTER, ILL), \
+	MIN_V(DECLARE_INSTRUCTION_ARM(EMITTER, SMULTT), DECLARE_INSTRUCTION_ARM(EMITTER, ILL), V >= 5), \
 	DECLARE_INSTRUCTION_ARM(EMITTER, ILL), \
 	DECLARE_ARM_ALU_BLOCK(EMITTER, CMN, ILL, LDRHIPW, LDRSBIPW, LDRSHIPW), \
 	DECLARE_ARM_ALU_BLOCK(EMITTER, ORR, SMLAL, STRHPU, ILL, ILL), \
diff --git a/src/arm/decoder-arm.c b/src/arm/decoder-arm.c
index e479d8260..e0e901458 100644
--- a/src/arm/decoder-arm.c
+++ b/src/arm/decoder-arm.c
@@ -315,6 +315,10 @@ DEFINE_MULTIPLY_DECODER_EX_ARM(SMLABB, SMLABB, 0, ARM_OPERAND_REGISTER_4)
 DEFINE_MULTIPLY_DECODER_EX_ARM(SMLABT, SMLABT, 0, ARM_OPERAND_REGISTER_4)
 DEFINE_MULTIPLY_DECODER_EX_ARM(SMLATB, SMLATB, 0, ARM_OPERAND_REGISTER_4)
 DEFINE_MULTIPLY_DECODER_EX_ARM(SMLATT, SMLATT, 0, ARM_OPERAND_REGISTER_4)
+DEFINE_MULTIPLY_DECODER_EX_ARM(SMULBB, SMULBB, 0, 0) /* NOTE(review): last argument is 0 where the SMLA* decoders pass ARM_OPERAND_REGISTER_4; presumably because SMUL has no accumulate operand - confirm against DEFINE_MULTIPLY_DECODER_EX_ARM */
+DEFINE_MULTIPLY_DECODER_EX_ARM(SMULBT, SMULBT, 0, 0)
+DEFINE_MULTIPLY_DECODER_EX_ARM(SMULTB, SMULTB, 0, 0)
+DEFINE_MULTIPLY_DECODER_EX_ARM(SMULTT, SMULTT, 0, 0)
 
 // Begin load/store definitions
 
diff --git a/src/arm/decoder.c b/src/arm/decoder.c
index bd8caaa07..cac6f2365 100644
--- a/src/arm/decoder.c
+++ b/src/arm/decoder.c
@@ -280,6 +280,10 @@ static const char* _armMnemonicStrings[] = {
 	"smlabt",
 	"smlatb",
 	"smlatt",
+	"smulbb",
+	"smulbt",
+	"smultb",
+	"smultt",
 	"smlal",
 	"smull",
 	"stc",
diff --git a/src/arm/isa-arm.c b/src/arm/isa-arm.c
index 4e6e0f733..94a8dc2f8 100644
--- a/src/arm/isa-arm.c
+++ b/src/arm/isa-arm.c
@@ -361,6 +361,7 @@ static inline void _immediate(struct ARMCore* cpu, uint32_t opcode) {
 		int rs = (opcode >> 8) & 0xF; \
 		int rn = (opcode >> 12) & 0xF; \
 		int rm = opcode & 0xF; \
+		UNUSED(rn); \
 		if (rd == ARM_PC) { \
 			return; \
 		} \
@@ -368,12 +369,26 @@ static inline void _immediate(struct ARMCore* cpu, uint32_t opcode) {
 		int32_t x; \
 		int32_t y; \
 		BODY; \
-		int32_t dn = cpu->gprs[rn]; \
-		int32_t d = x * y; \
-		cpu->gprs[rd] = d + dn; \
-		cpu->cpsr.q = cpu->cpsr.q || ARM_V_ADDITION(d, dn, cpu->gprs[rd]); \
 		currentCycles += cpu->memory.activeNonseqCycles32 - cpu->memory.activeSeqCycles32)
 
+#define DEFINE_MULTIPLY_INSTRUCTION_XY_ARM(NAME, BODY) /* Expands to NAME##BB, NAME##BT, NAME##TB and NAME##TT; each loads x and y, then runs BODY */ \
+	DEFINE_MULTIPLY_INSTRUCTION_3_ARM(NAME ## BB, /* first suffix letter: half of gprs[rm] used for x; second: half of gprs[rs] used for y */ \
+		x = ARM_SXT_16(cpu->gprs[rm]); /* ARM_SXT_16 is defined elsewhere; presumably sign-extends the low 16 bits - confirm */ \
+		y = ARM_SXT_16(cpu->gprs[rs]); \
+		BODY) \
+	DEFINE_MULTIPLY_INSTRUCTION_3_ARM(NAME ## BT, /* B: register passed unshifted, T: register shifted right by 16 first */ \
+		x = ARM_SXT_16(cpu->gprs[rm]); \
+		y = ARM_SXT_16(cpu->gprs[rs] >> 16); \
+		BODY) \
+	DEFINE_MULTIPLY_INSTRUCTION_3_ARM(NAME ## TB, \
+		x = ARM_SXT_16(cpu->gprs[rm] >> 16); \
+		y = ARM_SXT_16(cpu->gprs[rs]); \
+		BODY) \
+	DEFINE_MULTIPLY_INSTRUCTION_3_ARM(NAME ## TT, \
+		x = ARM_SXT_16(cpu->gprs[rm] >> 16); \
+		y = ARM_SXT_16(cpu->gprs[rs] >> 16); \
+		BODY)
+
 #define DEFINE_LOAD_STORE_INSTRUCTION_EX_ARM(NAME, ADDRESS, WRITEBACK, BODY) \
 	DEFINE_INSTRUCTION_ARM(NAME, \
 		uint32_t address; \
@@ -541,21 +556,13 @@ DEFINE_MULTIPLY_INSTRUCTION_2_ARM(SMLAL,
 	cpu->gprs[rdHi] = cpu->gprs[rdHi] + (d >> 32) + ARM_CARRY_FROM(dm, dn, cpu->gprs[rd]);,
 	ARM_NEUTRAL_HI_S(cpu->gprs[rd], cpu->gprs[rdHi]), 3)
 
-DEFINE_MULTIPLY_INSTRUCTION_3_ARM(SMLABB,
-	x = ARM_SXT_16(cpu->gprs[rm]);
-	y = ARM_SXT_16(cpu->gprs[rs]);)
+DEFINE_MULTIPLY_INSTRUCTION_XY_ARM(SMLA, /* SMLA<x><y>: gprs[rd] = x * y + gprs[rn]; cpsr.q is only ever set here, never cleared */
+	int32_t dn = cpu->gprs[rn]; /* accumulate operand, read before rd is written */
+	int32_t d = x * y;
+	cpu->gprs[rd] = (int32_t) ((uint32_t) d + (uint32_t) dn); /* add as unsigned so a wrap is well defined; an overflowing int32_t d + dn is undefined behavior */
+	cpu->cpsr.q = cpu->cpsr.q || ARM_V_ADDITION(d, dn, cpu->gprs[rd]);) /* ARM_V_ADDITION is defined elsewhere; presumably the signed-overflow test - confirm */
 
-DEFINE_MULTIPLY_INSTRUCTION_3_ARM(SMLABT,
-	x = ARM_SXT_16(cpu->gprs[rm]);
-	y = ARM_SXT_16(cpu->gprs[rs] >> 16);)
-
-DEFINE_MULTIPLY_INSTRUCTION_3_ARM(SMLATB,
-	x = ARM_SXT_16(cpu->gprs[rm] >> 16);
-	y = ARM_SXT_16(cpu->gprs[rs]);)
-
-DEFINE_MULTIPLY_INSTRUCTION_3_ARM(SMLATT,
-	x = ARM_SXT_16(cpu->gprs[rm] >> 16);
-	y = ARM_SXT_16(cpu->gprs[rs] >> 16);)
+DEFINE_MULTIPLY_INSTRUCTION_XY_ARM(SMUL, cpu->gprs[rd] = x * y;) /* SMUL<x><y>: stores the product of the selected halves in rd; no accumulate operand and no cpsr.q update */
 
 DEFINE_MULTIPLY_INSTRUCTION_2_ARM(SMULL,
 	int64_t d = ((int64_t) cpu->gprs[rm]) * ((int64_t) cpu->gprs[rs]);