diff --git a/CHANGES b/CHANGES index 65a046440..b2032f386 100644 --- a/CHANGES +++ b/CHANGES @@ -12,6 +12,7 @@ Bugfixes: Misc: - DS: Set boot complete bit in RAM on boot (fixes mgba.io/i/576, mgba.io/i/580, mgba.io/i/586) - DS Memory: Ensure DS9 I/O is 8-byte aligned + - ARM9: Implement SMLAW and SMULW - Qt: Add .nds files to the extension list in Info.plist 0.6.0: (Future) diff --git a/include/mgba/internal/arm/decoder.h b/include/mgba/internal/arm/decoder.h index 6f33cf748..586941a27 100644 --- a/include/mgba/internal/arm/decoder.h +++ b/include/mgba/internal/arm/decoder.h @@ -190,14 +190,18 @@ enum ARMMnemonic { ARM_MN_SBC, ARM_MN_SMLABB, ARM_MN_SMLABT, + ARM_MN_SMLAL, ARM_MN_SMLATB, ARM_MN_SMLATT, + ARM_MN_SMLAWB, + ARM_MN_SMLAWT, ARM_MN_SMULBB, ARM_MN_SMULBT, + ARM_MN_SMULL, ARM_MN_SMULTB, ARM_MN_SMULTT, - ARM_MN_SMLAL, - ARM_MN_SMULL, + ARM_MN_SMULWB, + ARM_MN_SMULWT, ARM_MN_STC, ARM_MN_STM, ARM_MN_STR, diff --git a/include/mgba/internal/arm/emitter-arm.h b/include/mgba/internal/arm/emitter-arm.h index 98c8377e6..83fcbec37 100644 --- a/include/mgba/internal/arm/emitter-arm.h +++ b/include/mgba/internal/arm/emitter-arm.h @@ -143,13 +143,13 @@ DECLARE_INSTRUCTION_ARM(EMITTER, ILL), \ DECLARE_INSTRUCTION_ARM(EMITTER, ILL), \ DECLARE_INSTRUCTION_ARM(EMITTER, BKPT), \ + MIN_V(DECLARE_INSTRUCTION_ARM(EMITTER, SMLAWB), DECLARE_INSTRUCTION_ARM(EMITTER, ILL), V >= 5), \ DECLARE_INSTRUCTION_ARM(EMITTER, ILL), \ - DECLARE_INSTRUCTION_ARM(EMITTER, ILL), \ - DECLARE_INSTRUCTION_ARM(EMITTER, ILL), \ + MIN_V(DECLARE_INSTRUCTION_ARM(EMITTER, SMULWB), DECLARE_INSTRUCTION_ARM(EMITTER, ILL), V >= 5), \ DECLARE_INSTRUCTION_ARM(EMITTER, STRHPW), \ + MIN_V(DECLARE_INSTRUCTION_ARM(EMITTER, SMLAWT), DECLARE_INSTRUCTION_ARM(EMITTER, ILL), V >= 5), \ DECLARE_INSTRUCTION_ARM(EMITTER, ILL), \ - DECLARE_INSTRUCTION_ARM(EMITTER, ILL), \ - DECLARE_INSTRUCTION_ARM(EMITTER, ILL), \ + MIN_V(DECLARE_INSTRUCTION_ARM(EMITTER, SMULWT), DECLARE_INSTRUCTION_ARM(EMITTER, ILL), V >= 5), \ DECLARE_INSTRUCTION_ARM(EMITTER, ILL), \ DECLARE_ARM_ALU_BLOCK(EMITTER, TEQ, ILL, LDRHPW, LDRSBPW, LDRSHPW), \ DECLARE_INSTRUCTION_ARM(EMITTER, MRSR), \ diff --git a/src/arm/decoder-arm.c b/src/arm/decoder-arm.c index 2400c8677..4ea92f973 100644 --- a/src/arm/decoder-arm.c +++ b/src/arm/decoder-arm.c @@ -320,6 +320,11 @@ DEFINE_MULTIPLY_DECODER_EX_ARM(SMULBT, SMULBT, 0, 0) DEFINE_MULTIPLY_DECODER_EX_ARM(SMULTB, SMULTB, 0, 0) DEFINE_MULTIPLY_DECODER_EX_ARM(SMULTT, SMULTT, 0, 0) +DEFINE_MULTIPLY_DECODER_EX_ARM(SMLAWB, SMLAWB, 0, ARM_OPERAND_REGISTER_4) +DEFINE_MULTIPLY_DECODER_EX_ARM(SMLAWT, SMLAWT, 0, ARM_OPERAND_REGISTER_4) +DEFINE_MULTIPLY_DECODER_EX_ARM(SMULWB, SMULWB, 0, 0) +DEFINE_MULTIPLY_DECODER_EX_ARM(SMULWT, SMULWT, 0, 0) + // Begin load/store definitions DEFINE_LOAD_STORE_MODE_2_DECODER_ARM(LDR, LDR, LOAD_CYCLES, ARM_ACCESS_WORD) diff --git a/src/arm/decoder.c b/src/arm/decoder.c index cac6f2365..3767e8c3a 100644 --- a/src/arm/decoder.c +++ b/src/arm/decoder.c @@ -278,14 +278,18 @@ static const char* _armMnemonicStrings[] = { "sbc", "smlabb", "smlabt", + "smlal", "smlatb", "smlatt", + "smlawb", + "smlawt", "smulbb", "smulbt", + "smull", "smultb", "smultt", - "smlal", - "smull", + "smulwb", + "smulwt", "stc", "stm", "str", diff --git a/src/arm/isa-arm.c b/src/arm/isa-arm.c index 77e12411d..e30fafe1c 100644 --- a/src/arm/isa-arm.c +++ b/src/arm/isa-arm.c @@ -401,6 +401,14 @@ static inline void _immediate(struct ARMCore* cpu, uint32_t opcode) { y = ARM_SXT_16(cpu->gprs[rs] >> 16); \ BODY) +#define DEFINE_MULTIPLY_INSTRUCTION_WY_ARM(NAME, BODY) \ + DEFINE_MULTIPLY_INSTRUCTION_3_ARM(NAME ## B, \ + y = ARM_SXT_16(cpu->gprs[rs]); \ + BODY) \ + DEFINE_MULTIPLY_INSTRUCTION_3_ARM(NAME ## T, \ + y = ARM_SXT_16(cpu->gprs[rs] >> 16); \ + BODY) \ + #define DEFINE_LOAD_STORE_INSTRUCTION_EX_ARM(NAME, ADDRESS, WRITEBACK, BODY) \ DEFINE_INSTRUCTION_ARM(NAME, \ uint32_t address; \ @@ -576,6 +584,14 @@ DEFINE_MULTIPLY_INSTRUCTION_XY_ARM(SMLA, DEFINE_MULTIPLY_INSTRUCTION_XY_ARM(SMUL, cpu->gprs[rd] = x * y;) +DEFINE_MULTIPLY_INSTRUCTION_WY_ARM(SMLAW, + int32_t dn = cpu->gprs[rn]; \ + int32_t d = (((int64_t) cpu->gprs[rm]) * ((int64_t) y)) >> 16; \ + cpu->gprs[rd] = d + dn; \ + cpu->cpsr.q = cpu->cpsr.q || ARM_V_ADDITION(d, dn, cpu->gprs[rd]);) + +DEFINE_MULTIPLY_INSTRUCTION_WY_ARM(SMULW, cpu->gprs[rd] = (((int64_t) cpu->gprs[rm]) * ((int64_t) y)) >> 16;) + DEFINE_MULTIPLY_INSTRUCTION_2_ARM(SMULL, int64_t d = ((int64_t) cpu->gprs[rm]) * ((int64_t) cpu->gprs[rs]); cpu->gprs[rd] = d;