From 49ec1ed7024493a52059c2d7c84c2cde2e026b75 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Mon, 15 Feb 2021 23:40:00 -0800 Subject: [PATCH] ARM: Fix long and accumulate multiply timing --- CHANGES | 1 + include/mgba/internal/arm/isa-inlines.h | 12 +++++------ src/arm/isa-arm.c | 28 ++++++++++++------------- src/arm/isa-thumb.c | 2 +- 4 files changed, 21 insertions(+), 22 deletions(-) diff --git a/CHANGES b/CHANGES index d6d3e6a53..bb5513fd3 100644 --- a/CHANGES +++ b/CHANGES @@ -24,6 +24,7 @@ Emulation fixes: - ARM: Fix STR storing PC after address calculation - ARM: Fix Addressing mode 1 shifter on rs == pc (fixes mgba.io/i/1926) - ARM: Fix long multiply-and-accumulate register write order (fixes mgba.io/1/1956) + - ARM: Fix long and accumulate multiply timing - GB: Partially fix timing for skipped BIOS - GB: Downgrade DMG-only ROMs from CGB mode even without boot ROM - GB Audio: Fix serializing sweep time diff --git a/include/mgba/internal/arm/isa-inlines.h b/include/mgba/internal/arm/isa-inlines.h index 93659b6d7..7f7fd4396 100644 --- a/include/mgba/internal/arm/isa-inlines.h +++ b/include/mgba/internal/arm/isa-inlines.h @@ -37,17 +37,17 @@ #define ARM_V_ADDITION(M, N, D) (!(ARM_SIGN((M) ^ (N))) && (ARM_SIGN((M) ^ (D)))) #define ARM_V_SUBTRACTION(M, N, D) ((ARM_SIGN((M) ^ (N))) && (ARM_SIGN((M) ^ (D)))) -#define ARM_WAIT_MUL(R) \ +#define ARM_WAIT_MUL(R, WAIT) \ { \ - int32_t wait; \ + int32_t wait = WAIT; \ if ((R & 0xFFFFFF00) == 0xFFFFFF00 || !(R & 0xFFFFFF00)) { \ - wait = 1; \ + wait += 1; \ } else if ((R & 0xFFFF0000) == 0xFFFF0000 || !(R & 0xFFFF0000)) { \ - wait = 2; \ + wait += 2; \ } else if ((R & 0xFF000000) == 0xFF000000 || !(R & 0xFF000000)) { \ - wait = 3; \ + wait += 3; \ } else { \ - wait = 4; \ + wait += 4; \ } \ currentCycles += cpu->memory.stall(cpu, wait); \ } diff --git a/src/arm/isa-arm.c b/src/arm/isa-arm.c index 6148a1ec8..56dcca987 100644 --- a/src/arm/isa-arm.c +++ b/src/arm/isa-arm.c @@ -325,12 +325,11 @@ ATTRIBUTE_NOINLINE static void _neutralS(struct ARMCore* cpu, int32_t d) { int rd = (opcode >> 16) & 0xF; \ int rs = (opcode >> 8) & 0xF; \ int rm = opcode & 0xF; \ - if (rd == ARM_PC) { \ - return; \ + if (rd != ARM_PC) { \ + ARM_WAIT_MUL(cpu->gprs[rs], 0); \ + BODY; \ + S_BODY; \ } \ - ARM_WAIT_MUL(cpu->gprs[rs]); \ - BODY; \ - S_BODY; \ currentCycles += cpu->memory.activeNonseqCycles32 - cpu->memory.activeSeqCycles32) #define DEFINE_MULTIPLY_INSTRUCTION_2_EX_ARM(NAME, BODY, S_BODY, WAIT) \ @@ -339,12 +338,11 @@ ATTRIBUTE_NOINLINE static void _neutralS(struct ARMCore* cpu, int32_t d) { int rdHi = (opcode >> 16) & 0xF; \ int rs = (opcode >> 8) & 0xF; \ int rm = opcode & 0xF; \ - if (rdHi == ARM_PC || rd == ARM_PC) { \ - return; \ + if (rdHi != ARM_PC && rd != ARM_PC) { \ + ARM_WAIT_MUL(cpu->gprs[rs], WAIT); \ + BODY; \ + S_BODY; \ } \ - currentCycles += cpu->memory.stall(cpu, WAIT); \ - BODY; \ - S_BODY; \ currentCycles += cpu->memory.activeNonseqCycles32 - cpu->memory.activeSeqCycles32) #define DEFINE_MULTIPLY_INSTRUCTION_ARM(NAME, BODY, S_BODY) \ @@ -522,7 +520,7 @@ DEFINE_ALU_INSTRUCTION_S_ONLY_ARM(TST, ARM_NEUTRAL_S(n, cpu->shifterOperand, alu // Begin multiply definitions -DEFINE_MULTIPLY_INSTRUCTION_2_ARM(MLA, cpu->gprs[rdHi] = cpu->gprs[rm] * cpu->gprs[rs] + cpu->gprs[rd], ARM_NEUTRAL_S(, , cpu->gprs[rdHi]), 2) +DEFINE_MULTIPLY_INSTRUCTION_2_ARM(MLA, cpu->gprs[rdHi] = cpu->gprs[rm] * cpu->gprs[rs] + cpu->gprs[rd], ARM_NEUTRAL_S(, , cpu->gprs[rdHi]), 1) DEFINE_MULTIPLY_INSTRUCTION_ARM(MUL, cpu->gprs[rd] = cpu->gprs[rm] * cpu->gprs[rs], ARM_NEUTRAL_S(cpu->gprs[rm], cpu->gprs[rs], cpu->gprs[rd])) DEFINE_MULTIPLY_INSTRUCTION_2_ARM(SMLAL, @@ -530,26 +528,26 @@ DEFINE_MULTIPLY_INSTRUCTION_2_ARM(SMLAL, int32_t dHi = cpu->gprs[rdHi] + (d >> 32); cpu->gprs[rd] = d; cpu->gprs[rdHi] = dHi;, - ARM_NEUTRAL_HI_S(cpu->gprs[rd], dHi), 3) + ARM_NEUTRAL_HI_S(cpu->gprs[rd], dHi), 2) DEFINE_MULTIPLY_INSTRUCTION_2_ARM(SMULL, int64_t d = ((int64_t) cpu->gprs[rm]) * ((int64_t) cpu->gprs[rs]); cpu->gprs[rd] = d; cpu->gprs[rdHi] = d >> 32;, - ARM_NEUTRAL_HI_S(cpu->gprs[rd], cpu->gprs[rdHi]), 2) + ARM_NEUTRAL_HI_S(cpu->gprs[rd], cpu->gprs[rdHi]), 1) DEFINE_MULTIPLY_INSTRUCTION_2_ARM(UMLAL, uint64_t d = ARM_UXT_64(cpu->gprs[rm]) * ARM_UXT_64(cpu->gprs[rs]) + ((uint32_t) cpu->gprs[rd]); uint32_t dHi = ((uint32_t) cpu->gprs[rdHi]) + (d >> 32); cpu->gprs[rd] = d; cpu->gprs[rdHi] = dHi;, - ARM_NEUTRAL_HI_S(cpu->gprs[rd], dHi), 3) + ARM_NEUTRAL_HI_S(cpu->gprs[rd], dHi), 2) DEFINE_MULTIPLY_INSTRUCTION_2_ARM(UMULL, uint64_t d = ARM_UXT_64(cpu->gprs[rm]) * ARM_UXT_64(cpu->gprs[rs]); cpu->gprs[rd] = d; cpu->gprs[rdHi] = d >> 32;, - ARM_NEUTRAL_HI_S(cpu->gprs[rd], cpu->gprs[rdHi]), 2) + ARM_NEUTRAL_HI_S(cpu->gprs[rd], cpu->gprs[rdHi]), 1) // End multiply definitions diff --git a/src/arm/isa-thumb.c b/src/arm/isa-thumb.c index a5c7d9081..7ee211349 100644 --- a/src/arm/isa-thumb.c +++ b/src/arm/isa-thumb.c @@ -232,7 +232,7 @@ DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(NEG, THUMB_SUBTRACTION(cpu->gprs[rd], 0, cp DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(CMP2, int32_t aluOut = cpu->gprs[rd] - cpu->gprs[rn]; THUMB_SUBTRACTION_S(cpu->gprs[rd], cpu->gprs[rn], aluOut)) DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(CMN, int32_t aluOut = cpu->gprs[rd] + cpu->gprs[rn]; THUMB_ADDITION_S(cpu->gprs[rd], cpu->gprs[rn], aluOut)) DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(ORR, cpu->gprs[rd] = cpu->gprs[rd] | cpu->gprs[rn]; THUMB_NEUTRAL_S( , , cpu->gprs[rd])) -DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(MUL, ARM_WAIT_MUL(cpu->gprs[rd]); cpu->gprs[rd] *= cpu->gprs[rn]; THUMB_NEUTRAL_S( , , cpu->gprs[rd]); currentCycles += cpu->memory.activeNonseqCycles16 - cpu->memory.activeSeqCycles16) +DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(MUL, ARM_WAIT_MUL(cpu->gprs[rd], 0); cpu->gprs[rd] *= cpu->gprs[rn]; THUMB_NEUTRAL_S( , , cpu->gprs[rd]); currentCycles += cpu->memory.activeNonseqCycles16 - cpu->memory.activeSeqCycles16) DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(BIC, cpu->gprs[rd] = cpu->gprs[rd] & ~cpu->gprs[rn]; THUMB_NEUTRAL_S( , , cpu->gprs[rd])) DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(MVN, cpu->gprs[rd] = ~cpu->gprs[rn]; THUMB_NEUTRAL_S( , , cpu->gprs[rd]))