ARM: Fix long and accumulate multiply timing

This commit is contained in:
Vicki Pfau 2021-02-15 23:40:00 -08:00
parent 232aab5a08
commit 49ec1ed702
4 changed files with 21 additions and 22 deletions
CHANGES
include/mgba/internal/arm
src/arm

View File

@ -24,6 +24,7 @@ Emulation fixes:
- ARM: Fix STR storing PC after address calculation
- ARM: Fix Addressing mode 1 shifter on rs == pc (fixes mgba.io/i/1926)
- ARM: Fix long multiply-and-accumulate register write order (fixes mgba.io/i/1956)
- ARM: Fix long and accumulate multiply timing
- GB: Partially fix timing for skipped BIOS
- GB: Downgrade DMG-only ROMs from CGB mode even without boot ROM
- GB Audio: Fix serializing sweep time

View File

@ -37,17 +37,17 @@
#define ARM_V_ADDITION(M, N, D) (!(ARM_SIGN((M) ^ (N))) && (ARM_SIGN((M) ^ (D))))
#define ARM_V_SUBTRACTION(M, N, D) ((ARM_SIGN((M) ^ (N))) && (ARM_SIGN((M) ^ (D))))
#define ARM_WAIT_MUL(R) \
#define ARM_WAIT_MUL(R, WAIT) \
{ \
int32_t wait; \
int32_t wait = WAIT; \
if ((R & 0xFFFFFF00) == 0xFFFFFF00 || !(R & 0xFFFFFF00)) { \
wait = 1; \
wait += 1; \
} else if ((R & 0xFFFF0000) == 0xFFFF0000 || !(R & 0xFFFF0000)) { \
wait = 2; \
wait += 2; \
} else if ((R & 0xFF000000) == 0xFF000000 || !(R & 0xFF000000)) { \
wait = 3; \
wait += 3; \
} else { \
wait = 4; \
wait += 4; \
} \
currentCycles += cpu->memory.stall(cpu, wait); \
}

View File

@ -325,12 +325,11 @@ ATTRIBUTE_NOINLINE static void _neutralS(struct ARMCore* cpu, int32_t d) {
int rd = (opcode >> 16) & 0xF; \
int rs = (opcode >> 8) & 0xF; \
int rm = opcode & 0xF; \
if (rd == ARM_PC) { \
return; \
} \
ARM_WAIT_MUL(cpu->gprs[rs]); \
if (rd != ARM_PC) { \
ARM_WAIT_MUL(cpu->gprs[rs], 0); \
BODY; \
S_BODY; \
} \
currentCycles += cpu->memory.activeNonseqCycles32 - cpu->memory.activeSeqCycles32)
#define DEFINE_MULTIPLY_INSTRUCTION_2_EX_ARM(NAME, BODY, S_BODY, WAIT) \
@ -339,12 +338,11 @@ ATTRIBUTE_NOINLINE static void _neutralS(struct ARMCore* cpu, int32_t d) {
int rdHi = (opcode >> 16) & 0xF; \
int rs = (opcode >> 8) & 0xF; \
int rm = opcode & 0xF; \
if (rdHi == ARM_PC || rd == ARM_PC) { \
return; \
} \
currentCycles += cpu->memory.stall(cpu, WAIT); \
if (rdHi != ARM_PC && rd != ARM_PC) { \
ARM_WAIT_MUL(cpu->gprs[rs], WAIT); \
BODY; \
S_BODY; \
} \
currentCycles += cpu->memory.activeNonseqCycles32 - cpu->memory.activeSeqCycles32)
#define DEFINE_MULTIPLY_INSTRUCTION_ARM(NAME, BODY, S_BODY) \
@ -522,7 +520,7 @@ DEFINE_ALU_INSTRUCTION_S_ONLY_ARM(TST, ARM_NEUTRAL_S(n, cpu->shifterOperand, alu
// Begin multiply definitions
DEFINE_MULTIPLY_INSTRUCTION_2_ARM(MLA, cpu->gprs[rdHi] = cpu->gprs[rm] * cpu->gprs[rs] + cpu->gprs[rd], ARM_NEUTRAL_S(, , cpu->gprs[rdHi]), 2)
DEFINE_MULTIPLY_INSTRUCTION_2_ARM(MLA, cpu->gprs[rdHi] = cpu->gprs[rm] * cpu->gprs[rs] + cpu->gprs[rd], ARM_NEUTRAL_S(, , cpu->gprs[rdHi]), 1)
DEFINE_MULTIPLY_INSTRUCTION_ARM(MUL, cpu->gprs[rd] = cpu->gprs[rm] * cpu->gprs[rs], ARM_NEUTRAL_S(cpu->gprs[rm], cpu->gprs[rs], cpu->gprs[rd]))
DEFINE_MULTIPLY_INSTRUCTION_2_ARM(SMLAL,
@ -530,26 +528,26 @@ DEFINE_MULTIPLY_INSTRUCTION_2_ARM(SMLAL,
int32_t dHi = cpu->gprs[rdHi] + (d >> 32);
cpu->gprs[rd] = d;
cpu->gprs[rdHi] = dHi;,
ARM_NEUTRAL_HI_S(cpu->gprs[rd], dHi), 3)
ARM_NEUTRAL_HI_S(cpu->gprs[rd], dHi), 2)
DEFINE_MULTIPLY_INSTRUCTION_2_ARM(SMULL,
int64_t d = ((int64_t) cpu->gprs[rm]) * ((int64_t) cpu->gprs[rs]);
cpu->gprs[rd] = d;
cpu->gprs[rdHi] = d >> 32;,
ARM_NEUTRAL_HI_S(cpu->gprs[rd], cpu->gprs[rdHi]), 2)
ARM_NEUTRAL_HI_S(cpu->gprs[rd], cpu->gprs[rdHi]), 1)
DEFINE_MULTIPLY_INSTRUCTION_2_ARM(UMLAL,
uint64_t d = ARM_UXT_64(cpu->gprs[rm]) * ARM_UXT_64(cpu->gprs[rs]) + ((uint32_t) cpu->gprs[rd]);
uint32_t dHi = ((uint32_t) cpu->gprs[rdHi]) + (d >> 32);
cpu->gprs[rd] = d;
cpu->gprs[rdHi] = dHi;,
ARM_NEUTRAL_HI_S(cpu->gprs[rd], dHi), 3)
ARM_NEUTRAL_HI_S(cpu->gprs[rd], dHi), 2)
DEFINE_MULTIPLY_INSTRUCTION_2_ARM(UMULL,
uint64_t d = ARM_UXT_64(cpu->gprs[rm]) * ARM_UXT_64(cpu->gprs[rs]);
cpu->gprs[rd] = d;
cpu->gprs[rdHi] = d >> 32;,
ARM_NEUTRAL_HI_S(cpu->gprs[rd], cpu->gprs[rdHi]), 2)
ARM_NEUTRAL_HI_S(cpu->gprs[rd], cpu->gprs[rdHi]), 1)
// End multiply definitions

View File

@ -232,7 +232,7 @@ DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(NEG, THUMB_SUBTRACTION(cpu->gprs[rd], 0, cp
DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(CMP2, int32_t aluOut = cpu->gprs[rd] - cpu->gprs[rn]; THUMB_SUBTRACTION_S(cpu->gprs[rd], cpu->gprs[rn], aluOut))
DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(CMN, int32_t aluOut = cpu->gprs[rd] + cpu->gprs[rn]; THUMB_ADDITION_S(cpu->gprs[rd], cpu->gprs[rn], aluOut))
DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(ORR, cpu->gprs[rd] = cpu->gprs[rd] | cpu->gprs[rn]; THUMB_NEUTRAL_S( , , cpu->gprs[rd]))
DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(MUL, ARM_WAIT_MUL(cpu->gprs[rd]); cpu->gprs[rd] *= cpu->gprs[rn]; THUMB_NEUTRAL_S( , , cpu->gprs[rd]); currentCycles += cpu->memory.activeNonseqCycles16 - cpu->memory.activeSeqCycles16)
DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(MUL, ARM_WAIT_MUL(cpu->gprs[rd], 0); cpu->gprs[rd] *= cpu->gprs[rn]; THUMB_NEUTRAL_S( , , cpu->gprs[rd]); currentCycles += cpu->memory.activeNonseqCycles16 - cpu->memory.activeSeqCycles16)
DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(BIC, cpu->gprs[rd] = cpu->gprs[rd] & ~cpu->gprs[rn]; THUMB_NEUTRAL_S( , , cpu->gprs[rd]))
DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(MVN, cpu->gprs[rd] = ~cpu->gprs[rn]; THUMB_NEUTRAL_S( , , cpu->gprs[rd]))