ARM7: Fix MLA/*MULL/*MLAL timing

This commit is contained in:
Jeffrey Pfau 2017-01-01 14:46:58 -08:00
parent ae9feee147
commit 180418a74f
2 changed files with 31 additions and 13 deletions

View File

@@ -11,6 +11,7 @@ Bugfixes:
- GB Timer: Improve DIV reset behavior - GB Timer: Improve DIV reset behavior
- GBA Memory: Improve initial skipped BIOS state - GBA Memory: Improve initial skipped BIOS state
- GBA BIOS: Implement BitUnPack - GBA BIOS: Implement BitUnPack
- ARM7: Fix MLA/*MULL/*MLAL timing
Misc: Misc:
- SDL: Remove scancode key input - SDL: Remove scancode key input
- GBA Video: Clean up unused timers - GBA Video: Clean up unused timers

View File

@@ -316,11 +316,10 @@ static inline void _immediate(struct ARMCore* cpu, uint32_t opcode) {
#define DEFINE_MULTIPLY_INSTRUCTION_EX_ARM(NAME, BODY, S_BODY) \ #define DEFINE_MULTIPLY_INSTRUCTION_EX_ARM(NAME, BODY, S_BODY) \
DEFINE_INSTRUCTION_ARM(NAME, \ DEFINE_INSTRUCTION_ARM(NAME, \
int rd = (opcode >> 12) & 0xF; \ int rd = (opcode >> 16) & 0xF; \
int rdHi = (opcode >> 16) & 0xF; \
int rs = (opcode >> 8) & 0xF; \ int rs = (opcode >> 8) & 0xF; \
int rm = opcode & 0xF; \ int rm = opcode & 0xF; \
if (rdHi == ARM_PC || rd == ARM_PC) { \ if (rd == ARM_PC) { \
return; \ return; \
} \ } \
ARM_WAIT_MUL(cpu->gprs[rs]); \ ARM_WAIT_MUL(cpu->gprs[rs]); \
@@ -328,10 +327,28 @@ static inline void _immediate(struct ARMCore* cpu, uint32_t opcode) {
S_BODY; \ S_BODY; \
currentCycles += cpu->memory.activeNonseqCycles32 - cpu->memory.activeSeqCycles32) currentCycles += cpu->memory.activeNonseqCycles32 - cpu->memory.activeSeqCycles32)
#define DEFINE_MULTIPLY_INSTRUCTION_2_EX_ARM(NAME, BODY, S_BODY, WAIT) \
DEFINE_INSTRUCTION_ARM(NAME, \
int rd = (opcode >> 12) & 0xF; \
int rdHi = (opcode >> 16) & 0xF; \
int rs = (opcode >> 8) & 0xF; \
int rm = opcode & 0xF; \
if (rdHi == ARM_PC || rd == ARM_PC) { \
return; \
} \
currentCycles += cpu->memory.stall(cpu, WAIT); \
BODY; \
S_BODY; \
currentCycles += cpu->memory.activeNonseqCycles32 - cpu->memory.activeSeqCycles32)
#define DEFINE_MULTIPLY_INSTRUCTION_ARM(NAME, BODY, S_BODY) \ #define DEFINE_MULTIPLY_INSTRUCTION_ARM(NAME, BODY, S_BODY) \
DEFINE_MULTIPLY_INSTRUCTION_EX_ARM(NAME, BODY, ) \ DEFINE_MULTIPLY_INSTRUCTION_EX_ARM(NAME, BODY, ) \
DEFINE_MULTIPLY_INSTRUCTION_EX_ARM(NAME ## S, BODY, S_BODY) DEFINE_MULTIPLY_INSTRUCTION_EX_ARM(NAME ## S, BODY, S_BODY)
#define DEFINE_MULTIPLY_INSTRUCTION_2_ARM(NAME, BODY, S_BODY, WAIT) \
DEFINE_MULTIPLY_INSTRUCTION_2_EX_ARM(NAME, BODY, , WAIT) \
DEFINE_MULTIPLY_INSTRUCTION_2_EX_ARM(NAME ## S, BODY, S_BODY, WAIT)
#define DEFINE_LOAD_STORE_INSTRUCTION_EX_ARM(NAME, ADDRESS, WRITEBACK, BODY) \ #define DEFINE_LOAD_STORE_INSTRUCTION_EX_ARM(NAME, ADDRESS, WRITEBACK, BODY) \
DEFINE_INSTRUCTION_ARM(NAME, \ DEFINE_INSTRUCTION_ARM(NAME, \
uint32_t address; \ uint32_t address; \
@@ -485,36 +502,36 @@ DEFINE_ALU_INSTRUCTION_S_ONLY_ARM(TST, ARM_NEUTRAL_S(cpu->gprs[rn], cpu->shifter
// Begin multiply definitions // Begin multiply definitions
DEFINE_MULTIPLY_INSTRUCTION_ARM(MLA, cpu->gprs[rdHi] = cpu->gprs[rm] * cpu->gprs[rs] + cpu->gprs[rd], ARM_NEUTRAL_S(, , cpu->gprs[rdHi])) DEFINE_MULTIPLY_INSTRUCTION_2_ARM(MLA, cpu->gprs[rdHi] = cpu->gprs[rm] * cpu->gprs[rs] + cpu->gprs[rd], ARM_NEUTRAL_S(, , cpu->gprs[rdHi]), 2)
DEFINE_MULTIPLY_INSTRUCTION_ARM(MUL, cpu->gprs[rdHi] = cpu->gprs[rm] * cpu->gprs[rs], ARM_NEUTRAL_S(cpu->gprs[rm], cpu->gprs[rs], cpu->gprs[rdHi])) DEFINE_MULTIPLY_INSTRUCTION_ARM(MUL, cpu->gprs[rd] = cpu->gprs[rm] * cpu->gprs[rs], ARM_NEUTRAL_S(cpu->gprs[rm], cpu->gprs[rs], cpu->gprs[rd]))
DEFINE_MULTIPLY_INSTRUCTION_ARM(SMLAL, DEFINE_MULTIPLY_INSTRUCTION_2_ARM(SMLAL,
int64_t d = ((int64_t) cpu->gprs[rm]) * ((int64_t) cpu->gprs[rs]); int64_t d = ((int64_t) cpu->gprs[rm]) * ((int64_t) cpu->gprs[rs]);
int32_t dm = cpu->gprs[rd]; int32_t dm = cpu->gprs[rd];
int32_t dn = d; int32_t dn = d;
cpu->gprs[rd] = dm + dn; cpu->gprs[rd] = dm + dn;
cpu->gprs[rdHi] = cpu->gprs[rdHi] + (d >> 32) + ARM_CARRY_FROM(dm, dn, cpu->gprs[rd]);, cpu->gprs[rdHi] = cpu->gprs[rdHi] + (d >> 32) + ARM_CARRY_FROM(dm, dn, cpu->gprs[rd]);,
ARM_NEUTRAL_HI_S(cpu->gprs[rd], cpu->gprs[rdHi])) ARM_NEUTRAL_HI_S(cpu->gprs[rd], cpu->gprs[rdHi]), 3)
DEFINE_MULTIPLY_INSTRUCTION_ARM(SMULL, DEFINE_MULTIPLY_INSTRUCTION_2_ARM(SMULL,
int64_t d = ((int64_t) cpu->gprs[rm]) * ((int64_t) cpu->gprs[rs]); int64_t d = ((int64_t) cpu->gprs[rm]) * ((int64_t) cpu->gprs[rs]);
cpu->gprs[rd] = d; cpu->gprs[rd] = d;
cpu->gprs[rdHi] = d >> 32;, cpu->gprs[rdHi] = d >> 32;,
ARM_NEUTRAL_HI_S(cpu->gprs[rd], cpu->gprs[rdHi])) ARM_NEUTRAL_HI_S(cpu->gprs[rd], cpu->gprs[rdHi]), 2)
DEFINE_MULTIPLY_INSTRUCTION_ARM(UMLAL, DEFINE_MULTIPLY_INSTRUCTION_2_ARM(UMLAL,
uint64_t d = ARM_UXT_64(cpu->gprs[rm]) * ARM_UXT_64(cpu->gprs[rs]); uint64_t d = ARM_UXT_64(cpu->gprs[rm]) * ARM_UXT_64(cpu->gprs[rs]);
int32_t dm = cpu->gprs[rd]; int32_t dm = cpu->gprs[rd];
int32_t dn = d; int32_t dn = d;
cpu->gprs[rd] = dm + dn; cpu->gprs[rd] = dm + dn;
cpu->gprs[rdHi] = cpu->gprs[rdHi] + (d >> 32) + ARM_CARRY_FROM(dm, dn, cpu->gprs[rd]);, cpu->gprs[rdHi] = cpu->gprs[rdHi] + (d >> 32) + ARM_CARRY_FROM(dm, dn, cpu->gprs[rd]);,
ARM_NEUTRAL_HI_S(cpu->gprs[rd], cpu->gprs[rdHi])) ARM_NEUTRAL_HI_S(cpu->gprs[rd], cpu->gprs[rdHi]), 3)
DEFINE_MULTIPLY_INSTRUCTION_ARM(UMULL, DEFINE_MULTIPLY_INSTRUCTION_2_ARM(UMULL,
uint64_t d = ARM_UXT_64(cpu->gprs[rm]) * ARM_UXT_64(cpu->gprs[rs]); uint64_t d = ARM_UXT_64(cpu->gprs[rm]) * ARM_UXT_64(cpu->gprs[rs]);
cpu->gprs[rd] = d; cpu->gprs[rd] = d;
cpu->gprs[rdHi] = d >> 32;, cpu->gprs[rdHi] = d >> 32;,
ARM_NEUTRAL_HI_S(cpu->gprs[rd], cpu->gprs[rdHi])) ARM_NEUTRAL_HI_S(cpu->gprs[rd], cpu->gprs[rdHi]), 2)
// End multiply definitions // End multiply definitions