ARM7: Fix unsigned multiply timing

This commit is contained in:
Vicki Pfau 2021-09-20 15:59:28 -07:00
parent 1aa5f1b9ff
commit 86fe869087
4 changed files with 34 additions and 18 deletions

View File

@ -11,6 +11,7 @@ Features:
- Discord Rich Presence now supports time elapsed - Discord Rich Presence now supports time elapsed
- Additional scaling shaders - Additional scaling shaders
Emulation fixes: Emulation fixes:
- ARM7: Fix unsigned multiply timing
- GB I/O: Fix incrementing SGB controller when P14 is low (fixes mgba.io/i/2202) - GB I/O: Fix incrementing SGB controller when P14 is low (fixes mgba.io/i/2202)
- GB Memory: Add cursory cartridge open bus emulation (fixes mgba.io/i/2032) - GB Memory: Add cursory cartridge open bus emulation (fixes mgba.io/i/2032)
- GB Video: Render SGB border when unmasking with ATTR/PAL_SET (fixes mgba.io/i/2261) - GB Video: Render SGB border when unmasking with ATTR/PAL_SET (fixes mgba.io/i/2261)

View File

@ -37,7 +37,7 @@
#define ARM_V_ADDITION(M, N, D) (!(ARM_SIGN((M) ^ (N))) && (ARM_SIGN((M) ^ (D)))) #define ARM_V_ADDITION(M, N, D) (!(ARM_SIGN((M) ^ (N))) && (ARM_SIGN((M) ^ (D))))
#define ARM_V_SUBTRACTION(M, N, D) ((ARM_SIGN((M) ^ (N))) && (ARM_SIGN((M) ^ (D)))) #define ARM_V_SUBTRACTION(M, N, D) ((ARM_SIGN((M) ^ (N))) && (ARM_SIGN((M) ^ (D))))
#define ARM_WAIT_MUL(R, WAIT) \ #define ARM_WAIT_SMUL(R, WAIT) \
{ \ { \
int32_t wait = WAIT; \ int32_t wait = WAIT; \
if ((R & 0xFFFFFF00) == 0xFFFFFF00 || !(R & 0xFFFFFF00)) { \ if ((R & 0xFFFFFF00) == 0xFFFFFF00 || !(R & 0xFFFFFF00)) { \
@ -52,6 +52,21 @@
currentCycles += cpu->memory.stall(cpu, wait); \ currentCycles += cpu->memory.stall(cpu, wait); \
} }
#define ARM_WAIT_UMUL(R, WAIT) \
{ \
int32_t wait = WAIT; \
if (!(R & 0xFFFFFF00)) { \
wait += 1; \
} else if (!(R & 0xFFFF0000)) { \
wait += 2; \
} else if (!(R & 0xFF000000)) { \
wait += 3; \
} else { \
wait += 4; \
} \
currentCycles += cpu->memory.stall(cpu, wait); \
}
#define ARM_STUB cpu->irqh.hitStub(cpu, opcode) #define ARM_STUB cpu->irqh.hitStub(cpu, opcode)
#define ARM_ILL cpu->irqh.hitIllegal(cpu, opcode) #define ARM_ILL cpu->irqh.hitIllegal(cpu, opcode)

View File

@ -320,38 +320,38 @@ ATTRIBUTE_NOINLINE static void _neutralS(struct ARMCore* cpu, int32_t d) {
DEFINE_ALU_INSTRUCTION_EX_ARM(NAME ## _ROR, S_BODY, _shiftROR, BODY) \ DEFINE_ALU_INSTRUCTION_EX_ARM(NAME ## _ROR, S_BODY, _shiftROR, BODY) \
DEFINE_ALU_INSTRUCTION_EX_ARM(NAME ## I, S_BODY, _immediate, BODY) DEFINE_ALU_INSTRUCTION_EX_ARM(NAME ## I, S_BODY, _immediate, BODY)
#define DEFINE_MULTIPLY_INSTRUCTION_EX_ARM(NAME, BODY, S_BODY) \ #define DEFINE_MULTIPLY_INSTRUCTION_EX_ARM(NAME, BODY, S_BODY, SIGNED) \
DEFINE_INSTRUCTION_ARM(NAME, \ DEFINE_INSTRUCTION_ARM(NAME, \
int rd = (opcode >> 16) & 0xF; \ int rd = (opcode >> 16) & 0xF; \
int rs = (opcode >> 8) & 0xF; \ int rs = (opcode >> 8) & 0xF; \
int rm = opcode & 0xF; \ int rm = opcode & 0xF; \
if (rd != ARM_PC) { \ if (rd != ARM_PC) { \
ARM_WAIT_MUL(cpu->gprs[rs], 0); \ ARM_WAIT_ ## SIGNED ## MUL(cpu->gprs[rs], 0); \
BODY; \ BODY; \
S_BODY; \ S_BODY; \
} \ } \
currentCycles += cpu->memory.activeNonseqCycles32 - cpu->memory.activeSeqCycles32) currentCycles += cpu->memory.activeNonseqCycles32 - cpu->memory.activeSeqCycles32)
#define DEFINE_MULTIPLY_INSTRUCTION_2_EX_ARM(NAME, BODY, S_BODY, WAIT) \ #define DEFINE_MULTIPLY_INSTRUCTION_2_EX_ARM(NAME, BODY, S_BODY, SIGNED, WAIT) \
DEFINE_INSTRUCTION_ARM(NAME, \ DEFINE_INSTRUCTION_ARM(NAME, \
int rd = (opcode >> 12) & 0xF; \ int rd = (opcode >> 12) & 0xF; \
int rdHi = (opcode >> 16) & 0xF; \ int rdHi = (opcode >> 16) & 0xF; \
int rs = (opcode >> 8) & 0xF; \ int rs = (opcode >> 8) & 0xF; \
int rm = opcode & 0xF; \ int rm = opcode & 0xF; \
if (rdHi != ARM_PC && rd != ARM_PC) { \ if (rdHi != ARM_PC && rd != ARM_PC) { \
ARM_WAIT_MUL(cpu->gprs[rs], WAIT); \ ARM_WAIT_ ## SIGNED ## MUL(cpu->gprs[rs], WAIT); \
BODY; \ BODY; \
S_BODY; \ S_BODY; \
} \ } \
currentCycles += cpu->memory.activeNonseqCycles32 - cpu->memory.activeSeqCycles32) currentCycles += cpu->memory.activeNonseqCycles32 - cpu->memory.activeSeqCycles32)
#define DEFINE_MULTIPLY_INSTRUCTION_ARM(NAME, BODY, S_BODY) \ #define DEFINE_MULTIPLY_INSTRUCTION_ARM(NAME, BODY, S_BODY, SIGNED) \
DEFINE_MULTIPLY_INSTRUCTION_EX_ARM(NAME, BODY, ) \ DEFINE_MULTIPLY_INSTRUCTION_EX_ARM(NAME, BODY, , SIGNED) \
DEFINE_MULTIPLY_INSTRUCTION_EX_ARM(NAME ## S, BODY, S_BODY) DEFINE_MULTIPLY_INSTRUCTION_EX_ARM(NAME ## S, BODY, S_BODY, SIGNED)
#define DEFINE_MULTIPLY_INSTRUCTION_2_ARM(NAME, BODY, S_BODY, WAIT) \ #define DEFINE_MULTIPLY_INSTRUCTION_2_ARM(NAME, BODY, S_BODY, SIGNED, WAIT) \
DEFINE_MULTIPLY_INSTRUCTION_2_EX_ARM(NAME, BODY, , WAIT) \ DEFINE_MULTIPLY_INSTRUCTION_2_EX_ARM(NAME, BODY, , SIGNED, WAIT) \
DEFINE_MULTIPLY_INSTRUCTION_2_EX_ARM(NAME ## S, BODY, S_BODY, WAIT) DEFINE_MULTIPLY_INSTRUCTION_2_EX_ARM(NAME ## S, BODY, S_BODY, SIGNED, WAIT)
#define DEFINE_LOAD_STORE_INSTRUCTION_EX_ARM(NAME, ADDRESS, WRITEBACK, LS, BODY) \ #define DEFINE_LOAD_STORE_INSTRUCTION_EX_ARM(NAME, ADDRESS, WRITEBACK, LS, BODY) \
DEFINE_INSTRUCTION_ARM(NAME, \ DEFINE_INSTRUCTION_ARM(NAME, \
@ -520,34 +520,34 @@ DEFINE_ALU_INSTRUCTION_S_ONLY_ARM(TST, ARM_NEUTRAL_S(n, cpu->shifterOperand, alu
// Begin multiply definitions // Begin multiply definitions
DEFINE_MULTIPLY_INSTRUCTION_2_ARM(MLA, cpu->gprs[rdHi] = cpu->gprs[rm] * cpu->gprs[rs] + cpu->gprs[rd], ARM_NEUTRAL_S(, , cpu->gprs[rdHi]), 1) DEFINE_MULTIPLY_INSTRUCTION_2_ARM(MLA, cpu->gprs[rdHi] = cpu->gprs[rm] * cpu->gprs[rs] + cpu->gprs[rd], ARM_NEUTRAL_S(, , cpu->gprs[rdHi]), S, 1)
DEFINE_MULTIPLY_INSTRUCTION_ARM(MUL, cpu->gprs[rd] = cpu->gprs[rm] * cpu->gprs[rs], ARM_NEUTRAL_S(cpu->gprs[rm], cpu->gprs[rs], cpu->gprs[rd])) DEFINE_MULTIPLY_INSTRUCTION_ARM(MUL, cpu->gprs[rd] = cpu->gprs[rm] * cpu->gprs[rs], ARM_NEUTRAL_S(cpu->gprs[rm], cpu->gprs[rs], cpu->gprs[rd]), S)
DEFINE_MULTIPLY_INSTRUCTION_2_ARM(SMLAL, DEFINE_MULTIPLY_INSTRUCTION_2_ARM(SMLAL,
int64_t d = ((int64_t) cpu->gprs[rm]) * ((int64_t) cpu->gprs[rs]) + ((uint32_t) cpu->gprs[rd]); int64_t d = ((int64_t) cpu->gprs[rm]) * ((int64_t) cpu->gprs[rs]) + ((uint32_t) cpu->gprs[rd]);
int32_t dHi = cpu->gprs[rdHi] + (d >> 32); int32_t dHi = cpu->gprs[rdHi] + (d >> 32);
cpu->gprs[rd] = d; cpu->gprs[rd] = d;
cpu->gprs[rdHi] = dHi;, cpu->gprs[rdHi] = dHi;,
ARM_NEUTRAL_HI_S(cpu->gprs[rd], dHi), 2) ARM_NEUTRAL_HI_S(cpu->gprs[rd], dHi), S, 2)
DEFINE_MULTIPLY_INSTRUCTION_2_ARM(SMULL, DEFINE_MULTIPLY_INSTRUCTION_2_ARM(SMULL,
int64_t d = ((int64_t) cpu->gprs[rm]) * ((int64_t) cpu->gprs[rs]); int64_t d = ((int64_t) cpu->gprs[rm]) * ((int64_t) cpu->gprs[rs]);
cpu->gprs[rd] = d; cpu->gprs[rd] = d;
cpu->gprs[rdHi] = d >> 32;, cpu->gprs[rdHi] = d >> 32;,
ARM_NEUTRAL_HI_S(cpu->gprs[rd], cpu->gprs[rdHi]), 1) ARM_NEUTRAL_HI_S(cpu->gprs[rd], cpu->gprs[rdHi]), S, 1)
DEFINE_MULTIPLY_INSTRUCTION_2_ARM(UMLAL, DEFINE_MULTIPLY_INSTRUCTION_2_ARM(UMLAL,
uint64_t d = ARM_UXT_64(cpu->gprs[rm]) * ARM_UXT_64(cpu->gprs[rs]) + ((uint32_t) cpu->gprs[rd]); uint64_t d = ARM_UXT_64(cpu->gprs[rm]) * ARM_UXT_64(cpu->gprs[rs]) + ((uint32_t) cpu->gprs[rd]);
uint32_t dHi = ((uint32_t) cpu->gprs[rdHi]) + (d >> 32); uint32_t dHi = ((uint32_t) cpu->gprs[rdHi]) + (d >> 32);
cpu->gprs[rd] = d; cpu->gprs[rd] = d;
cpu->gprs[rdHi] = dHi;, cpu->gprs[rdHi] = dHi;,
ARM_NEUTRAL_HI_S(cpu->gprs[rd], dHi), 2) ARM_NEUTRAL_HI_S(cpu->gprs[rd], dHi), U, 2)
DEFINE_MULTIPLY_INSTRUCTION_2_ARM(UMULL, DEFINE_MULTIPLY_INSTRUCTION_2_ARM(UMULL,
uint64_t d = ARM_UXT_64(cpu->gprs[rm]) * ARM_UXT_64(cpu->gprs[rs]); uint64_t d = ARM_UXT_64(cpu->gprs[rm]) * ARM_UXT_64(cpu->gprs[rs]);
cpu->gprs[rd] = d; cpu->gprs[rd] = d;
cpu->gprs[rdHi] = d >> 32;, cpu->gprs[rdHi] = d >> 32;,
ARM_NEUTRAL_HI_S(cpu->gprs[rd], cpu->gprs[rdHi]), 1) ARM_NEUTRAL_HI_S(cpu->gprs[rd], cpu->gprs[rdHi]), U, 1)
// End multiply definitions // End multiply definitions

View File

@ -232,7 +232,7 @@ DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(NEG, THUMB_SUBTRACTION(cpu->gprs[rd], 0, cp
DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(CMP2, int32_t aluOut = cpu->gprs[rd] - cpu->gprs[rn]; THUMB_SUBTRACTION_S(cpu->gprs[rd], cpu->gprs[rn], aluOut)) DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(CMP2, int32_t aluOut = cpu->gprs[rd] - cpu->gprs[rn]; THUMB_SUBTRACTION_S(cpu->gprs[rd], cpu->gprs[rn], aluOut))
DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(CMN, int32_t aluOut = cpu->gprs[rd] + cpu->gprs[rn]; THUMB_ADDITION_S(cpu->gprs[rd], cpu->gprs[rn], aluOut)) DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(CMN, int32_t aluOut = cpu->gprs[rd] + cpu->gprs[rn]; THUMB_ADDITION_S(cpu->gprs[rd], cpu->gprs[rn], aluOut))
DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(ORR, cpu->gprs[rd] = cpu->gprs[rd] | cpu->gprs[rn]; THUMB_NEUTRAL_S( , , cpu->gprs[rd])) DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(ORR, cpu->gprs[rd] = cpu->gprs[rd] | cpu->gprs[rn]; THUMB_NEUTRAL_S( , , cpu->gprs[rd]))
DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(MUL, ARM_WAIT_MUL(cpu->gprs[rd], 0); cpu->gprs[rd] *= cpu->gprs[rn]; THUMB_NEUTRAL_S( , , cpu->gprs[rd]); currentCycles += cpu->memory.activeNonseqCycles16 - cpu->memory.activeSeqCycles16) DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(MUL, ARM_WAIT_SMUL(cpu->gprs[rd], 0); cpu->gprs[rd] *= cpu->gprs[rn]; THUMB_NEUTRAL_S( , , cpu->gprs[rd]); currentCycles += cpu->memory.activeNonseqCycles16 - cpu->memory.activeSeqCycles16)
DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(BIC, cpu->gprs[rd] = cpu->gprs[rd] & ~cpu->gprs[rn]; THUMB_NEUTRAL_S( , , cpu->gprs[rd])) DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(BIC, cpu->gprs[rd] = cpu->gprs[rd] & ~cpu->gprs[rn]; THUMB_NEUTRAL_S( , , cpu->gprs[rd]))
DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(MVN, cpu->gprs[rd] = ~cpu->gprs[rn]; THUMB_NEUTRAL_S( , , cpu->gprs[rd])) DEFINE_DATA_FORM_5_INSTRUCTION_THUMB(MVN, cpu->gprs[rd] = ~cpu->gprs[rn]; THUMB_NEUTRAL_S( , , cpu->gprs[rd]))