diff --git a/src/arm/arm.h b/src/arm/arm.h index ec04f5d72..576feedda 100644 --- a/src/arm/arm.h +++ b/src/arm/arm.h @@ -110,8 +110,7 @@ struct ARMMemory { uint32_t activeSeqCycles16; uint32_t activeNonseqCycles32; uint32_t activeNonseqCycles16; - uint32_t activeUncachedCycles32; - uint32_t activeUncachedCycles16; + int32_t (*stall)(struct ARMCore*, int32_t wait); void (*setActiveRegion)(struct ARMCore*, uint32_t address); }; diff --git a/src/arm/isa-arm.c b/src/arm/isa-arm.c index ca4327c6e..83b59cc70 100644 --- a/src/arm/isa-arm.c +++ b/src/arm/isa-arm.c @@ -259,7 +259,7 @@ static inline void _immediate(struct ARMCore* cpu, uint32_t opcode) { #define ADDR_MODE_4_WRITEBACK_STM cpu->gprs[rn] = address; #define ARM_LOAD_POST_BODY \ - currentCycles += 1 + cpu->memory.activeNonseqCycles32 - cpu->memory.activeSeqCycles32; \ + currentCycles += cpu->memory.activeNonseqCycles32 - cpu->memory.activeSeqCycles32; \ if (rd == ARM_PC) { \ ARM_WRITE_PC; \ } @@ -567,7 +567,7 @@ DEFINE_LOAD_STORE_T_INSTRUCTION_ARM(STRT, DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_ARM(LDM, load, - currentCycles += 1 + cpu->memory.activeNonseqCycles32 - cpu->memory.activeSeqCycles32; + currentCycles += cpu->memory.activeNonseqCycles32 - cpu->memory.activeSeqCycles32; if (rs & 0x8000) { ARM_WRITE_PC; }) diff --git a/src/arm/isa-inlines.h b/src/arm/isa-inlines.h index f9a8daf9d..20eaa1f65 100644 --- a/src/arm/isa-inlines.h +++ b/src/arm/isa-inlines.h @@ -36,14 +36,18 @@ #define ARM_V_SUBTRACTION(M, N, D) ((ARM_SIGN((M) ^ (N))) && (ARM_SIGN((M) ^ (D)))) #define ARM_WAIT_MUL(R) \ - if ((R & 0xFFFFFF00) == 0xFFFFFF00 || !(R & 0xFFFFFF00)) { \ - currentCycles += 1; \ - } else if ((R & 0xFFFF0000) == 0xFFFF0000 || !(R & 0xFFFF0000)) { \ - currentCycles += 2; \ - } else if ((R & 0xFF000000) == 0xFF000000 || !(R & 0xFF000000)) { \ - currentCycles += 3; \ - } else { \ - currentCycles += 4; \ + { \ + int32_t wait; \ + if ((R & 0xFFFFFF00) == 0xFFFFFF00 || !(R & 0xFFFFFF00)) { \ + wait = 1; \ + } else if ((R & 0xFFFF0000) == 0xFFFF0000 || !(R & 0xFFFF0000)) { \ + wait = 2; \ + } else if ((R & 0xFF000000) == 0xFF000000 || !(R & 0xFF000000)) { \ + wait = 3; \ + } else { \ + wait = 4; \ + } \ + currentCycles += cpu->memory.stall(cpu, wait); \ } #define ARM_STUB cpu->irqh.hitStub(cpu, opcode) @@ -55,7 +59,7 @@ LOAD_32(cpu->prefetch[0], cpu->gprs[ARM_PC] & cpu->memory.activeMask, cpu->memory.activeRegion); \ cpu->gprs[ARM_PC] += WORD_SIZE_ARM; \ LOAD_32(cpu->prefetch[1], cpu->gprs[ARM_PC] & cpu->memory.activeMask, cpu->memory.activeRegion); \ - currentCycles += 2 + cpu->memory.activeUncachedCycles32 + cpu->memory.activeSeqCycles32; + currentCycles += 2 + cpu->memory.activeNonseqCycles32 + cpu->memory.activeSeqCycles32; #define THUMB_WRITE_PC \ cpu->gprs[ARM_PC] = (cpu->gprs[ARM_PC] & -WORD_SIZE_THUMB); \ @@ -63,7 +67,7 @@ LOAD_16(cpu->prefetch[0], cpu->gprs[ARM_PC] & cpu->memory.activeMask, cpu->memory.activeRegion); \ cpu->gprs[ARM_PC] += WORD_SIZE_THUMB; \ LOAD_16(cpu->prefetch[1], cpu->gprs[ARM_PC] & cpu->memory.activeMask, cpu->memory.activeRegion); \ - currentCycles += 2 + cpu->memory.activeUncachedCycles16 + cpu->memory.activeSeqCycles16; + currentCycles += 2 + cpu->memory.activeNonseqCycles16 + cpu->memory.activeSeqCycles16; static inline int _ARMModeHasSPSR(enum PrivilegeMode mode) { return mode != MODE_SYSTEM && mode != MODE_USER; diff --git a/src/arm/isa-thumb.c b/src/arm/isa-thumb.c index 9440eaf78..4132303fa 100644 --- a/src/arm/isa-thumb.c +++ b/src/arm/isa-thumb.c @@ -42,7 +42,7 @@ #define THUMB_PREFETCH_CYCLES (1 + cpu->memory.activeSeqCycles16) #define THUMB_LOAD_POST_BODY \ - currentCycles += 1 + cpu->memory.activeNonseqCycles16 - cpu->memory.activeSeqCycles16; + currentCycles += cpu->memory.activeNonseqCycles16 - cpu->memory.activeSeqCycles16; #define THUMB_STORE_POST_BODY \ currentCycles += cpu->memory.activeNonseqCycles16 - cpu->memory.activeSeqCycles16; diff --git a/src/gba/gba.c b/src/gba/gba.c index 596784062..c2305bda5 100644 --- a/src/gba/gba.c +++ b/src/gba/gba.c @@ -91,7 +91,7 @@ static void GBAInit(struct ARMCore* cpu, struct ARMComponent* component) { gba->idleDetectionStep = 0; gba->idleDetectionFailures = 0; - gba->realisticTiming = false; + gba->realisticTiming = true; gba->performingDMA = false; } diff --git a/src/gba/memory.c b/src/gba/memory.c index be843b55e..428c66e4c 100644 --- a/src/gba/memory.c +++ b/src/gba/memory.c @@ -22,6 +22,7 @@ static uint32_t _deadbeef[1] = { 0xE710B710 }; // Illegal instruction on both AR static void GBASetActiveRegion(struct ARMCore* cpu, uint32_t region); static void GBAMemoryServiceDMA(struct GBA* gba, int number, struct GBADMA* info); +static int32_t GBAMemoryStall(struct ARMCore* cpu, int32_t wait); static const char GBA_BASE_WAITSTATES[16] = { 0, 0, 2, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4 }; static const char GBA_BASE_WAITSTATES_32[16] = { 0, 0, 5, 0, 0, 1, 1, 0, 7, 7, 9, 9, 13, 13, 9 }; @@ -41,6 +42,7 @@ void GBAMemoryInit(struct GBA* gba) { cpu->memory.store16 = GBAStore16; cpu->memory.store8 = GBAStore8; cpu->memory.storeMultiple = GBAStoreMultiple; + cpu->memory.stall = GBAMemoryStall; gba->memory.bios = (uint32_t*) hleBios; gba->memory.fullBios = 0; @@ -76,8 +78,6 @@ void GBAMemoryInit(struct GBA* gba) { cpu->memory.activeSeqCycles16 = 0; cpu->memory.activeNonseqCycles32 = 0; cpu->memory.activeNonseqCycles16 = 0; - cpu->memory.activeUncachedCycles32 = 0; - cpu->memory.activeUncachedCycles16 = 0; gba->memory.biosPrefetch = 0; } @@ -278,12 +278,10 @@ static void GBASetActiveRegion(struct ARMCore* cpu, uint32_t address) { } return; } - cpu->memory.activeSeqCycles32 = memory->waitstatesPrefetchSeq32[newRegion]; - cpu->memory.activeSeqCycles16 = memory->waitstatesPrefetchSeq16[newRegion]; - cpu->memory.activeNonseqCycles32 = memory->waitstatesPrefetchNonseq32[newRegion]; - cpu->memory.activeNonseqCycles16 = memory->waitstatesPrefetchNonseq16[newRegion]; - cpu->memory.activeUncachedCycles32 = memory->waitstatesNonseq32[newRegion]; - cpu->memory.activeUncachedCycles16 = memory->waitstatesNonseq16[newRegion]; + cpu->memory.activeSeqCycles32 = memory->waitstatesSeq32[memory->activeRegion]; + cpu->memory.activeSeqCycles16 = memory->waitstatesSeq16[memory->activeRegion]; + cpu->memory.activeNonseqCycles32 = memory->waitstatesNonseq32[memory->activeRegion]; + cpu->memory.activeNonseqCycles16 = memory->waitstatesNonseq16[memory->activeRegion]; } #define LOAD_BAD \ @@ -412,7 +410,11 @@ uint32_t GBALoad32(struct ARMCore* cpu, uint32_t address, int* cycleCounter) { } if (cycleCounter) { - *cycleCounter += 1 + wait; + wait += 2; + if (address >> BASE_OFFSET < REGION_CART0) { + wait = GBAMemoryStall(cpu, wait); + } + *cycleCounter += wait; } // Unaligned 32-bit loads are "rotated" so they make some semblance of sense int rotate = (address & 3) << 3; @@ -503,7 +505,11 @@ uint32_t GBALoad16(struct ARMCore* cpu, uint32_t address, int* cycleCounter) { } if (cycleCounter) { - *cycleCounter += 1 + wait; + wait += 2; + if (address >> BASE_OFFSET < REGION_CART0) { + wait = GBAMemoryStall(cpu, wait); + } + *cycleCounter += wait; } // Unaligned 16-bit loads are "unpredictable", but the GBA rotates them, so we have to, too. int rotate = (address & 1) << 3; @@ -595,7 +601,11 @@ uint32_t GBALoad8(struct ARMCore* cpu, uint32_t address, int* cycleCounter) { } if (cycleCounter) { - *cycleCounter += 1 + wait; + wait += 2; + if (address >> BASE_OFFSET < REGION_CART0) { + wait = GBAMemoryStall(cpu, wait); + } + *cycleCounter += wait; } return value; } @@ -682,7 +692,11 @@ void GBAStore32(struct ARMCore* cpu, uint32_t address, int32_t value, int* cycle } if (cycleCounter) { - *cycleCounter += 1 + wait; + ++wait; + if (address >> BASE_OFFSET < REGION_CART0) { + wait = GBAMemoryStall(cpu, wait); + } + *cycleCounter += wait; } } @@ -742,7 +756,11 @@ void GBAStore16(struct ARMCore* cpu, uint32_t address, int16_t value, int* cycle } if (cycleCounter) { - *cycleCounter += 1 + wait; + ++wait; + if (address >> BASE_OFFSET < REGION_CART0) { + wait = GBAMemoryStall(cpu, wait); + } + *cycleCounter += wait; } } @@ -808,7 +826,11 @@ void GBAStore8(struct ARMCore* cpu, uint32_t address, int8_t value, int* cycleCo } if (cycleCounter) { - *cycleCounter += 1 + wait; + ++wait; + if (address >> BASE_OFFSET < REGION_CART0) { + wait = GBAMemoryStall(cpu, wait); + } + *cycleCounter += wait; } } @@ -1100,6 +1122,10 @@ uint32_t GBALoadMultiple(struct ARMCore* cpu, uint32_t address, int mask, enum L } if (cycleCounter) { + ++wait; + if (address >> BASE_OFFSET < REGION_CART0) { + wait = GBAMemoryStall(cpu, wait); + } *cycleCounter += wait; } @@ -1206,6 +1232,9 @@ uint32_t GBAStoreMultiple(struct ARMCore* cpu, uint32_t address, int mask, enum } if (cycleCounter) { + if (address >> BASE_OFFSET < REGION_CART0) { + wait = GBAMemoryStall(cpu, wait); + } *cycleCounter += wait; } @@ -1253,50 +1282,13 @@ void GBAAdjustWaitstates(struct GBA* gba, uint16_t parameters) { memory->waitstatesSeq32[REGION_CART1] = memory->waitstatesSeq32[REGION_CART1_EX] = 2 * memory->waitstatesSeq16[REGION_CART1] + 1; memory->waitstatesSeq32[REGION_CART2] = memory->waitstatesSeq32[REGION_CART2_EX] = 2 * memory->waitstatesSeq16[REGION_CART2] + 1; - if (!prefetch) { - memory->waitstatesPrefetchSeq16[REGION_CART0] = memory->waitstatesPrefetchSeq16[REGION_CART0_EX] = memory->waitstatesSeq16[REGION_CART0]; - memory->waitstatesPrefetchSeq16[REGION_CART1] = memory->waitstatesPrefetchSeq16[REGION_CART1_EX] = memory->waitstatesSeq16[REGION_CART1]; - memory->waitstatesPrefetchSeq16[REGION_CART2] = memory->waitstatesPrefetchSeq16[REGION_CART2_EX] = memory->waitstatesSeq16[REGION_CART2]; + memory->prefetch = prefetch; - memory->waitstatesPrefetchSeq32[REGION_CART0] = memory->waitstatesPrefetchSeq32[REGION_CART0_EX] = memory->waitstatesSeq32[REGION_CART0]; - memory->waitstatesPrefetchSeq32[REGION_CART1] = memory->waitstatesPrefetchSeq32[REGION_CART1_EX] = memory->waitstatesSeq32[REGION_CART1]; - memory->waitstatesPrefetchSeq32[REGION_CART2] = memory->waitstatesPrefetchSeq32[REGION_CART2_EX] = memory->waitstatesSeq32[REGION_CART2]; + cpu->memory.activeSeqCycles32 = memory->waitstatesSeq32[memory->activeRegion]; + cpu->memory.activeSeqCycles16 = memory->waitstatesSeq16[memory->activeRegion]; - memory->waitstatesPrefetchNonseq16[REGION_CART0] = memory->waitstatesPrefetchNonseq16[REGION_CART0_EX] = memory->waitstatesNonseq16[REGION_CART0]; - memory->waitstatesPrefetchNonseq16[REGION_CART1] = memory->waitstatesPrefetchNonseq16[REGION_CART1_EX] = memory->waitstatesNonseq16[REGION_CART1]; - memory->waitstatesPrefetchNonseq16[REGION_CART2] = memory->waitstatesPrefetchNonseq16[REGION_CART2_EX] = memory->waitstatesNonseq16[REGION_CART2]; - - memory->waitstatesPrefetchNonseq32[REGION_CART0] = memory->waitstatesPrefetchNonseq32[REGION_CART0_EX] = memory->waitstatesNonseq32[REGION_CART0]; - memory->waitstatesPrefetchNonseq32[REGION_CART1] = memory->waitstatesPrefetchNonseq32[REGION_CART1_EX] = memory->waitstatesNonseq32[REGION_CART1]; - memory->waitstatesPrefetchNonseq32[REGION_CART2] = memory->waitstatesPrefetchNonseq32[REGION_CART2_EX] = memory->waitstatesNonseq32[REGION_CART2]; - } else { - // Assume it stalls one cycle to pull a value from the prefetch - // This needs more research to tell if it's accurate or not - memory->waitstatesPrefetchSeq16[REGION_CART0] = memory->waitstatesPrefetchSeq16[REGION_CART0_EX] = 1; - memory->waitstatesPrefetchSeq16[REGION_CART1] = memory->waitstatesPrefetchSeq16[REGION_CART1_EX] = 1; - memory->waitstatesPrefetchSeq16[REGION_CART2] = memory->waitstatesPrefetchSeq16[REGION_CART2_EX] = 1; - - memory->waitstatesPrefetchSeq32[REGION_CART0] = memory->waitstatesPrefetchSeq32[REGION_CART0_EX] = 2; - memory->waitstatesPrefetchSeq32[REGION_CART1] = memory->waitstatesPrefetchSeq32[REGION_CART1_EX] = 2; - memory->waitstatesPrefetchSeq32[REGION_CART2] = memory->waitstatesPrefetchSeq32[REGION_CART2_EX] = 2; - - memory->waitstatesPrefetchNonseq16[REGION_CART0] = memory->waitstatesPrefetchNonseq16[REGION_CART0_EX] = 1; - memory->waitstatesPrefetchNonseq16[REGION_CART1] = memory->waitstatesPrefetchNonseq16[REGION_CART1_EX] = 1; - memory->waitstatesPrefetchNonseq16[REGION_CART2] = memory->waitstatesPrefetchNonseq16[REGION_CART2_EX] = 1; - - memory->waitstatesPrefetchNonseq32[REGION_CART0] = memory->waitstatesPrefetchNonseq32[REGION_CART0_EX] = 2; - memory->waitstatesPrefetchNonseq32[REGION_CART1] = memory->waitstatesPrefetchNonseq32[REGION_CART1_EX] = 2; - memory->waitstatesPrefetchNonseq32[REGION_CART2] = memory->waitstatesPrefetchNonseq32[REGION_CART2_EX] = 2; - } - - cpu->memory.activeSeqCycles32 = memory->waitstatesPrefetchSeq32[memory->activeRegion]; - cpu->memory.activeSeqCycles16 = memory->waitstatesPrefetchSeq16[memory->activeRegion]; - - cpu->memory.activeNonseqCycles32 = memory->waitstatesPrefetchNonseq32[memory->activeRegion]; - cpu->memory.activeNonseqCycles16 = memory->waitstatesPrefetchNonseq16[memory->activeRegion]; - - cpu->memory.activeUncachedCycles32 = memory->waitstatesNonseq32[memory->activeRegion]; - cpu->memory.activeUncachedCycles16 = memory->waitstatesNonseq16[memory->activeRegion]; + cpu->memory.activeNonseqCycles32 = memory->waitstatesNonseq32[memory->activeRegion]; + cpu->memory.activeNonseqCycles16 = memory->waitstatesNonseq16[memory->activeRegion]; } void GBAMemoryWriteDMASAD(struct GBA* gba, int dma, uint32_t address) { @@ -1528,6 +1520,31 @@ void GBAMemoryServiceDMA(struct GBA* gba, int number, struct GBADMA* info) { cpu->cycles += cycles; } +int32_t GBAMemoryStall(struct ARMCore* cpu, int32_t wait) { + struct GBA* gba = (struct GBA*) cpu->master; + struct GBAMemory* memory = &gba->memory; + + if (!memory->prefetch || memory->activeRegion < REGION_CART0) { + return wait; + } + + int32_t stall = 5 - memory->waitstatesSeq16[memory->activeRegion]; // Figure out where this value comes from + + // Base number of cycles for this insn is N + int32_t base = memory->waitstatesSeq16[memory->activeRegion] + 1; + if (cpu->executionMode == MODE_ARM) { + base <<= 1; + } + if (base <= wait) { + --base; + } else { + base = wait; + } + + cpu->cycles -= stall + base - 1; + return wait; +} + void GBAMemorySerialize(const struct GBAMemory* memory, struct GBASerializedState* state) { memcpy(state->wram, memory->wram, SIZE_WORKING_RAM); memcpy(state->iwram, memory->iwram, SIZE_WORKING_IRAM); diff --git a/src/gba/memory.h b/src/gba/memory.h index 053e71f20..f135cf0b9 100644 --- a/src/gba/memory.h +++ b/src/gba/memory.h @@ -131,6 +131,7 @@ struct GBAMemory { char waitstatesPrefetchNonseq32[16]; char waitstatesPrefetchNonseq16[16]; int activeRegion; + bool prefetch; uint32_t biosPrefetch; struct GBADMA dma[4];