mirror of https://github.com/mgba-emu/mgba.git
GBA Memory: Implement game pak prefetch approximation
This commit is contained in:
parent
bdb7635156
commit
051af279c9
|
@ -110,8 +110,7 @@ struct ARMMemory {
|
|||
uint32_t activeSeqCycles16;
|
||||
uint32_t activeNonseqCycles32;
|
||||
uint32_t activeNonseqCycles16;
|
||||
uint32_t activeUncachedCycles32;
|
||||
uint32_t activeUncachedCycles16;
|
||||
int32_t (*stall)(struct ARMCore*, int32_t wait);
|
||||
void (*setActiveRegion)(struct ARMCore*, uint32_t address);
|
||||
};
|
||||
|
||||
|
|
|
@ -259,7 +259,7 @@ static inline void _immediate(struct ARMCore* cpu, uint32_t opcode) {
|
|||
#define ADDR_MODE_4_WRITEBACK_STM cpu->gprs[rn] = address;
|
||||
|
||||
#define ARM_LOAD_POST_BODY \
|
||||
currentCycles += 1 + cpu->memory.activeNonseqCycles32 - cpu->memory.activeSeqCycles32; \
|
||||
currentCycles += cpu->memory.activeNonseqCycles32 - cpu->memory.activeSeqCycles32; \
|
||||
if (rd == ARM_PC) { \
|
||||
ARM_WRITE_PC; \
|
||||
}
|
||||
|
@ -567,7 +567,7 @@ DEFINE_LOAD_STORE_T_INSTRUCTION_ARM(STRT,
|
|||
|
||||
DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_ARM(LDM,
|
||||
load,
|
||||
currentCycles += 1 + cpu->memory.activeNonseqCycles32 - cpu->memory.activeSeqCycles32;
|
||||
currentCycles += cpu->memory.activeNonseqCycles32 - cpu->memory.activeSeqCycles32;
|
||||
if (rs & 0x8000) {
|
||||
ARM_WRITE_PC;
|
||||
})
|
||||
|
|
|
@ -36,14 +36,18 @@
|
|||
#define ARM_V_SUBTRACTION(M, N, D) ((ARM_SIGN((M) ^ (N))) && (ARM_SIGN((M) ^ (D))))
|
||||
|
||||
#define ARM_WAIT_MUL(R) \
|
||||
if ((R & 0xFFFFFF00) == 0xFFFFFF00 || !(R & 0xFFFFFF00)) { \
|
||||
currentCycles += 1; \
|
||||
} else if ((R & 0xFFFF0000) == 0xFFFF0000 || !(R & 0xFFFF0000)) { \
|
||||
currentCycles += 2; \
|
||||
} else if ((R & 0xFF000000) == 0xFF000000 || !(R & 0xFF000000)) { \
|
||||
currentCycles += 3; \
|
||||
} else { \
|
||||
currentCycles += 4; \
|
||||
{ \
|
||||
int32_t wait; \
|
||||
if ((R & 0xFFFFFF00) == 0xFFFFFF00 || !(R & 0xFFFFFF00)) { \
|
||||
wait = 1; \
|
||||
} else if ((R & 0xFFFF0000) == 0xFFFF0000 || !(R & 0xFFFF0000)) { \
|
||||
wait = 2; \
|
||||
} else if ((R & 0xFF000000) == 0xFF000000 || !(R & 0xFF000000)) { \
|
||||
wait = 3; \
|
||||
} else { \
|
||||
wait = 4; \
|
||||
} \
|
||||
currentCycles += cpu->memory.stall(cpu, wait); \
|
||||
}
|
||||
|
||||
#define ARM_STUB cpu->irqh.hitStub(cpu, opcode)
|
||||
|
@ -55,7 +59,7 @@
|
|||
LOAD_32(cpu->prefetch[0], cpu->gprs[ARM_PC] & cpu->memory.activeMask, cpu->memory.activeRegion); \
|
||||
cpu->gprs[ARM_PC] += WORD_SIZE_ARM; \
|
||||
LOAD_32(cpu->prefetch[1], cpu->gprs[ARM_PC] & cpu->memory.activeMask, cpu->memory.activeRegion); \
|
||||
currentCycles += 2 + cpu->memory.activeUncachedCycles32 + cpu->memory.activeSeqCycles32;
|
||||
currentCycles += 2 + cpu->memory.activeNonseqCycles32 + cpu->memory.activeSeqCycles32;
|
||||
|
||||
#define THUMB_WRITE_PC \
|
||||
cpu->gprs[ARM_PC] = (cpu->gprs[ARM_PC] & -WORD_SIZE_THUMB); \
|
||||
|
@ -63,7 +67,7 @@
|
|||
LOAD_16(cpu->prefetch[0], cpu->gprs[ARM_PC] & cpu->memory.activeMask, cpu->memory.activeRegion); \
|
||||
cpu->gprs[ARM_PC] += WORD_SIZE_THUMB; \
|
||||
LOAD_16(cpu->prefetch[1], cpu->gprs[ARM_PC] & cpu->memory.activeMask, cpu->memory.activeRegion); \
|
||||
currentCycles += 2 + cpu->memory.activeUncachedCycles16 + cpu->memory.activeSeqCycles16;
|
||||
currentCycles += 2 + cpu->memory.activeNonseqCycles16 + cpu->memory.activeSeqCycles16;
|
||||
|
||||
static inline int _ARMModeHasSPSR(enum PrivilegeMode mode) {
|
||||
return mode != MODE_SYSTEM && mode != MODE_USER;
|
||||
|
|
|
@ -42,7 +42,7 @@
|
|||
#define THUMB_PREFETCH_CYCLES (1 + cpu->memory.activeSeqCycles16)
|
||||
|
||||
#define THUMB_LOAD_POST_BODY \
|
||||
currentCycles += 1 + cpu->memory.activeNonseqCycles16 - cpu->memory.activeSeqCycles16;
|
||||
currentCycles += cpu->memory.activeNonseqCycles16 - cpu->memory.activeSeqCycles16;
|
||||
|
||||
#define THUMB_STORE_POST_BODY \
|
||||
currentCycles += cpu->memory.activeNonseqCycles16 - cpu->memory.activeSeqCycles16;
|
||||
|
|
|
@ -91,7 +91,7 @@ static void GBAInit(struct ARMCore* cpu, struct ARMComponent* component) {
|
|||
gba->idleDetectionStep = 0;
|
||||
gba->idleDetectionFailures = 0;
|
||||
|
||||
gba->realisticTiming = false;
|
||||
gba->realisticTiming = true;
|
||||
|
||||
gba->performingDMA = false;
|
||||
}
|
||||
|
|
129
src/gba/memory.c
129
src/gba/memory.c
|
@ -22,6 +22,7 @@ static uint32_t _deadbeef[1] = { 0xE710B710 }; // Illegal instruction on both AR
|
|||
|
||||
static void GBASetActiveRegion(struct ARMCore* cpu, uint32_t region);
|
||||
static void GBAMemoryServiceDMA(struct GBA* gba, int number, struct GBADMA* info);
|
||||
static int32_t GBAMemoryStall(struct ARMCore* cpu, int32_t wait);
|
||||
|
||||
static const char GBA_BASE_WAITSTATES[16] = { 0, 0, 2, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4 };
|
||||
static const char GBA_BASE_WAITSTATES_32[16] = { 0, 0, 5, 0, 0, 1, 1, 0, 7, 7, 9, 9, 13, 13, 9 };
|
||||
|
@ -41,6 +42,7 @@ void GBAMemoryInit(struct GBA* gba) {
|
|||
cpu->memory.store16 = GBAStore16;
|
||||
cpu->memory.store8 = GBAStore8;
|
||||
cpu->memory.storeMultiple = GBAStoreMultiple;
|
||||
cpu->memory.stall = GBAMemoryStall;
|
||||
|
||||
gba->memory.bios = (uint32_t*) hleBios;
|
||||
gba->memory.fullBios = 0;
|
||||
|
@ -76,8 +78,6 @@ void GBAMemoryInit(struct GBA* gba) {
|
|||
cpu->memory.activeSeqCycles16 = 0;
|
||||
cpu->memory.activeNonseqCycles32 = 0;
|
||||
cpu->memory.activeNonseqCycles16 = 0;
|
||||
cpu->memory.activeUncachedCycles32 = 0;
|
||||
cpu->memory.activeUncachedCycles16 = 0;
|
||||
gba->memory.biosPrefetch = 0;
|
||||
}
|
||||
|
||||
|
@ -278,12 +278,10 @@ static void GBASetActiveRegion(struct ARMCore* cpu, uint32_t address) {
|
|||
}
|
||||
return;
|
||||
}
|
||||
cpu->memory.activeSeqCycles32 = memory->waitstatesPrefetchSeq32[newRegion];
|
||||
cpu->memory.activeSeqCycles16 = memory->waitstatesPrefetchSeq16[newRegion];
|
||||
cpu->memory.activeNonseqCycles32 = memory->waitstatesPrefetchNonseq32[newRegion];
|
||||
cpu->memory.activeNonseqCycles16 = memory->waitstatesPrefetchNonseq16[newRegion];
|
||||
cpu->memory.activeUncachedCycles32 = memory->waitstatesNonseq32[newRegion];
|
||||
cpu->memory.activeUncachedCycles16 = memory->waitstatesNonseq16[newRegion];
|
||||
cpu->memory.activeSeqCycles32 = memory->waitstatesSeq32[memory->activeRegion];
|
||||
cpu->memory.activeSeqCycles16 = memory->waitstatesSeq16[memory->activeRegion];
|
||||
cpu->memory.activeNonseqCycles32 = memory->waitstatesNonseq32[memory->activeRegion];
|
||||
cpu->memory.activeNonseqCycles16 = memory->waitstatesNonseq16[memory->activeRegion];
|
||||
}
|
||||
|
||||
#define LOAD_BAD \
|
||||
|
@ -412,7 +410,11 @@ uint32_t GBALoad32(struct ARMCore* cpu, uint32_t address, int* cycleCounter) {
|
|||
}
|
||||
|
||||
if (cycleCounter) {
|
||||
*cycleCounter += 1 + wait;
|
||||
wait += 2;
|
||||
if (address >> BASE_OFFSET < REGION_CART0) {
|
||||
wait = GBAMemoryStall(cpu, wait);
|
||||
}
|
||||
*cycleCounter += wait;
|
||||
}
|
||||
// Unaligned 32-bit loads are "rotated" so they make some semblance of sense
|
||||
int rotate = (address & 3) << 3;
|
||||
|
@ -503,7 +505,11 @@ uint32_t GBALoad16(struct ARMCore* cpu, uint32_t address, int* cycleCounter) {
|
|||
}
|
||||
|
||||
if (cycleCounter) {
|
||||
*cycleCounter += 1 + wait;
|
||||
wait += 2;
|
||||
if (address >> BASE_OFFSET < REGION_CART0) {
|
||||
wait = GBAMemoryStall(cpu, wait);
|
||||
}
|
||||
*cycleCounter += wait;
|
||||
}
|
||||
// Unaligned 16-bit loads are "unpredictable", but the GBA rotates them, so we have to, too.
|
||||
int rotate = (address & 1) << 3;
|
||||
|
@ -595,7 +601,11 @@ uint32_t GBALoad8(struct ARMCore* cpu, uint32_t address, int* cycleCounter) {
|
|||
}
|
||||
|
||||
if (cycleCounter) {
|
||||
*cycleCounter += 1 + wait;
|
||||
wait += 2;
|
||||
if (address >> BASE_OFFSET < REGION_CART0) {
|
||||
wait = GBAMemoryStall(cpu, wait);
|
||||
}
|
||||
*cycleCounter += wait;
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
@ -682,7 +692,11 @@ void GBAStore32(struct ARMCore* cpu, uint32_t address, int32_t value, int* cycle
|
|||
}
|
||||
|
||||
if (cycleCounter) {
|
||||
*cycleCounter += 1 + wait;
|
||||
++wait;
|
||||
if (address >> BASE_OFFSET < REGION_CART0) {
|
||||
wait = GBAMemoryStall(cpu, wait);
|
||||
}
|
||||
*cycleCounter += wait;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -742,7 +756,11 @@ void GBAStore16(struct ARMCore* cpu, uint32_t address, int16_t value, int* cycle
|
|||
}
|
||||
|
||||
if (cycleCounter) {
|
||||
*cycleCounter += 1 + wait;
|
||||
++wait;
|
||||
if (address >> BASE_OFFSET < REGION_CART0) {
|
||||
wait = GBAMemoryStall(cpu, wait);
|
||||
}
|
||||
*cycleCounter += wait;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -808,7 +826,11 @@ void GBAStore8(struct ARMCore* cpu, uint32_t address, int8_t value, int* cycleCo
|
|||
}
|
||||
|
||||
if (cycleCounter) {
|
||||
*cycleCounter += 1 + wait;
|
||||
++wait;
|
||||
if (address >> BASE_OFFSET < REGION_CART0) {
|
||||
wait = GBAMemoryStall(cpu, wait);
|
||||
}
|
||||
*cycleCounter += wait;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1100,6 +1122,10 @@ uint32_t GBALoadMultiple(struct ARMCore* cpu, uint32_t address, int mask, enum L
|
|||
}
|
||||
|
||||
if (cycleCounter) {
|
||||
++wait;
|
||||
if (address >> BASE_OFFSET < REGION_CART0) {
|
||||
wait = GBAMemoryStall(cpu, wait);
|
||||
}
|
||||
*cycleCounter += wait;
|
||||
}
|
||||
|
||||
|
@ -1206,6 +1232,9 @@ uint32_t GBAStoreMultiple(struct ARMCore* cpu, uint32_t address, int mask, enum
|
|||
}
|
||||
|
||||
if (cycleCounter) {
|
||||
if (address >> BASE_OFFSET < REGION_CART0) {
|
||||
wait = GBAMemoryStall(cpu, wait);
|
||||
}
|
||||
*cycleCounter += wait;
|
||||
}
|
||||
|
||||
|
@ -1253,50 +1282,13 @@ void GBAAdjustWaitstates(struct GBA* gba, uint16_t parameters) {
|
|||
memory->waitstatesSeq32[REGION_CART1] = memory->waitstatesSeq32[REGION_CART1_EX] = 2 * memory->waitstatesSeq16[REGION_CART1] + 1;
|
||||
memory->waitstatesSeq32[REGION_CART2] = memory->waitstatesSeq32[REGION_CART2_EX] = 2 * memory->waitstatesSeq16[REGION_CART2] + 1;
|
||||
|
||||
if (!prefetch) {
|
||||
memory->waitstatesPrefetchSeq16[REGION_CART0] = memory->waitstatesPrefetchSeq16[REGION_CART0_EX] = memory->waitstatesSeq16[REGION_CART0];
|
||||
memory->waitstatesPrefetchSeq16[REGION_CART1] = memory->waitstatesPrefetchSeq16[REGION_CART1_EX] = memory->waitstatesSeq16[REGION_CART1];
|
||||
memory->waitstatesPrefetchSeq16[REGION_CART2] = memory->waitstatesPrefetchSeq16[REGION_CART2_EX] = memory->waitstatesSeq16[REGION_CART2];
|
||||
memory->prefetch = prefetch;
|
||||
|
||||
memory->waitstatesPrefetchSeq32[REGION_CART0] = memory->waitstatesPrefetchSeq32[REGION_CART0_EX] = memory->waitstatesSeq32[REGION_CART0];
|
||||
memory->waitstatesPrefetchSeq32[REGION_CART1] = memory->waitstatesPrefetchSeq32[REGION_CART1_EX] = memory->waitstatesSeq32[REGION_CART1];
|
||||
memory->waitstatesPrefetchSeq32[REGION_CART2] = memory->waitstatesPrefetchSeq32[REGION_CART2_EX] = memory->waitstatesSeq32[REGION_CART2];
|
||||
cpu->memory.activeSeqCycles32 = memory->waitstatesSeq32[memory->activeRegion];
|
||||
cpu->memory.activeSeqCycles16 = memory->waitstatesSeq16[memory->activeRegion];
|
||||
|
||||
memory->waitstatesPrefetchNonseq16[REGION_CART0] = memory->waitstatesPrefetchNonseq16[REGION_CART0_EX] = memory->waitstatesNonseq16[REGION_CART0];
|
||||
memory->waitstatesPrefetchNonseq16[REGION_CART1] = memory->waitstatesPrefetchNonseq16[REGION_CART1_EX] = memory->waitstatesNonseq16[REGION_CART1];
|
||||
memory->waitstatesPrefetchNonseq16[REGION_CART2] = memory->waitstatesPrefetchNonseq16[REGION_CART2_EX] = memory->waitstatesNonseq16[REGION_CART2];
|
||||
|
||||
memory->waitstatesPrefetchNonseq32[REGION_CART0] = memory->waitstatesPrefetchNonseq32[REGION_CART0_EX] = memory->waitstatesNonseq32[REGION_CART0];
|
||||
memory->waitstatesPrefetchNonseq32[REGION_CART1] = memory->waitstatesPrefetchNonseq32[REGION_CART1_EX] = memory->waitstatesNonseq32[REGION_CART1];
|
||||
memory->waitstatesPrefetchNonseq32[REGION_CART2] = memory->waitstatesPrefetchNonseq32[REGION_CART2_EX] = memory->waitstatesNonseq32[REGION_CART2];
|
||||
} else {
|
||||
// Assume it stalls one cycle to pull a value from the prefetch
|
||||
// This needs more research to tell if it's accurate or not
|
||||
memory->waitstatesPrefetchSeq16[REGION_CART0] = memory->waitstatesPrefetchSeq16[REGION_CART0_EX] = 1;
|
||||
memory->waitstatesPrefetchSeq16[REGION_CART1] = memory->waitstatesPrefetchSeq16[REGION_CART1_EX] = 1;
|
||||
memory->waitstatesPrefetchSeq16[REGION_CART2] = memory->waitstatesPrefetchSeq16[REGION_CART2_EX] = 1;
|
||||
|
||||
memory->waitstatesPrefetchSeq32[REGION_CART0] = memory->waitstatesPrefetchSeq32[REGION_CART0_EX] = 2;
|
||||
memory->waitstatesPrefetchSeq32[REGION_CART1] = memory->waitstatesPrefetchSeq32[REGION_CART1_EX] = 2;
|
||||
memory->waitstatesPrefetchSeq32[REGION_CART2] = memory->waitstatesPrefetchSeq32[REGION_CART2_EX] = 2;
|
||||
|
||||
memory->waitstatesPrefetchNonseq16[REGION_CART0] = memory->waitstatesPrefetchNonseq16[REGION_CART0_EX] = 1;
|
||||
memory->waitstatesPrefetchNonseq16[REGION_CART1] = memory->waitstatesPrefetchNonseq16[REGION_CART1_EX] = 1;
|
||||
memory->waitstatesPrefetchNonseq16[REGION_CART2] = memory->waitstatesPrefetchNonseq16[REGION_CART2_EX] = 1;
|
||||
|
||||
memory->waitstatesPrefetchNonseq32[REGION_CART0] = memory->waitstatesPrefetchNonseq32[REGION_CART0_EX] = 2;
|
||||
memory->waitstatesPrefetchNonseq32[REGION_CART1] = memory->waitstatesPrefetchNonseq32[REGION_CART1_EX] = 2;
|
||||
memory->waitstatesPrefetchNonseq32[REGION_CART2] = memory->waitstatesPrefetchNonseq32[REGION_CART2_EX] = 2;
|
||||
}
|
||||
|
||||
cpu->memory.activeSeqCycles32 = memory->waitstatesPrefetchSeq32[memory->activeRegion];
|
||||
cpu->memory.activeSeqCycles16 = memory->waitstatesPrefetchSeq16[memory->activeRegion];
|
||||
|
||||
cpu->memory.activeNonseqCycles32 = memory->waitstatesPrefetchNonseq32[memory->activeRegion];
|
||||
cpu->memory.activeNonseqCycles16 = memory->waitstatesPrefetchNonseq16[memory->activeRegion];
|
||||
|
||||
cpu->memory.activeUncachedCycles32 = memory->waitstatesNonseq32[memory->activeRegion];
|
||||
cpu->memory.activeUncachedCycles16 = memory->waitstatesNonseq16[memory->activeRegion];
|
||||
cpu->memory.activeNonseqCycles32 = memory->waitstatesNonseq32[memory->activeRegion];
|
||||
cpu->memory.activeNonseqCycles16 = memory->waitstatesNonseq16[memory->activeRegion];
|
||||
}
|
||||
|
||||
void GBAMemoryWriteDMASAD(struct GBA* gba, int dma, uint32_t address) {
|
||||
|
@ -1528,6 +1520,31 @@ void GBAMemoryServiceDMA(struct GBA* gba, int number, struct GBADMA* info) {
|
|||
cpu->cycles += cycles;
|
||||
}
|
||||
|
||||
int32_t GBAMemoryStall(struct ARMCore* cpu, int32_t wait) {
|
||||
struct GBA* gba = (struct GBA*) cpu->master;
|
||||
struct GBAMemory* memory = &gba->memory;
|
||||
|
||||
if (!memory->prefetch || memory->activeRegion < REGION_CART0) {
|
||||
return wait;
|
||||
}
|
||||
|
||||
int32_t stall = 5 - memory->waitstatesSeq16[memory->activeRegion]; // Figure out where this value comes from
|
||||
|
||||
// Base number of cycles for this insn is N
|
||||
int32_t base = memory->waitstatesSeq16[memory->activeRegion] + 1;
|
||||
if (cpu->executionMode == MODE_ARM) {
|
||||
base <<= 1;
|
||||
}
|
||||
if (base <= wait) {
|
||||
--base;
|
||||
} else {
|
||||
base = wait;
|
||||
}
|
||||
|
||||
cpu->cycles -= stall + base - 1;
|
||||
return wait;
|
||||
}
|
||||
|
||||
void GBAMemorySerialize(const struct GBAMemory* memory, struct GBASerializedState* state) {
|
||||
memcpy(state->wram, memory->wram, SIZE_WORKING_RAM);
|
||||
memcpy(state->iwram, memory->iwram, SIZE_WORKING_IRAM);
|
||||
|
|
|
@ -131,6 +131,7 @@ struct GBAMemory {
|
|||
char waitstatesPrefetchNonseq32[16];
|
||||
char waitstatesPrefetchNonseq16[16];
|
||||
int activeRegion;
|
||||
bool prefetch;
|
||||
uint32_t biosPrefetch;
|
||||
|
||||
struct GBADMA dma[4];
|
||||
|
|
Loading…
Reference in New Issue