GBA Memory: Implement game pak prefetch approximation

This commit is contained in:
Jeffrey Pfau 2015-06-22 00:27:21 -07:00
parent bdb7635156
commit 051af279c9
7 changed files with 93 additions and 72 deletions

View File

@ -110,8 +110,7 @@ struct ARMMemory {
uint32_t activeSeqCycles16;
uint32_t activeNonseqCycles32;
uint32_t activeNonseqCycles16;
uint32_t activeUncachedCycles32;
uint32_t activeUncachedCycles16;
int32_t (*stall)(struct ARMCore*, int32_t wait);
void (*setActiveRegion)(struct ARMCore*, uint32_t address);
};

View File

@ -259,7 +259,7 @@ static inline void _immediate(struct ARMCore* cpu, uint32_t opcode) {
#define ADDR_MODE_4_WRITEBACK_STM cpu->gprs[rn] = address;
#define ARM_LOAD_POST_BODY \
currentCycles += 1 + cpu->memory.activeNonseqCycles32 - cpu->memory.activeSeqCycles32; \
currentCycles += cpu->memory.activeNonseqCycles32 - cpu->memory.activeSeqCycles32; \
if (rd == ARM_PC) { \
ARM_WRITE_PC; \
}
@ -567,7 +567,7 @@ DEFINE_LOAD_STORE_T_INSTRUCTION_ARM(STRT,
DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_ARM(LDM,
load,
currentCycles += 1 + cpu->memory.activeNonseqCycles32 - cpu->memory.activeSeqCycles32;
currentCycles += cpu->memory.activeNonseqCycles32 - cpu->memory.activeSeqCycles32;
if (rs & 0x8000) {
ARM_WRITE_PC;
})

View File

@ -36,14 +36,18 @@
#define ARM_V_SUBTRACTION(M, N, D) ((ARM_SIGN((M) ^ (N))) && (ARM_SIGN((M) ^ (D))))
#define ARM_WAIT_MUL(R) \
if ((R & 0xFFFFFF00) == 0xFFFFFF00 || !(R & 0xFFFFFF00)) { \
currentCycles += 1; \
} else if ((R & 0xFFFF0000) == 0xFFFF0000 || !(R & 0xFFFF0000)) { \
currentCycles += 2; \
} else if ((R & 0xFF000000) == 0xFF000000 || !(R & 0xFF000000)) { \
currentCycles += 3; \
} else { \
currentCycles += 4; \
{ \
int32_t wait; \
if ((R & 0xFFFFFF00) == 0xFFFFFF00 || !(R & 0xFFFFFF00)) { \
wait = 1; \
} else if ((R & 0xFFFF0000) == 0xFFFF0000 || !(R & 0xFFFF0000)) { \
wait = 2; \
} else if ((R & 0xFF000000) == 0xFF000000 || !(R & 0xFF000000)) { \
wait = 3; \
} else { \
wait = 4; \
} \
currentCycles += cpu->memory.stall(cpu, wait); \
}
#define ARM_STUB cpu->irqh.hitStub(cpu, opcode)
@ -55,7 +59,7 @@
LOAD_32(cpu->prefetch[0], cpu->gprs[ARM_PC] & cpu->memory.activeMask, cpu->memory.activeRegion); \
cpu->gprs[ARM_PC] += WORD_SIZE_ARM; \
LOAD_32(cpu->prefetch[1], cpu->gprs[ARM_PC] & cpu->memory.activeMask, cpu->memory.activeRegion); \
currentCycles += 2 + cpu->memory.activeUncachedCycles32 + cpu->memory.activeSeqCycles32;
currentCycles += 2 + cpu->memory.activeNonseqCycles32 + cpu->memory.activeSeqCycles32;
#define THUMB_WRITE_PC \
cpu->gprs[ARM_PC] = (cpu->gprs[ARM_PC] & -WORD_SIZE_THUMB); \
@ -63,7 +67,7 @@
LOAD_16(cpu->prefetch[0], cpu->gprs[ARM_PC] & cpu->memory.activeMask, cpu->memory.activeRegion); \
cpu->gprs[ARM_PC] += WORD_SIZE_THUMB; \
LOAD_16(cpu->prefetch[1], cpu->gprs[ARM_PC] & cpu->memory.activeMask, cpu->memory.activeRegion); \
currentCycles += 2 + cpu->memory.activeUncachedCycles16 + cpu->memory.activeSeqCycles16;
currentCycles += 2 + cpu->memory.activeNonseqCycles16 + cpu->memory.activeSeqCycles16;
static inline int _ARMModeHasSPSR(enum PrivilegeMode mode) {
return mode != MODE_SYSTEM && mode != MODE_USER;

View File

@ -42,7 +42,7 @@
#define THUMB_PREFETCH_CYCLES (1 + cpu->memory.activeSeqCycles16)
#define THUMB_LOAD_POST_BODY \
currentCycles += 1 + cpu->memory.activeNonseqCycles16 - cpu->memory.activeSeqCycles16;
currentCycles += cpu->memory.activeNonseqCycles16 - cpu->memory.activeSeqCycles16;
#define THUMB_STORE_POST_BODY \
currentCycles += cpu->memory.activeNonseqCycles16 - cpu->memory.activeSeqCycles16;

View File

@ -91,7 +91,7 @@ static void GBAInit(struct ARMCore* cpu, struct ARMComponent* component) {
gba->idleDetectionStep = 0;
gba->idleDetectionFailures = 0;
gba->realisticTiming = false;
gba->realisticTiming = true;
gba->performingDMA = false;
}

View File

@ -22,6 +22,7 @@ static uint32_t _deadbeef[1] = { 0xE710B710 }; // Illegal instruction on both AR
static void GBASetActiveRegion(struct ARMCore* cpu, uint32_t region);
static void GBAMemoryServiceDMA(struct GBA* gba, int number, struct GBADMA* info);
static int32_t GBAMemoryStall(struct ARMCore* cpu, int32_t wait);
static const char GBA_BASE_WAITSTATES[16] = { 0, 0, 2, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4 };
static const char GBA_BASE_WAITSTATES_32[16] = { 0, 0, 5, 0, 0, 1, 1, 0, 7, 7, 9, 9, 13, 13, 9 };
@ -41,6 +42,7 @@ void GBAMemoryInit(struct GBA* gba) {
cpu->memory.store16 = GBAStore16;
cpu->memory.store8 = GBAStore8;
cpu->memory.storeMultiple = GBAStoreMultiple;
cpu->memory.stall = GBAMemoryStall;
gba->memory.bios = (uint32_t*) hleBios;
gba->memory.fullBios = 0;
@ -76,8 +78,6 @@ void GBAMemoryInit(struct GBA* gba) {
cpu->memory.activeSeqCycles16 = 0;
cpu->memory.activeNonseqCycles32 = 0;
cpu->memory.activeNonseqCycles16 = 0;
cpu->memory.activeUncachedCycles32 = 0;
cpu->memory.activeUncachedCycles16 = 0;
gba->memory.biosPrefetch = 0;
}
@ -278,12 +278,10 @@ static void GBASetActiveRegion(struct ARMCore* cpu, uint32_t address) {
}
return;
}
cpu->memory.activeSeqCycles32 = memory->waitstatesPrefetchSeq32[newRegion];
cpu->memory.activeSeqCycles16 = memory->waitstatesPrefetchSeq16[newRegion];
cpu->memory.activeNonseqCycles32 = memory->waitstatesPrefetchNonseq32[newRegion];
cpu->memory.activeNonseqCycles16 = memory->waitstatesPrefetchNonseq16[newRegion];
cpu->memory.activeUncachedCycles32 = memory->waitstatesNonseq32[newRegion];
cpu->memory.activeUncachedCycles16 = memory->waitstatesNonseq16[newRegion];
cpu->memory.activeSeqCycles32 = memory->waitstatesSeq32[memory->activeRegion];
cpu->memory.activeSeqCycles16 = memory->waitstatesSeq16[memory->activeRegion];
cpu->memory.activeNonseqCycles32 = memory->waitstatesNonseq32[memory->activeRegion];
cpu->memory.activeNonseqCycles16 = memory->waitstatesNonseq16[memory->activeRegion];
}
#define LOAD_BAD \
@ -412,7 +410,11 @@ uint32_t GBALoad32(struct ARMCore* cpu, uint32_t address, int* cycleCounter) {
}
if (cycleCounter) {
*cycleCounter += 1 + wait;
wait += 2;
if (address >> BASE_OFFSET < REGION_CART0) {
wait = GBAMemoryStall(cpu, wait);
}
*cycleCounter += wait;
}
// Unaligned 32-bit loads are "rotated" so they make some semblance of sense
int rotate = (address & 3) << 3;
@ -503,7 +505,11 @@ uint32_t GBALoad16(struct ARMCore* cpu, uint32_t address, int* cycleCounter) {
}
if (cycleCounter) {
*cycleCounter += 1 + wait;
wait += 2;
if (address >> BASE_OFFSET < REGION_CART0) {
wait = GBAMemoryStall(cpu, wait);
}
*cycleCounter += wait;
}
// Unaligned 16-bit loads are "unpredictable", but the GBA rotates them, so we have to, too.
int rotate = (address & 1) << 3;
@ -595,7 +601,11 @@ uint32_t GBALoad8(struct ARMCore* cpu, uint32_t address, int* cycleCounter) {
}
if (cycleCounter) {
*cycleCounter += 1 + wait;
wait += 2;
if (address >> BASE_OFFSET < REGION_CART0) {
wait = GBAMemoryStall(cpu, wait);
}
*cycleCounter += wait;
}
return value;
}
@ -682,7 +692,11 @@ void GBAStore32(struct ARMCore* cpu, uint32_t address, int32_t value, int* cycle
}
if (cycleCounter) {
*cycleCounter += 1 + wait;
++wait;
if (address >> BASE_OFFSET < REGION_CART0) {
wait = GBAMemoryStall(cpu, wait);
}
*cycleCounter += wait;
}
}
@ -742,7 +756,11 @@ void GBAStore16(struct ARMCore* cpu, uint32_t address, int16_t value, int* cycle
}
if (cycleCounter) {
*cycleCounter += 1 + wait;
++wait;
if (address >> BASE_OFFSET < REGION_CART0) {
wait = GBAMemoryStall(cpu, wait);
}
*cycleCounter += wait;
}
}
@ -808,7 +826,11 @@ void GBAStore8(struct ARMCore* cpu, uint32_t address, int8_t value, int* cycleCo
}
if (cycleCounter) {
*cycleCounter += 1 + wait;
++wait;
if (address >> BASE_OFFSET < REGION_CART0) {
wait = GBAMemoryStall(cpu, wait);
}
*cycleCounter += wait;
}
}
@ -1100,6 +1122,10 @@ uint32_t GBALoadMultiple(struct ARMCore* cpu, uint32_t address, int mask, enum L
}
if (cycleCounter) {
++wait;
if (address >> BASE_OFFSET < REGION_CART0) {
wait = GBAMemoryStall(cpu, wait);
}
*cycleCounter += wait;
}
@ -1206,6 +1232,9 @@ uint32_t GBAStoreMultiple(struct ARMCore* cpu, uint32_t address, int mask, enum
}
if (cycleCounter) {
if (address >> BASE_OFFSET < REGION_CART0) {
wait = GBAMemoryStall(cpu, wait);
}
*cycleCounter += wait;
}
@ -1253,50 +1282,13 @@ void GBAAdjustWaitstates(struct GBA* gba, uint16_t parameters) {
memory->waitstatesSeq32[REGION_CART1] = memory->waitstatesSeq32[REGION_CART1_EX] = 2 * memory->waitstatesSeq16[REGION_CART1] + 1;
memory->waitstatesSeq32[REGION_CART2] = memory->waitstatesSeq32[REGION_CART2_EX] = 2 * memory->waitstatesSeq16[REGION_CART2] + 1;
if (!prefetch) {
memory->waitstatesPrefetchSeq16[REGION_CART0] = memory->waitstatesPrefetchSeq16[REGION_CART0_EX] = memory->waitstatesSeq16[REGION_CART0];
memory->waitstatesPrefetchSeq16[REGION_CART1] = memory->waitstatesPrefetchSeq16[REGION_CART1_EX] = memory->waitstatesSeq16[REGION_CART1];
memory->waitstatesPrefetchSeq16[REGION_CART2] = memory->waitstatesPrefetchSeq16[REGION_CART2_EX] = memory->waitstatesSeq16[REGION_CART2];
memory->prefetch = prefetch;
memory->waitstatesPrefetchSeq32[REGION_CART0] = memory->waitstatesPrefetchSeq32[REGION_CART0_EX] = memory->waitstatesSeq32[REGION_CART0];
memory->waitstatesPrefetchSeq32[REGION_CART1] = memory->waitstatesPrefetchSeq32[REGION_CART1_EX] = memory->waitstatesSeq32[REGION_CART1];
memory->waitstatesPrefetchSeq32[REGION_CART2] = memory->waitstatesPrefetchSeq32[REGION_CART2_EX] = memory->waitstatesSeq32[REGION_CART2];
cpu->memory.activeSeqCycles32 = memory->waitstatesSeq32[memory->activeRegion];
cpu->memory.activeSeqCycles16 = memory->waitstatesSeq16[memory->activeRegion];
memory->waitstatesPrefetchNonseq16[REGION_CART0] = memory->waitstatesPrefetchNonseq16[REGION_CART0_EX] = memory->waitstatesNonseq16[REGION_CART0];
memory->waitstatesPrefetchNonseq16[REGION_CART1] = memory->waitstatesPrefetchNonseq16[REGION_CART1_EX] = memory->waitstatesNonseq16[REGION_CART1];
memory->waitstatesPrefetchNonseq16[REGION_CART2] = memory->waitstatesPrefetchNonseq16[REGION_CART2_EX] = memory->waitstatesNonseq16[REGION_CART2];
memory->waitstatesPrefetchNonseq32[REGION_CART0] = memory->waitstatesPrefetchNonseq32[REGION_CART0_EX] = memory->waitstatesNonseq32[REGION_CART0];
memory->waitstatesPrefetchNonseq32[REGION_CART1] = memory->waitstatesPrefetchNonseq32[REGION_CART1_EX] = memory->waitstatesNonseq32[REGION_CART1];
memory->waitstatesPrefetchNonseq32[REGION_CART2] = memory->waitstatesPrefetchNonseq32[REGION_CART2_EX] = memory->waitstatesNonseq32[REGION_CART2];
} else {
// Assume it stalls one cycle to pull a value from the prefetch
// This needs more research to tell if it's accurate or not
memory->waitstatesPrefetchSeq16[REGION_CART0] = memory->waitstatesPrefetchSeq16[REGION_CART0_EX] = 1;
memory->waitstatesPrefetchSeq16[REGION_CART1] = memory->waitstatesPrefetchSeq16[REGION_CART1_EX] = 1;
memory->waitstatesPrefetchSeq16[REGION_CART2] = memory->waitstatesPrefetchSeq16[REGION_CART2_EX] = 1;
memory->waitstatesPrefetchSeq32[REGION_CART0] = memory->waitstatesPrefetchSeq32[REGION_CART0_EX] = 2;
memory->waitstatesPrefetchSeq32[REGION_CART1] = memory->waitstatesPrefetchSeq32[REGION_CART1_EX] = 2;
memory->waitstatesPrefetchSeq32[REGION_CART2] = memory->waitstatesPrefetchSeq32[REGION_CART2_EX] = 2;
memory->waitstatesPrefetchNonseq16[REGION_CART0] = memory->waitstatesPrefetchNonseq16[REGION_CART0_EX] = 1;
memory->waitstatesPrefetchNonseq16[REGION_CART1] = memory->waitstatesPrefetchNonseq16[REGION_CART1_EX] = 1;
memory->waitstatesPrefetchNonseq16[REGION_CART2] = memory->waitstatesPrefetchNonseq16[REGION_CART2_EX] = 1;
memory->waitstatesPrefetchNonseq32[REGION_CART0] = memory->waitstatesPrefetchNonseq32[REGION_CART0_EX] = 2;
memory->waitstatesPrefetchNonseq32[REGION_CART1] = memory->waitstatesPrefetchNonseq32[REGION_CART1_EX] = 2;
memory->waitstatesPrefetchNonseq32[REGION_CART2] = memory->waitstatesPrefetchNonseq32[REGION_CART2_EX] = 2;
}
cpu->memory.activeSeqCycles32 = memory->waitstatesPrefetchSeq32[memory->activeRegion];
cpu->memory.activeSeqCycles16 = memory->waitstatesPrefetchSeq16[memory->activeRegion];
cpu->memory.activeNonseqCycles32 = memory->waitstatesPrefetchNonseq32[memory->activeRegion];
cpu->memory.activeNonseqCycles16 = memory->waitstatesPrefetchNonseq16[memory->activeRegion];
cpu->memory.activeUncachedCycles32 = memory->waitstatesNonseq32[memory->activeRegion];
cpu->memory.activeUncachedCycles16 = memory->waitstatesNonseq16[memory->activeRegion];
cpu->memory.activeNonseqCycles32 = memory->waitstatesNonseq32[memory->activeRegion];
cpu->memory.activeNonseqCycles16 = memory->waitstatesNonseq16[memory->activeRegion];
}
void GBAMemoryWriteDMASAD(struct GBA* gba, int dma, uint32_t address) {
@ -1528,6 +1520,31 @@ void GBAMemoryServiceDMA(struct GBA* gba, int number, struct GBADMA* info) {
cpu->cycles += cycles;
}
int32_t GBAMemoryStall(struct ARMCore* cpu, int32_t wait) {
struct GBA* gba = (struct GBA*) cpu->master;
struct GBAMemory* memory = &gba->memory;
if (!memory->prefetch || memory->activeRegion < REGION_CART0) {
return wait;
}
int32_t stall = 5 - memory->waitstatesSeq16[memory->activeRegion]; // Figure out where this value comes from
// Base number of cycles for this insn is N
int32_t base = memory->waitstatesSeq16[memory->activeRegion] + 1;
if (cpu->executionMode == MODE_ARM) {
base <<= 1;
}
if (base <= wait) {
--base;
} else {
base = wait;
}
cpu->cycles -= stall + base - 1;
return wait;
}
void GBAMemorySerialize(const struct GBAMemory* memory, struct GBASerializedState* state) {
memcpy(state->wram, memory->wram, SIZE_WORKING_RAM);
memcpy(state->iwram, memory->iwram, SIZE_WORKING_IRAM);

View File

@ -131,6 +131,7 @@ struct GBAMemory {
char waitstatesPrefetchNonseq32[16];
char waitstatesPrefetchNonseq16[16];
int activeRegion;
bool prefetch;
uint32_t biosPrefetch;
struct GBADMA dma[4];