From 2ea11feda65c7b632c78a8f0ecca565aaf4ad9ac Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Mon, 17 Jun 2024 02:27:31 -0700 Subject: [PATCH] GBA Memory: Improve VRAM access stall cycle estimation --- CHANGES | 1 + include/mgba/internal/gba/video.h | 8 ++- src/gba/memory.c | 80 +++++++++++++++++++++------- src/gba/video.c | 86 +++++++++++++++++++++++++++++-- 4 files changed, 151 insertions(+), 24 deletions(-) diff --git a/CHANGES b/CHANGES index 626e9aa41..6119069f3 100644 --- a/CHANGES +++ b/CHANGES @@ -35,6 +35,7 @@ Misc: - GB Serialize: Add missing savestate support for MBC6 and NT (newer) - GBA: Improve detection of valid ELF ROMs - GBA Audio: Remove broken XQ audio pending rewrite + - GBA Memory: Improve VRAM access stall cycle estimation - GBA Video: Add special circlular window handling in OpenGL renderer - Libretro: Add Super Game Boy Color support (closes mgba.io/i/3188) - mGUI: Enable auto-softpatching (closes mgba.io/i/2899) diff --git a/include/mgba/internal/gba/video.h b/include/mgba/internal/gba/video.h index 5b12d1541..b29b3d607 100644 --- a/include/mgba/internal/gba/video.h +++ b/include/mgba/internal/gba/video.h @@ -16,6 +16,12 @@ CXX_GUARD_START mLOG_DECLARE_CATEGORY(GBA_VIDEO); +#define GBA_VSTALL_T4(X) (0x011 << (X)) +#define GBA_VSTALL_T8(X) (0x010 << (X)) +#define GBA_VSTALL_A2 0x100 +#define GBA_VSTALL_A3 0x200 +#define GBA_VSTALL_B 0x400 + enum { VIDEO_HBLANK_PIXELS = 68, VIDEO_HDRAW_LENGTH = 1008, @@ -208,7 +214,7 @@ struct GBAVideo { struct mTimingEvent event; int vcount; - int shouldStall; + unsigned stallMask; uint16_t palette[512]; uint16_t* vram; diff --git a/src/gba/memory.c b/src/gba/memory.c index 4c72ca80e..62b26b1e3 100644 --- a/src/gba/memory.c +++ b/src/gba/memory.c @@ -401,7 +401,7 @@ static void GBASetActiveRegion(struct ARMCore* cpu, uint32_t address) { LOAD_32(value, address & 0x0001FFFC, gba->video.vram); \ } \ ++wait; \ - if (gba->video.shouldStall && (address & 0x0001FFFF) < ((GBARegisterDISPCNTGetMode(gba->memory.io[GBA_REG(DISPCNT)]) >= 3) ? 0x00014000 : 0x00010000)) { \ + if (gba->video.stallMask && (address & 0x0001FFFF) < ((GBARegisterDISPCNTGetMode(gba->memory.io[GBA_REG(DISPCNT)]) >= 3) ? 0x00014000 : 0x00010000)) { \ wait += GBAMemoryStallVRAM(gba, wait, 1); \ } @@ -561,7 +561,7 @@ uint32_t GBALoad16(struct ARMCore* cpu, uint32_t address, int* cycleCounter) { } else { LOAD_16(value, address & 0x0001FFFE, gba->video.vram); } - if (gba->video.shouldStall && (address & 0x0001FFFF) < ((GBARegisterDISPCNTGetMode(gba->memory.io[GBA_REG(DISPCNT)]) >= 3) ? 0x00014000 : 0x00010000)) { + if (gba->video.stallMask && (address & 0x0001FFFF) < ((GBARegisterDISPCNTGetMode(gba->memory.io[GBA_REG(DISPCNT)]) >= 3) ? 0x00014000 : 0x00010000)) { wait += GBAMemoryStallVRAM(gba, wait, 0); } break; @@ -676,7 +676,7 @@ uint32_t GBALoad8(struct ARMCore* cpu, uint32_t address, int* cycleCounter) { } else { value = ((uint8_t*) gba->video.vram)[address & 0x0001FFFF]; } - if (gba->video.shouldStall) { + if (gba->video.stallMask) { wait += GBAMemoryStallVRAM(gba, wait, 0); } break; @@ -781,7 +781,7 @@ uint32_t GBALoad8(struct ARMCore* cpu, uint32_t address, int* cycleCounter) { } \ } \ ++wait; \ - if (gba->video.shouldStall && (address & 0x0001FFFF) < ((GBARegisterDISPCNTGetMode(gba->memory.io[GBA_REG(DISPCNT)]) >= 3) ? 0x00014000 : 0x00010000)) { \ + if (gba->video.stallMask && (address & 0x0001FFFF) < ((GBARegisterDISPCNTGetMode(gba->memory.io[GBA_REG(DISPCNT)]) >= 3) ? 0x00014000 : 0x00010000)) { \ wait += GBAMemoryStallVRAM(gba, wait, 1); \ } @@ -908,7 +908,7 @@ void GBAStore16(struct ARMCore* cpu, uint32_t address, int16_t value, int* cycle gba->video.renderer->writeVRAM(gba->video.renderer, address & 0x0001FFFE); } } - if (gba->video.shouldStall && (address & 0x0001FFFF) < ((GBARegisterDISPCNTGetMode(gba->memory.io[GBA_REG(DISPCNT)]) >= 3) ? 0x00014000 : 0x00010000)) { + if (gba->video.stallMask && (address & 0x0001FFFF) < ((GBARegisterDISPCNTGetMode(gba->memory.io[GBA_REG(DISPCNT)]) >= 3) ? 0x00014000 : 0x00010000)) { wait += GBAMemoryStallVRAM(gba, wait, 0); } break; @@ -1038,7 +1038,7 @@ void GBAStore8(struct ARMCore* cpu, uint32_t address, int8_t value, int* cycleCo gba->video.renderer->vram[(address & 0x1FFFE) >> 1] = ((uint8_t) value) | (value << 8); gba->video.renderer->writeVRAM(gba->video.renderer, address & 0x0001FFFE); } - if (gba->video.shouldStall) { + if (gba->video.stallMask) { wait += GBAMemoryStallVRAM(gba, wait, 0); } break; @@ -1778,20 +1778,64 @@ int32_t GBAMemoryStall(struct ARMCore* cpu, int32_t wait) { } int32_t GBAMemoryStallVRAM(struct GBA* gba, int32_t wait, int extra) { - UNUSED(extra); - // TODO - uint16_t dispcnt = gba->memory.io[GBA_REG(DISPCNT)]; - int32_t stall = 0; - switch (GBARegisterDISPCNTGetMode(dispcnt)) { - case 2: - if (GBARegisterDISPCNTIsBg2Enable(dispcnt) && GBARegisterDISPCNTIsBg3Enable(dispcnt)) { - // If both backgrounds are enabled, VRAM access is entirely blocked during hdraw - stall = mTimingUntil(&gba->timing, &gba->video.event); + static const uint16_t stallLUT[32] = { + GBA_VSTALL_T4(0) | GBA_VSTALL_A3, + GBA_VSTALL_T4(1) | GBA_VSTALL_A3, + GBA_VSTALL_T4(2) | GBA_VSTALL_A2, + GBA_VSTALL_T4(3) | GBA_VSTALL_A3 | GBA_VSTALL_B, + + GBA_VSTALL_T4(0) | GBA_VSTALL_A3, + GBA_VSTALL_T4(1) | GBA_VSTALL_A3, + GBA_VSTALL_T4(2) | GBA_VSTALL_A2, + GBA_VSTALL_T4(3) | GBA_VSTALL_A3 | GBA_VSTALL_B, + + GBA_VSTALL_A3, + GBA_VSTALL_A3, + GBA_VSTALL_A2, + GBA_VSTALL_A3 | GBA_VSTALL_B, + + GBA_VSTALL_T8(0) | GBA_VSTALL_A3, + GBA_VSTALL_T8(1) | GBA_VSTALL_A3, + GBA_VSTALL_T8(2) | GBA_VSTALL_A2, + GBA_VSTALL_T8(3) | GBA_VSTALL_A3 | GBA_VSTALL_B, + + GBA_VSTALL_A3, + GBA_VSTALL_A3, + GBA_VSTALL_A2, + GBA_VSTALL_A3 | GBA_VSTALL_B, + + GBA_VSTALL_T4(0) | GBA_VSTALL_A3, + GBA_VSTALL_T4(1) | GBA_VSTALL_A3, + GBA_VSTALL_T4(2) | GBA_VSTALL_A2, + GBA_VSTALL_T4(3) | GBA_VSTALL_A3 | GBA_VSTALL_B, + + GBA_VSTALL_A3, + GBA_VSTALL_A3, + GBA_VSTALL_A2, + GBA_VSTALL_A3 | GBA_VSTALL_B, + + GBA_VSTALL_T8(0) | GBA_VSTALL_A3, + GBA_VSTALL_T8(1) | GBA_VSTALL_A3, + GBA_VSTALL_T8(2) | GBA_VSTALL_A2, + GBA_VSTALL_T8(3) | GBA_VSTALL_A3 | GBA_VSTALL_B, + }; + + int32_t until = mTimingUntil(&gba->timing, &gba->video.event); + int period = -until & 0x1F; + + int32_t stall = until; + + int i; + for (i = 0; i < 16; ++i) { + if (!(stallLUT[(period + i) & 0x1F] & gba->video.stallMask)) { + if (!extra) { + stall = i; + break; + } + --extra; } - break; - default: - return 0; } + stall -= wait; if (stall < 0) { return 0; diff --git a/src/gba/video.c b/src/gba/video.c index bfe556943..5a4d2c8c0 100644 --- a/src/gba/video.c +++ b/src/gba/video.c @@ -32,6 +32,7 @@ static void GBAVideoDummyRendererPutPixels(struct GBAVideoRenderer* renderer, si static void _startHblank(struct mTiming*, void* context, uint32_t cyclesLate); static void _startHdraw(struct mTiming*, void* context, uint32_t cyclesLate); +static unsigned _calculateStallMask(struct GBA* gba); MGBA_EXPORT const int GBAVideoObjSizes[16][2] = { { 8, 8 }, @@ -78,7 +79,7 @@ void GBAVideoReset(struct GBAVideo* video) { video->frameCounter = 0; video->frameskipCounter = 0; - video->shouldStall = 0; + video->stallMask = 0; memset(video->palette, 0, sizeof(video->palette)); memset(video->oam.raw, 0, sizeof(video->oam.raw)); @@ -149,7 +150,7 @@ void _startHdraw(struct mTiming* timing, void* context, uint32_t cyclesLate) { video->p->memory.io[GBA_REG(VCOUNT)] = video->vcount; if (video->vcount < GBA_VIDEO_VERTICAL_PIXELS) { - video->shouldStall = 1; + video->stallMask = _calculateStallMask(video->p); } GBARegisterDISPSTAT dispstat = video->p->memory.io[GBA_REG(DISPSTAT)]; @@ -214,7 +215,7 @@ void _startHblank(struct mTiming* timing, void* context, uint32_t cyclesLate) { if (GBARegisterDISPSTATIsHblankIRQ(dispstat)) { GBARaiseIRQ(video->p, GBA_IRQ_HBLANK, cyclesLate - 6); // TODO: Where does this fudge factor come from? } - video->shouldStall = 0; + video->stallMask = 0; video->p->memory.io[GBA_REG(DISPSTAT)] = dispstat; } @@ -224,6 +225,81 @@ void GBAVideoWriteDISPSTAT(struct GBAVideo* video, uint16_t value) { // TODO: Does a VCounter IRQ trigger on write? } +static unsigned _calculateStallMask(struct GBA* gba) { + unsigned mask = 0; + + unsigned dispcnt = gba->memory.io[GBA_REG(DISPCNT)]; + switch (GBARegisterDISPCNTGetMode(dispcnt)) { + case 0: + if (GBARegisterDISPCNTIsBg0Enable(dispcnt)) { + if (GBARegisterBGCNTIs256Color(gba->memory.io[GBA_REG(BG0CNT)])) { + mask |= GBA_VSTALL_T8(0); + } else { + mask |= GBA_VSTALL_T4(0); + } + } + if (GBARegisterDISPCNTIsBg1Enable(dispcnt)) { + if (GBARegisterBGCNTIs256Color(gba->memory.io[GBA_REG(BG1CNT)])) { + mask |= GBA_VSTALL_T8(1); + } else { + mask |= GBA_VSTALL_T4(1); + } + } + if (GBARegisterDISPCNTIsBg2Enable(dispcnt)) { + if (GBARegisterBGCNTIs256Color(gba->memory.io[GBA_REG(BG2CNT)])) { + mask |= GBA_VSTALL_T8(2); + } else { + mask |= GBA_VSTALL_T4(2); + } + } + if (GBARegisterDISPCNTIsBg3Enable(dispcnt)) { + if (GBARegisterBGCNTIs256Color(gba->memory.io[GBA_REG(BG3CNT)])) { + mask |= GBA_VSTALL_T8(3); + } else { + mask |= GBA_VSTALL_T4(3); + } + } + break; + case 1: + if (GBARegisterDISPCNTIsBg0Enable(dispcnt)) { + if (GBARegisterBGCNTIs256Color(gba->memory.io[GBA_REG(BG0CNT)])) { + mask |= GBA_VSTALL_T8(0); + } else { + mask |= GBA_VSTALL_T4(0); + } + } + if (GBARegisterDISPCNTIsBg1Enable(dispcnt)) { + if (GBARegisterBGCNTIs256Color(gba->memory.io[GBA_REG(BG1CNT)])) { + mask |= GBA_VSTALL_T8(1); + } else { + mask |= GBA_VSTALL_T4(1); + } + } + if (GBARegisterDISPCNTIsBg2Enable(dispcnt)) { + mask |= GBA_VSTALL_A2; + } + break; + case 2: + if (GBARegisterDISPCNTIsBg2Enable(dispcnt)) { + mask |= GBA_VSTALL_A2; + } + if (GBARegisterDISPCNTIsBg3Enable(dispcnt)) { + mask |= GBA_VSTALL_A3; + } + break; + case 3: + case 4: + case 5: + if (GBARegisterDISPCNTIsBg2Enable(dispcnt)) { + mask |= GBA_VSTALL_B; + } + break; + default: + break; + } + return mask; +} + static void GBAVideoDummyRendererInit(struct GBAVideoRenderer* renderer) { UNUSED(renderer); // Nothing to do @@ -353,7 +429,7 @@ void GBAVideoDeserialize(struct GBAVideo* video, const struct GBASerializedState } LOAD_32(video->frameCounter, 0, &state->video.frameCounter); - video->shouldStall = 0; + video->stallMask = 0; int32_t flags; LOAD_32(flags, 0, &state->video.flags); GBARegisterDISPSTAT dispstat = state->io[GBA_REG(DISPSTAT)]; @@ -370,7 +446,7 @@ void GBAVideoDeserialize(struct GBAVideo* video, const struct GBASerializedState break; case 2: video->event.callback = _startHblank; - video->shouldStall = 1; + video->stallMask = _calculateStallMask(video->p); break; case 3: video->event.callback = _startHdraw;