GBA Memory: Improve VRAM access stall cycle estimation

This commit is contained in:
Vicki Pfau 2024-06-17 02:27:31 -07:00
parent 458300b02e
commit 2ea11feda6
4 changed files with 151 additions and 24 deletions

View File

@ -35,6 +35,7 @@ Misc:
- GB Serialize: Add missing savestate support for MBC6 and NT (newer) - GB Serialize: Add missing savestate support for MBC6 and NT (newer)
- GBA: Improve detection of valid ELF ROMs - GBA: Improve detection of valid ELF ROMs
- GBA Audio: Remove broken XQ audio pending rewrite - GBA Audio: Remove broken XQ audio pending rewrite
- GBA Memory: Improve VRAM access stall cycle estimation
- GBA Video: Add special circular window handling in OpenGL renderer - GBA Video: Add special circular window handling in OpenGL renderer
- Libretro: Add Super Game Boy Color support (closes mgba.io/i/3188) - Libretro: Add Super Game Boy Color support (closes mgba.io/i/3188)
- mGUI: Enable auto-softpatching (closes mgba.io/i/2899) - mGUI: Enable auto-softpatching (closes mgba.io/i/2899)

View File

@ -16,6 +16,12 @@ CXX_GUARD_START
mLOG_DECLARE_CATEGORY(GBA_VIDEO); mLOG_DECLARE_CATEGORY(GBA_VIDEO);
/*
 * Bit flags used for VRAM access stall estimation. The same values are OR'd
 * together to build GBAVideo.stallMask (see _calculateStallMask) and to fill
 * the per-cycle stall table in GBAMemoryStallVRAM; an access is considered
 * blocked on a cycle when the table entry ANDed with the mask is nonzero.
 *
 * GBA_VSTALL_T4(X): sets bits X and X + 4; selected for background X when its
 *                   BGxCNT 256-color flag is clear.
 * GBA_VSTALL_T8(X): sets bit X + 4 only; selected for background X when its
 *                   BGxCNT 256-color flag is set.
 * GBA_VSTALL_A2:    selected for BG2 in display modes 1 and 2.
 * GBA_VSTALL_A3:    selected for BG3 in display mode 2.
 * GBA_VSTALL_B:     selected for BG2 in display modes 3, 4 and 5.
 *
 * X is expected to be a background index 0-3 so that the T bits stay below
 * GBA_VSTALL_A2 (0x100).
 *
 * NOTE(review): T4(X) and T8(X) share bit X + 4, so any combination of a T4/T8
 * mask with a T4/T8 table entry for the same X ANDs to nonzero -- confirm that
 * this overlap is intended.
 */
#define GBA_VSTALL_T4(X) (0x011 << (X))
#define GBA_VSTALL_T8(X) (0x010 << (X))
#define GBA_VSTALL_A2 0x100
#define GBA_VSTALL_A3 0x200
#define GBA_VSTALL_B 0x400
enum { enum {
VIDEO_HBLANK_PIXELS = 68, VIDEO_HBLANK_PIXELS = 68,
VIDEO_HDRAW_LENGTH = 1008, VIDEO_HDRAW_LENGTH = 1008,
@ -208,7 +214,7 @@ struct GBAVideo {
struct mTimingEvent event; struct mTimingEvent event;
int vcount; int vcount;
int shouldStall; unsigned stallMask;
uint16_t palette[512]; uint16_t palette[512];
uint16_t* vram; uint16_t* vram;

View File

@ -401,7 +401,7 @@ static void GBASetActiveRegion(struct ARMCore* cpu, uint32_t address) {
LOAD_32(value, address & 0x0001FFFC, gba->video.vram); \ LOAD_32(value, address & 0x0001FFFC, gba->video.vram); \
} \ } \
++wait; \ ++wait; \
if (gba->video.shouldStall && (address & 0x0001FFFF) < ((GBARegisterDISPCNTGetMode(gba->memory.io[GBA_REG(DISPCNT)]) >= 3) ? 0x00014000 : 0x00010000)) { \ if (gba->video.stallMask && (address & 0x0001FFFF) < ((GBARegisterDISPCNTGetMode(gba->memory.io[GBA_REG(DISPCNT)]) >= 3) ? 0x00014000 : 0x00010000)) { \
wait += GBAMemoryStallVRAM(gba, wait, 1); \ wait += GBAMemoryStallVRAM(gba, wait, 1); \
} }
@ -561,7 +561,7 @@ uint32_t GBALoad16(struct ARMCore* cpu, uint32_t address, int* cycleCounter) {
} else { } else {
LOAD_16(value, address & 0x0001FFFE, gba->video.vram); LOAD_16(value, address & 0x0001FFFE, gba->video.vram);
} }
if (gba->video.shouldStall && (address & 0x0001FFFF) < ((GBARegisterDISPCNTGetMode(gba->memory.io[GBA_REG(DISPCNT)]) >= 3) ? 0x00014000 : 0x00010000)) { if (gba->video.stallMask && (address & 0x0001FFFF) < ((GBARegisterDISPCNTGetMode(gba->memory.io[GBA_REG(DISPCNT)]) >= 3) ? 0x00014000 : 0x00010000)) {
wait += GBAMemoryStallVRAM(gba, wait, 0); wait += GBAMemoryStallVRAM(gba, wait, 0);
} }
break; break;
@ -676,7 +676,7 @@ uint32_t GBALoad8(struct ARMCore* cpu, uint32_t address, int* cycleCounter) {
} else { } else {
value = ((uint8_t*) gba->video.vram)[address & 0x0001FFFF]; value = ((uint8_t*) gba->video.vram)[address & 0x0001FFFF];
} }
if (gba->video.shouldStall) { if (gba->video.stallMask) {
wait += GBAMemoryStallVRAM(gba, wait, 0); wait += GBAMemoryStallVRAM(gba, wait, 0);
} }
break; break;
@ -781,7 +781,7 @@ uint32_t GBALoad8(struct ARMCore* cpu, uint32_t address, int* cycleCounter) {
} \ } \
} \ } \
++wait; \ ++wait; \
if (gba->video.shouldStall && (address & 0x0001FFFF) < ((GBARegisterDISPCNTGetMode(gba->memory.io[GBA_REG(DISPCNT)]) >= 3) ? 0x00014000 : 0x00010000)) { \ if (gba->video.stallMask && (address & 0x0001FFFF) < ((GBARegisterDISPCNTGetMode(gba->memory.io[GBA_REG(DISPCNT)]) >= 3) ? 0x00014000 : 0x00010000)) { \
wait += GBAMemoryStallVRAM(gba, wait, 1); \ wait += GBAMemoryStallVRAM(gba, wait, 1); \
} }
@ -908,7 +908,7 @@ void GBAStore16(struct ARMCore* cpu, uint32_t address, int16_t value, int* cycle
gba->video.renderer->writeVRAM(gba->video.renderer, address & 0x0001FFFE); gba->video.renderer->writeVRAM(gba->video.renderer, address & 0x0001FFFE);
} }
} }
if (gba->video.shouldStall && (address & 0x0001FFFF) < ((GBARegisterDISPCNTGetMode(gba->memory.io[GBA_REG(DISPCNT)]) >= 3) ? 0x00014000 : 0x00010000)) { if (gba->video.stallMask && (address & 0x0001FFFF) < ((GBARegisterDISPCNTGetMode(gba->memory.io[GBA_REG(DISPCNT)]) >= 3) ? 0x00014000 : 0x00010000)) {
wait += GBAMemoryStallVRAM(gba, wait, 0); wait += GBAMemoryStallVRAM(gba, wait, 0);
} }
break; break;
@ -1038,7 +1038,7 @@ void GBAStore8(struct ARMCore* cpu, uint32_t address, int8_t value, int* cycleCo
gba->video.renderer->vram[(address & 0x1FFFE) >> 1] = ((uint8_t) value) | (value << 8); gba->video.renderer->vram[(address & 0x1FFFE) >> 1] = ((uint8_t) value) | (value << 8);
gba->video.renderer->writeVRAM(gba->video.renderer, address & 0x0001FFFE); gba->video.renderer->writeVRAM(gba->video.renderer, address & 0x0001FFFE);
} }
if (gba->video.shouldStall) { if (gba->video.stallMask) {
wait += GBAMemoryStallVRAM(gba, wait, 0); wait += GBAMemoryStallVRAM(gba, wait, 0);
} }
break; break;
@ -1778,20 +1778,64 @@ int32_t GBAMemoryStall(struct ARMCore* cpu, int32_t wait) {
} }
int32_t GBAMemoryStallVRAM(struct GBA* gba, int32_t wait, int extra) { int32_t GBAMemoryStallVRAM(struct GBA* gba, int32_t wait, int extra) {
UNUSED(extra); static const uint16_t stallLUT[32] = {
// TODO GBA_VSTALL_T4(0) | GBA_VSTALL_A3,
uint16_t dispcnt = gba->memory.io[GBA_REG(DISPCNT)]; GBA_VSTALL_T4(1) | GBA_VSTALL_A3,
int32_t stall = 0; GBA_VSTALL_T4(2) | GBA_VSTALL_A2,
switch (GBARegisterDISPCNTGetMode(dispcnt)) { GBA_VSTALL_T4(3) | GBA_VSTALL_A3 | GBA_VSTALL_B,
case 2:
if (GBARegisterDISPCNTIsBg2Enable(dispcnt) && GBARegisterDISPCNTIsBg3Enable(dispcnt)) { GBA_VSTALL_T4(0) | GBA_VSTALL_A3,
// If both backgrounds are enabled, VRAM access is entirely blocked during hdraw GBA_VSTALL_T4(1) | GBA_VSTALL_A3,
stall = mTimingUntil(&gba->timing, &gba->video.event); GBA_VSTALL_T4(2) | GBA_VSTALL_A2,
GBA_VSTALL_T4(3) | GBA_VSTALL_A3 | GBA_VSTALL_B,
GBA_VSTALL_A3,
GBA_VSTALL_A3,
GBA_VSTALL_A2,
GBA_VSTALL_A3 | GBA_VSTALL_B,
GBA_VSTALL_T8(0) | GBA_VSTALL_A3,
GBA_VSTALL_T8(1) | GBA_VSTALL_A3,
GBA_VSTALL_T8(2) | GBA_VSTALL_A2,
GBA_VSTALL_T8(3) | GBA_VSTALL_A3 | GBA_VSTALL_B,
GBA_VSTALL_A3,
GBA_VSTALL_A3,
GBA_VSTALL_A2,
GBA_VSTALL_A3 | GBA_VSTALL_B,
GBA_VSTALL_T4(0) | GBA_VSTALL_A3,
GBA_VSTALL_T4(1) | GBA_VSTALL_A3,
GBA_VSTALL_T4(2) | GBA_VSTALL_A2,
GBA_VSTALL_T4(3) | GBA_VSTALL_A3 | GBA_VSTALL_B,
GBA_VSTALL_A3,
GBA_VSTALL_A3,
GBA_VSTALL_A2,
GBA_VSTALL_A3 | GBA_VSTALL_B,
GBA_VSTALL_T8(0) | GBA_VSTALL_A3,
GBA_VSTALL_T8(1) | GBA_VSTALL_A3,
GBA_VSTALL_T8(2) | GBA_VSTALL_A2,
GBA_VSTALL_T8(3) | GBA_VSTALL_A3 | GBA_VSTALL_B,
};
int32_t until = mTimingUntil(&gba->timing, &gba->video.event);
int period = -until & 0x1F;
int32_t stall = until;
int i;
for (i = 0; i < 16; ++i) {
if (!(stallLUT[(period + i) & 0x1F] & gba->video.stallMask)) {
if (!extra) {
stall = i;
break;
}
--extra;
} }
break;
default:
return 0;
} }
stall -= wait; stall -= wait;
if (stall < 0) { if (stall < 0) {
return 0; return 0;

View File

@ -32,6 +32,7 @@ static void GBAVideoDummyRendererPutPixels(struct GBAVideoRenderer* renderer, si
static void _startHblank(struct mTiming*, void* context, uint32_t cyclesLate); static void _startHblank(struct mTiming*, void* context, uint32_t cyclesLate);
static void _startHdraw(struct mTiming*, void* context, uint32_t cyclesLate); static void _startHdraw(struct mTiming*, void* context, uint32_t cyclesLate);
static unsigned _calculateStallMask(struct GBA* gba);
MGBA_EXPORT const int GBAVideoObjSizes[16][2] = { MGBA_EXPORT const int GBAVideoObjSizes[16][2] = {
{ 8, 8 }, { 8, 8 },
@ -78,7 +79,7 @@ void GBAVideoReset(struct GBAVideo* video) {
video->frameCounter = 0; video->frameCounter = 0;
video->frameskipCounter = 0; video->frameskipCounter = 0;
video->shouldStall = 0; video->stallMask = 0;
memset(video->palette, 0, sizeof(video->palette)); memset(video->palette, 0, sizeof(video->palette));
memset(video->oam.raw, 0, sizeof(video->oam.raw)); memset(video->oam.raw, 0, sizeof(video->oam.raw));
@ -149,7 +150,7 @@ void _startHdraw(struct mTiming* timing, void* context, uint32_t cyclesLate) {
video->p->memory.io[GBA_REG(VCOUNT)] = video->vcount; video->p->memory.io[GBA_REG(VCOUNT)] = video->vcount;
if (video->vcount < GBA_VIDEO_VERTICAL_PIXELS) { if (video->vcount < GBA_VIDEO_VERTICAL_PIXELS) {
video->shouldStall = 1; video->stallMask = _calculateStallMask(video->p);
} }
GBARegisterDISPSTAT dispstat = video->p->memory.io[GBA_REG(DISPSTAT)]; GBARegisterDISPSTAT dispstat = video->p->memory.io[GBA_REG(DISPSTAT)];
@ -214,7 +215,7 @@ void _startHblank(struct mTiming* timing, void* context, uint32_t cyclesLate) {
if (GBARegisterDISPSTATIsHblankIRQ(dispstat)) { if (GBARegisterDISPSTATIsHblankIRQ(dispstat)) {
GBARaiseIRQ(video->p, GBA_IRQ_HBLANK, cyclesLate - 6); // TODO: Where does this fudge factor come from? GBARaiseIRQ(video->p, GBA_IRQ_HBLANK, cyclesLate - 6); // TODO: Where does this fudge factor come from?
} }
video->shouldStall = 0; video->stallMask = 0;
video->p->memory.io[GBA_REG(DISPSTAT)] = dispstat; video->p->memory.io[GBA_REG(DISPSTAT)] = dispstat;
} }
@ -224,6 +225,81 @@ void GBAVideoWriteDISPSTAT(struct GBAVideo* video, uint16_t value) {
// TODO: Does a VCounter IRQ trigger on write? // TODO: Does a VCounter IRQ trigger on write?
} }
/*
 * Build the set of GBA_VSTALL_* flags describing which enabled backgrounds
 * are considered when estimating VRAM access stalls, based on the current
 * DISPCNT mode, the DISPCNT background-enable bits and, for BG0-BG3 in the
 * modes handled below, the 256-color flag of the matching BGxCNT register.
 *
 * Returns 0 when no relevant background is enabled or the mode is not 0-5.
 */
static unsigned _calculateStallMask(struct GBA* gba) {
	const unsigned dispcnt = gba->memory.io[GBA_REG(DISPCNT)];
	const unsigned mode = GBARegisterDISPCNTGetMode(dispcnt);
	unsigned stallBits = 0;

	// BG0 and BG1 are handled identically in modes 0 and 1
	if (mode <= 1) {
		if (GBARegisterDISPCNTIsBg0Enable(dispcnt)) {
			stallBits |= GBARegisterBGCNTIs256Color(gba->memory.io[GBA_REG(BG0CNT)]) ? GBA_VSTALL_T8(0) : GBA_VSTALL_T4(0);
		}
		if (GBARegisterDISPCNTIsBg1Enable(dispcnt)) {
			stallBits |= GBARegisterBGCNTIs256Color(gba->memory.io[GBA_REG(BG1CNT)]) ? GBA_VSTALL_T8(1) : GBA_VSTALL_T4(1);
		}
	}

	// Only mode 0 applies the T4/T8 selection to BG2 and BG3
	if (mode == 0) {
		if (GBARegisterDISPCNTIsBg2Enable(dispcnt)) {
			stallBits |= GBARegisterBGCNTIs256Color(gba->memory.io[GBA_REG(BG2CNT)]) ? GBA_VSTALL_T8(2) : GBA_VSTALL_T4(2);
		}
		if (GBARegisterDISPCNTIsBg3Enable(dispcnt)) {
			stallBits |= GBARegisterBGCNTIs256Color(gba->memory.io[GBA_REG(BG3CNT)]) ? GBA_VSTALL_T8(3) : GBA_VSTALL_T4(3);
		}
	}

	// Modes 1 and 2 flag BG2 with A2; mode 2 additionally flags BG3 with A3
	if ((mode == 1 || mode == 2) && GBARegisterDISPCNTIsBg2Enable(dispcnt)) {
		stallBits |= GBA_VSTALL_A2;
	}
	if (mode == 2 && GBARegisterDISPCNTIsBg3Enable(dispcnt)) {
		stallBits |= GBA_VSTALL_A3;
	}

	// Modes 3-5 only look at BG2
	if (mode >= 3 && mode <= 5 && GBARegisterDISPCNTIsBg2Enable(dispcnt)) {
		stallBits |= GBA_VSTALL_B;
	}

	return stallBits;
}
static void GBAVideoDummyRendererInit(struct GBAVideoRenderer* renderer) { static void GBAVideoDummyRendererInit(struct GBAVideoRenderer* renderer) {
UNUSED(renderer); UNUSED(renderer);
// Nothing to do // Nothing to do
@ -353,7 +429,7 @@ void GBAVideoDeserialize(struct GBAVideo* video, const struct GBASerializedState
} }
LOAD_32(video->frameCounter, 0, &state->video.frameCounter); LOAD_32(video->frameCounter, 0, &state->video.frameCounter);
video->shouldStall = 0; video->stallMask = 0;
int32_t flags; int32_t flags;
LOAD_32(flags, 0, &state->video.flags); LOAD_32(flags, 0, &state->video.flags);
GBARegisterDISPSTAT dispstat = state->io[GBA_REG(DISPSTAT)]; GBARegisterDISPSTAT dispstat = state->io[GBA_REG(DISPSTAT)];
@ -370,7 +446,7 @@ void GBAVideoDeserialize(struct GBAVideo* video, const struct GBASerializedState
break; break;
case 2: case 2:
video->event.callback = _startHblank; video->event.callback = _startHblank;
video->shouldStall = 1; video->stallMask = _calculateStallMask(video->p);
break; break;
case 3: case 3:
video->event.callback = _startHdraw; video->event.callback = _startHdraw;