GBA Memory: Improve VRAM access stall cycle estimation

This commit is contained in:
Vicki Pfau 2024-06-17 02:27:31 -07:00
parent 458300b02e
commit 2ea11feda6
4 changed files with 151 additions and 24 deletions

View File

@ -35,6 +35,7 @@ Misc:
- GB Serialize: Add missing savestate support for MBC6 and NT (newer)
- GBA: Improve detection of valid ELF ROMs
- GBA Audio: Remove broken XQ audio pending rewrite
- GBA Memory: Improve VRAM access stall cycle estimation
- GBA Video: Add special circular window handling in OpenGL renderer
- Libretro: Add Super Game Boy Color support (closes mgba.io/i/3188)
- mGUI: Enable auto-softpatching (closes mgba.io/i/2899)

View File

@ -16,6 +16,12 @@ CXX_GUARD_START
mLOG_DECLARE_CATEGORY(GBA_VIDEO);
// Bit flags that make up GBAVideo.stallMask. GBAMemoryStallVRAM ANDs the mask
// against a per-slot lookup table built from these same flags; a slot whose
// table entry shares no bit with the mask is treated as free for CPU access.

// Flags for background X (0-3) when its BGCNT 256-color bit is clear.
// Sets bit X and bit X + 4. (Presumably "T4" = 4bpp tiled -- confirm.)
#define GBA_VSTALL_T4(X) (0x011 << (X))
// Flags for background X (0-3) when its BGCNT 256-color bit is set.
// Sets only bit X + 4. (Presumably "T8" = 8bpp tiled -- confirm.)
#define GBA_VSTALL_T8(X) (0x010 << (X))
// Set for an enabled BG2 in modes 1 and 2. (Presumably "A" = affine -- confirm.)
#define GBA_VSTALL_A2 0x100
// Set for an enabled BG3 in mode 2.
#define GBA_VSTALL_A3 0x200
// Set for an enabled BG2 in modes 3, 4 and 5. (Presumably "B" = bitmap -- confirm.)
#define GBA_VSTALL_B 0x400
enum {
VIDEO_HBLANK_PIXELS = 68,
VIDEO_HDRAW_LENGTH = 1008,
@ -208,7 +214,7 @@ struct GBAVideo {
struct mTimingEvent event;
int vcount;
int shouldStall;
unsigned stallMask;
uint16_t palette[512];
uint16_t* vram;

View File

@ -401,7 +401,7 @@ static void GBASetActiveRegion(struct ARMCore* cpu, uint32_t address) {
LOAD_32(value, address & 0x0001FFFC, gba->video.vram); \
} \
++wait; \
if (gba->video.shouldStall && (address & 0x0001FFFF) < ((GBARegisterDISPCNTGetMode(gba->memory.io[GBA_REG(DISPCNT)]) >= 3) ? 0x00014000 : 0x00010000)) { \
if (gba->video.stallMask && (address & 0x0001FFFF) < ((GBARegisterDISPCNTGetMode(gba->memory.io[GBA_REG(DISPCNT)]) >= 3) ? 0x00014000 : 0x00010000)) { \
wait += GBAMemoryStallVRAM(gba, wait, 1); \
}
@ -561,7 +561,7 @@ uint32_t GBALoad16(struct ARMCore* cpu, uint32_t address, int* cycleCounter) {
} else {
LOAD_16(value, address & 0x0001FFFE, gba->video.vram);
}
if (gba->video.shouldStall && (address & 0x0001FFFF) < ((GBARegisterDISPCNTGetMode(gba->memory.io[GBA_REG(DISPCNT)]) >= 3) ? 0x00014000 : 0x00010000)) {
if (gba->video.stallMask && (address & 0x0001FFFF) < ((GBARegisterDISPCNTGetMode(gba->memory.io[GBA_REG(DISPCNT)]) >= 3) ? 0x00014000 : 0x00010000)) {
wait += GBAMemoryStallVRAM(gba, wait, 0);
}
break;
@ -676,7 +676,7 @@ uint32_t GBALoad8(struct ARMCore* cpu, uint32_t address, int* cycleCounter) {
} else {
value = ((uint8_t*) gba->video.vram)[address & 0x0001FFFF];
}
if (gba->video.shouldStall) {
if (gba->video.stallMask) {
wait += GBAMemoryStallVRAM(gba, wait, 0);
}
break;
@ -781,7 +781,7 @@ uint32_t GBALoad8(struct ARMCore* cpu, uint32_t address, int* cycleCounter) {
} \
} \
++wait; \
if (gba->video.shouldStall && (address & 0x0001FFFF) < ((GBARegisterDISPCNTGetMode(gba->memory.io[GBA_REG(DISPCNT)]) >= 3) ? 0x00014000 : 0x00010000)) { \
if (gba->video.stallMask && (address & 0x0001FFFF) < ((GBARegisterDISPCNTGetMode(gba->memory.io[GBA_REG(DISPCNT)]) >= 3) ? 0x00014000 : 0x00010000)) { \
wait += GBAMemoryStallVRAM(gba, wait, 1); \
}
@ -908,7 +908,7 @@ void GBAStore16(struct ARMCore* cpu, uint32_t address, int16_t value, int* cycle
gba->video.renderer->writeVRAM(gba->video.renderer, address & 0x0001FFFE);
}
}
if (gba->video.shouldStall && (address & 0x0001FFFF) < ((GBARegisterDISPCNTGetMode(gba->memory.io[GBA_REG(DISPCNT)]) >= 3) ? 0x00014000 : 0x00010000)) {
if (gba->video.stallMask && (address & 0x0001FFFF) < ((GBARegisterDISPCNTGetMode(gba->memory.io[GBA_REG(DISPCNT)]) >= 3) ? 0x00014000 : 0x00010000)) {
wait += GBAMemoryStallVRAM(gba, wait, 0);
}
break;
@ -1038,7 +1038,7 @@ void GBAStore8(struct ARMCore* cpu, uint32_t address, int8_t value, int* cycleCo
gba->video.renderer->vram[(address & 0x1FFFE) >> 1] = ((uint8_t) value) | (value << 8);
gba->video.renderer->writeVRAM(gba->video.renderer, address & 0x0001FFFE);
}
if (gba->video.shouldStall) {
if (gba->video.stallMask) {
wait += GBAMemoryStallVRAM(gba, wait, 0);
}
break;
@ -1778,20 +1778,64 @@ int32_t GBAMemoryStall(struct ARMCore* cpu, int32_t wait) {
}
int32_t GBAMemoryStallVRAM(struct GBA* gba, int32_t wait, int extra) {
UNUSED(extra);
// TODO
uint16_t dispcnt = gba->memory.io[GBA_REG(DISPCNT)];
int32_t stall = 0;
switch (GBARegisterDISPCNTGetMode(dispcnt)) {
case 2:
if (GBARegisterDISPCNTIsBg2Enable(dispcnt) && GBARegisterDISPCNTIsBg3Enable(dispcnt)) {
// If both backgrounds are enabled, VRAM access is entirely blocked during hdraw
stall = mTimingUntil(&gba->timing, &gba->video.event);
static const uint16_t stallLUT[32] = {
GBA_VSTALL_T4(0) | GBA_VSTALL_A3,
GBA_VSTALL_T4(1) | GBA_VSTALL_A3,
GBA_VSTALL_T4(2) | GBA_VSTALL_A2,
GBA_VSTALL_T4(3) | GBA_VSTALL_A3 | GBA_VSTALL_B,
GBA_VSTALL_T4(0) | GBA_VSTALL_A3,
GBA_VSTALL_T4(1) | GBA_VSTALL_A3,
GBA_VSTALL_T4(2) | GBA_VSTALL_A2,
GBA_VSTALL_T4(3) | GBA_VSTALL_A3 | GBA_VSTALL_B,
GBA_VSTALL_A3,
GBA_VSTALL_A3,
GBA_VSTALL_A2,
GBA_VSTALL_A3 | GBA_VSTALL_B,
GBA_VSTALL_T8(0) | GBA_VSTALL_A3,
GBA_VSTALL_T8(1) | GBA_VSTALL_A3,
GBA_VSTALL_T8(2) | GBA_VSTALL_A2,
GBA_VSTALL_T8(3) | GBA_VSTALL_A3 | GBA_VSTALL_B,
GBA_VSTALL_A3,
GBA_VSTALL_A3,
GBA_VSTALL_A2,
GBA_VSTALL_A3 | GBA_VSTALL_B,
GBA_VSTALL_T4(0) | GBA_VSTALL_A3,
GBA_VSTALL_T4(1) | GBA_VSTALL_A3,
GBA_VSTALL_T4(2) | GBA_VSTALL_A2,
GBA_VSTALL_T4(3) | GBA_VSTALL_A3 | GBA_VSTALL_B,
GBA_VSTALL_A3,
GBA_VSTALL_A3,
GBA_VSTALL_A2,
GBA_VSTALL_A3 | GBA_VSTALL_B,
GBA_VSTALL_T8(0) | GBA_VSTALL_A3,
GBA_VSTALL_T8(1) | GBA_VSTALL_A3,
GBA_VSTALL_T8(2) | GBA_VSTALL_A2,
GBA_VSTALL_T8(3) | GBA_VSTALL_A3 | GBA_VSTALL_B,
};
int32_t until = mTimingUntil(&gba->timing, &gba->video.event);
int period = -until & 0x1F;
int32_t stall = until;
int i;
for (i = 0; i < 16; ++i) {
if (!(stallLUT[(period + i) & 0x1F] & gba->video.stallMask)) {
if (!extra) {
stall = i;
break;
}
--extra;
}
break;
default:
return 0;
}
stall -= wait;
if (stall < 0) {
return 0;

View File

@ -32,6 +32,7 @@ static void GBAVideoDummyRendererPutPixels(struct GBAVideoRenderer* renderer, si
static void _startHblank(struct mTiming*, void* context, uint32_t cyclesLate);
static void _startHdraw(struct mTiming*, void* context, uint32_t cyclesLate);
static unsigned _calculateStallMask(struct GBA* gba);
MGBA_EXPORT const int GBAVideoObjSizes[16][2] = {
{ 8, 8 },
@ -78,7 +79,7 @@ void GBAVideoReset(struct GBAVideo* video) {
video->frameCounter = 0;
video->frameskipCounter = 0;
video->shouldStall = 0;
video->stallMask = 0;
memset(video->palette, 0, sizeof(video->palette));
memset(video->oam.raw, 0, sizeof(video->oam.raw));
@ -149,7 +150,7 @@ void _startHdraw(struct mTiming* timing, void* context, uint32_t cyclesLate) {
video->p->memory.io[GBA_REG(VCOUNT)] = video->vcount;
if (video->vcount < GBA_VIDEO_VERTICAL_PIXELS) {
video->shouldStall = 1;
video->stallMask = _calculateStallMask(video->p);
}
GBARegisterDISPSTAT dispstat = video->p->memory.io[GBA_REG(DISPSTAT)];
@ -214,7 +215,7 @@ void _startHblank(struct mTiming* timing, void* context, uint32_t cyclesLate) {
if (GBARegisterDISPSTATIsHblankIRQ(dispstat)) {
GBARaiseIRQ(video->p, GBA_IRQ_HBLANK, cyclesLate - 6); // TODO: Where does this fudge factor come from?
}
video->shouldStall = 0;
video->stallMask = 0;
video->p->memory.io[GBA_REG(DISPSTAT)] = dispstat;
}
@ -224,6 +225,81 @@ void GBAVideoWriteDISPSTAT(struct GBAVideo* video, uint16_t value) {
// TODO: Does a VCounter IRQ trigger on write?
}
// Stall flags for background `bg`, whose control register lives at I/O index
// `bgcntReg`: the T8 pattern when the register's 256-color bit is set, the T4
// pattern otherwise.
static unsigned _tileLayerStall(struct GBA* gba, int bgcntReg, int bg) {
	return GBARegisterBGCNTIs256Color(gba->memory.io[bgcntReg]) ? GBA_VSTALL_T8(bg) : GBA_VSTALL_T4(bg);
}

// Builds the VRAM stall mask from the current DISPCNT value: each enabled
// background contributes the GBA_VSTALL_* flags matching how the active video
// mode uses it. Disabled backgrounds and unhandled modes contribute nothing,
// so the result may be 0.
static unsigned _calculateStallMask(struct GBA* gba) {
	const unsigned dispcnt = gba->memory.io[GBA_REG(DISPCNT)];
	const unsigned mode = GBARegisterDISPCNTGetMode(dispcnt);
	unsigned stall = 0;

	// BG0 and BG1 are handled identically in modes 0 and 1
	if (mode <= 1) {
		if (GBARegisterDISPCNTIsBg0Enable(dispcnt)) {
			stall |= _tileLayerStall(gba, GBA_REG(BG0CNT), 0);
		}
		if (GBARegisterDISPCNTIsBg1Enable(dispcnt)) {
			stall |= _tileLayerStall(gba, GBA_REG(BG1CNT), 1);
		}
	}

	// BG2 and BG3 depend on the mode
	switch (mode) {
	case 0:
		if (GBARegisterDISPCNTIsBg2Enable(dispcnt)) {
			stall |= _tileLayerStall(gba, GBA_REG(BG2CNT), 2);
		}
		if (GBARegisterDISPCNTIsBg3Enable(dispcnt)) {
			stall |= _tileLayerStall(gba, GBA_REG(BG3CNT), 3);
		}
		break;
	case 1:
		// Only BG2 contributes here; BG3 is not consulted in mode 1
		if (GBARegisterDISPCNTIsBg2Enable(dispcnt)) {
			stall |= GBA_VSTALL_A2;
		}
		break;
	case 2:
		if (GBARegisterDISPCNTIsBg2Enable(dispcnt)) {
			stall |= GBA_VSTALL_A2;
		}
		if (GBARegisterDISPCNTIsBg3Enable(dispcnt)) {
			stall |= GBA_VSTALL_A3;
		}
		break;
	case 3:
	case 4:
	case 5:
		// Modes 3-5 only look at BG2
		if (GBARegisterDISPCNTIsBg2Enable(dispcnt)) {
			stall |= GBA_VSTALL_B;
		}
		break;
	default:
		break;
	}

	return stall;
}
static void GBAVideoDummyRendererInit(struct GBAVideoRenderer* renderer) {
UNUSED(renderer);
// Nothing to do
@ -353,7 +429,7 @@ void GBAVideoDeserialize(struct GBAVideo* video, const struct GBASerializedState
}
LOAD_32(video->frameCounter, 0, &state->video.frameCounter);
video->shouldStall = 0;
video->stallMask = 0;
int32_t flags;
LOAD_32(flags, 0, &state->video.flags);
GBARegisterDISPSTAT dispstat = state->io[GBA_REG(DISPSTAT)];
@ -370,7 +446,7 @@ void GBAVideoDeserialize(struct GBAVideo* video, const struct GBASerializedState
break;
case 2:
video->event.callback = _startHblank;
video->shouldStall = 1;
video->stallMask = _calculateStallMask(video->p);
break;
case 3:
video->event.callback = _startHdraw;