mirror of https://github.com/mgba-emu/mgba.git
Merge branch 'optimization/idle-loop-detection'
This commit is contained in:
commit
41559813b7
1
CHANGES
1
CHANGES
|
@ -18,6 +18,7 @@ Features:
|
|||
- Support IPv6
|
||||
- Save directory of last loaded file
|
||||
- Support BPS patches
|
||||
- Automatically detect and optimize out idle loops
|
||||
- Configurable game overrides
|
||||
- Support loading 7-Zip files
|
||||
- Drag and drop game loading
|
||||
|
|
|
@ -212,6 +212,18 @@ void GBAConfigMap(const struct GBAConfig* config, struct GBAOptions* opts) {
|
|||
_lookupIntValue(config, "fullscreen", &opts->fullscreen);
|
||||
_lookupIntValue(config, "width", &opts->width);
|
||||
_lookupIntValue(config, "height", &opts->height);
|
||||
|
||||
char* idleOptimization = 0;
|
||||
if (_lookupCharValue(config, "idleOptimization", &idleOptimization)) {
|
||||
if (strcasecmp(idleOptimization, "ignore") == 0) {
|
||||
opts->idleOptimization = IDLE_LOOP_IGNORE;
|
||||
} else if (strcasecmp(idleOptimization, "remove") == 0) {
|
||||
opts->idleOptimization = IDLE_LOOP_REMOVE;
|
||||
} else if (strcasecmp(idleOptimization, "detect") == 0) {
|
||||
opts->idleOptimization = IDLE_LOOP_DETECT;
|
||||
}
|
||||
free(idleOptimization);
|
||||
}
|
||||
}
|
||||
|
||||
void GBAConfigLoadDefaults(struct GBAConfig* config, const struct GBAOptions* opts) {
|
||||
|
@ -231,6 +243,18 @@ void GBAConfigLoadDefaults(struct GBAConfig* config, const struct GBAOptions* op
|
|||
ConfigurationSetIntValue(&config->defaultsTable, 0, "height", opts->height);
|
||||
ConfigurationSetIntValue(&config->defaultsTable, 0, "lockAspectRatio", opts->lockAspectRatio);
|
||||
ConfigurationSetIntValue(&config->defaultsTable, 0, "resampleVideo", opts->resampleVideo);
|
||||
|
||||
switch (opts->idleOptimization) {
|
||||
case IDLE_LOOP_IGNORE:
|
||||
ConfigurationSetValue(&config->defaultsTable, 0, "idleOptimization", "ignore");
|
||||
break;
|
||||
case IDLE_LOOP_REMOVE:
|
||||
ConfigurationSetValue(&config->defaultsTable, 0, "idleOptimization", "remove");
|
||||
break;
|
||||
case IDLE_LOOP_DETECT:
|
||||
ConfigurationSetValue(&config->defaultsTable, 0, "idleOptimization", "detect");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void GBAConfigFreeOpts(struct GBAOptions* opts) {
|
||||
|
|
|
@ -8,6 +8,8 @@
|
|||
|
||||
#include "util/common.h"
|
||||
|
||||
#include "gba.h"
|
||||
|
||||
#include "util/configuration.h"
|
||||
|
||||
struct GBAConfig {
|
||||
|
@ -35,6 +37,8 @@ struct GBAOptions {
|
|||
|
||||
bool videoSync;
|
||||
bool audioSync;
|
||||
|
||||
enum GBAIdleLoopOptimization idleOptimization;
|
||||
};
|
||||
|
||||
void GBAConfigInit(struct GBAConfig*, const char* port);
|
||||
|
|
|
@ -567,6 +567,7 @@ void GBAIOWrite32(struct GBA* gba, uint32_t address, uint32_t value) {
|
|||
}
|
||||
|
||||
uint16_t GBAIORead(struct GBA* gba, uint32_t address) {
|
||||
gba->lastJump = -1; // IO reads need to invalidate detected idle loops
|
||||
switch (address) {
|
||||
case REG_TM0CNT_LO:
|
||||
GBATimerUpdateRegister(gba, 0);
|
||||
|
|
|
@ -7,12 +7,15 @@
|
|||
|
||||
#include "macros.h"
|
||||
|
||||
#include "decoder.h"
|
||||
#include "gba-gpio.h"
|
||||
#include "gba-io.h"
|
||||
#include "gba-serialize.h"
|
||||
#include "hle-bios.h"
|
||||
#include "util/memory.h"
|
||||
|
||||
#define IDLE_LOOP_THRESHOLD 10000
|
||||
|
||||
static uint32_t _popcount32(unsigned bits);
|
||||
static uint32_t _deadbeef[2] = { 0xDEADBEEF, 0xFEEDFACE };
|
||||
|
||||
|
@ -114,18 +117,118 @@ void GBAMemoryReset(struct GBA* gba) {
|
|||
}
|
||||
}
|
||||
|
||||
static void _analyzeForIdleLoop(struct GBA* gba, struct ARMCore* cpu, uint32_t address) {
|
||||
struct ARMInstructionInfo info;
|
||||
uint32_t nextAddress = address;
|
||||
memset(gba->taintedRegisters, 0, sizeof(gba->taintedRegisters));
|
||||
if (cpu->executionMode == MODE_THUMB) {
|
||||
while (true) {
|
||||
uint16_t opcode;
|
||||
LOAD_16(opcode, nextAddress & cpu->memory.activeMask, cpu->memory.activeRegion);
|
||||
ARMDecodeThumb(opcode, &info);
|
||||
switch (info.branchType) {
|
||||
case ARM_BRANCH_NONE:
|
||||
if (info.operandFormat & ARM_OPERAND_MEMORY_2) {
|
||||
if (info.mnemonic == ARM_MN_STR || gba->taintedRegisters[info.memory.baseReg]) {
|
||||
gba->idleDetectionStep = -1;
|
||||
return;
|
||||
}
|
||||
uint32_t loadAddress = gba->cachedRegisters[info.memory.baseReg];
|
||||
uint32_t offset = 0;
|
||||
if (info.memory.format & ARM_MEMORY_IMMEDIATE_OFFSET) {
|
||||
offset = info.memory.offset.immediate;
|
||||
} else if (info.memory.format & ARM_MEMORY_REGISTER_OFFSET) {
|
||||
int reg = info.memory.offset.reg;
|
||||
if (gba->cachedRegisters[reg]) {
|
||||
gba->idleDetectionStep = -1;
|
||||
return;
|
||||
}
|
||||
offset = gba->cachedRegisters[reg];
|
||||
}
|
||||
if (info.memory.format & ARM_MEMORY_OFFSET_SUBTRACT) {
|
||||
loadAddress -= offset;
|
||||
} else {
|
||||
loadAddress += offset;
|
||||
}
|
||||
if ((loadAddress >> BASE_OFFSET) == REGION_IO) {
|
||||
gba->idleDetectionStep = -1;
|
||||
return;
|
||||
}
|
||||
if ((loadAddress >> BASE_OFFSET) < REGION_CART0 || (loadAddress >> BASE_OFFSET) > REGION_CART2_EX) {
|
||||
gba->taintedRegisters[info.op1.reg] = true;
|
||||
} else {
|
||||
switch (info.memory.width) {
|
||||
case 1:
|
||||
gba->cachedRegisters[info.op1.reg] = GBALoad8(cpu, loadAddress, 0);
|
||||
break;
|
||||
case 2:
|
||||
gba->cachedRegisters[info.op1.reg] = GBALoad16(cpu, loadAddress, 0);
|
||||
break;
|
||||
case 4:
|
||||
gba->cachedRegisters[info.op1.reg] = GBALoad32(cpu, loadAddress, 0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if (info.operandFormat & ARM_OPERAND_AFFECTED_1) {
|
||||
gba->taintedRegisters[info.op1.reg] = true;
|
||||
}
|
||||
nextAddress += WORD_SIZE_THUMB;
|
||||
break;
|
||||
case ARM_BRANCH:
|
||||
if ((uint32_t) info.op1.immediate + nextAddress + WORD_SIZE_THUMB * 2 == address) {
|
||||
gba->idleLoop = address;
|
||||
gba->idleOptimization = IDLE_LOOP_REMOVE;
|
||||
}
|
||||
gba->idleDetectionStep = -1;
|
||||
return;
|
||||
default:
|
||||
gba->idleDetectionStep = -1;
|
||||
return;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
gba->idleDetectionStep = -1;
|
||||
}
|
||||
}
|
||||
|
||||
static void GBASetActiveRegion(struct ARMCore* cpu, uint32_t address) {
|
||||
struct GBA* gba = (struct GBA*) cpu->master;
|
||||
struct GBAMemory* memory = &gba->memory;
|
||||
|
||||
if (address == gba->busyLoop && memory->activeRegion != REGION_BIOS) {
|
||||
GBAHalt(gba);
|
||||
int newRegion = address >> BASE_OFFSET;
|
||||
if (gba->idleOptimization >= IDLE_LOOP_REMOVE && memory->activeRegion != REGION_BIOS) {
|
||||
if (address == gba->lastJump && address == gba->idleLoop) {
|
||||
GBAHalt(gba);
|
||||
} else if (gba->idleOptimization >= IDLE_LOOP_DETECT && newRegion == memory->activeRegion) {
|
||||
if (address == gba->lastJump) {
|
||||
switch (gba->idleDetectionStep) {
|
||||
case 0:
|
||||
memcpy(gba->cachedRegisters, cpu->gprs, sizeof(gba->cachedRegisters));
|
||||
++gba->idleDetectionStep;
|
||||
break;
|
||||
case 1:
|
||||
if (memcmp(gba->cachedRegisters, cpu->gprs, sizeof(gba->cachedRegisters))) {
|
||||
gba->idleDetectionStep = -1;
|
||||
++gba->idleDetectionFailures;
|
||||
if (gba->idleDetectionFailures > IDLE_LOOP_THRESHOLD) {
|
||||
gba->idleOptimization = IDLE_LOOP_IGNORE;
|
||||
}
|
||||
break;
|
||||
}
|
||||
_analyzeForIdleLoop(gba, cpu, address);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
gba->idleDetectionStep = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int newRegion = address >> BASE_OFFSET;
|
||||
gba->lastJump = address;
|
||||
if (newRegion == memory->activeRegion) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (memory->activeRegion == REGION_BIOS) {
|
||||
memory->biosPrefetch = cpu->prefetch[1];
|
||||
}
|
||||
|
|
|
@ -26,7 +26,7 @@ static const struct GBACartridgeOverride _overrides[] = {
|
|||
{ "V49E", SAVEDATA_SRAM, GPIO_RUMBLE, -1 },
|
||||
|
||||
// Final Fantasy Tactics Advance
|
||||
{ "AFXE", SAVEDATA_FLASH512, GPIO_NONE, 0x8000418 },
|
||||
{ "AFXE", SAVEDATA_FLASH512, GPIO_NONE, 0x8000428 },
|
||||
|
||||
// Koro Koro Puzzle - Happy Panechu!
|
||||
{ "KHPJ", SAVEDATA_EEPROM, GPIO_TILT, -1 },
|
||||
|
@ -240,6 +240,9 @@ void GBAOverrideApply(struct GBA* gba, const struct GBACartridgeOverride* overri
|
|||
}
|
||||
|
||||
if (override->idleLoop != 0xFFFFFFFF) {
|
||||
gba->busyLoop = override->idleLoop;
|
||||
gba->idleLoop = override->idleLoop;
|
||||
if (gba->idleOptimization == IDLE_LOOP_DETECT) {
|
||||
gba->idleOptimization = IDLE_LOOP_REMOVE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -120,6 +120,7 @@ static THREAD_ENTRY _GBAThreadRun(void* context) {
|
|||
gba.sync = &threadContext->sync;
|
||||
threadContext->gba = &gba;
|
||||
gba.logLevel = threadContext->logLevel;
|
||||
gba.idleOptimization = threadContext->idleOptimization;
|
||||
#ifdef USE_PTHREADS
|
||||
pthread_setspecific(_contextKey, threadContext);
|
||||
#else
|
||||
|
@ -260,6 +261,8 @@ void GBAMapOptionsToContext(const struct GBAOptions* opts, struct GBAThread* thr
|
|||
if (opts->audioBuffers) {
|
||||
threadContext->audioBuffers = opts->audioBuffers;
|
||||
}
|
||||
|
||||
threadContext->idleOptimization = opts->idleOptimization;
|
||||
}
|
||||
|
||||
void GBAMapArgumentsToContext(const struct GBAArguments* args, struct GBAThread* threadContext) {
|
||||
|
|
|
@ -72,6 +72,7 @@ struct GBAThread {
|
|||
int activeKeys;
|
||||
struct GBAAVStream* stream;
|
||||
struct Configuration* overrides;
|
||||
enum GBAIdleLoopOptimization idleOptimization;
|
||||
|
||||
bool hasOverride;
|
||||
struct GBACartridgeOverride override;
|
||||
|
|
|
@ -76,7 +76,11 @@ static void GBAInit(struct ARMCore* cpu, struct ARMComponent* component) {
|
|||
|
||||
gba->biosChecksum = GBAChecksum(gba->memory.bios, SIZE_BIOS);
|
||||
|
||||
gba->busyLoop = -1;
|
||||
gba->idleOptimization = IDLE_LOOP_REMOVE;
|
||||
gba->idleLoop = -1;
|
||||
gba->lastJump = 0;
|
||||
gba->idleDetectionStep = 0;
|
||||
gba->idleDetectionFailures = 0;
|
||||
gba->performingDMA = false;
|
||||
}
|
||||
|
||||
|
|
|
@ -75,6 +75,12 @@ enum GBAComponent {
|
|||
GBA_COMPONENT_MAX
|
||||
};
|
||||
|
||||
/*
 * Idle loop handling policy. The numeric order is significant: other code
 * compares against these with >=, so IGNORE < REMOVE < DETECT must hold.
 */
enum GBAIdleLoopOptimization {
	/* Do not act on idle loops at all (config value "ignore"). */
	IDLE_LOOP_IGNORE = -1,
	/* Act on an already-known idle loop address (config value "remove"). */
	IDLE_LOOP_REMOVE = 0,
	/* Additionally try to discover idle loops (config value "detect"). */
	IDLE_LOOP_DETECT = 1
};
|
||||
|
||||
enum {
|
||||
SP_BASE_SYSTEM = 0x03007F00,
|
||||
SP_BASE_IRQ = 0x03007FA0,
|
||||
|
@ -120,7 +126,6 @@ struct GBA {
|
|||
int springIRQ;
|
||||
uint32_t biosChecksum;
|
||||
int* keySource;
|
||||
uint32_t busyLoop;
|
||||
struct GBARotationSource* rotationSource;
|
||||
struct GBALuminanceSource* luminanceSource;
|
||||
struct GBARTCSource* rtcSource;
|
||||
|
@ -136,6 +141,14 @@ struct GBA {
|
|||
const char* activeFile;
|
||||
|
||||
int logLevel;
|
||||
|
||||
enum GBAIdleLoopOptimization idleOptimization;
|
||||
uint32_t idleLoop;
|
||||
uint32_t lastJump;
|
||||
int idleDetectionStep;
|
||||
int idleDetectionFailures;
|
||||
int32_t cachedRegisters[16];
|
||||
bool taintedRegisters[16];
|
||||
};
|
||||
|
||||
struct GBACartridge {
|
||||
|
|
|
@ -56,7 +56,11 @@ int main(int argc, char** argv) {
|
|||
GBAConfigInit(&config, "perf");
|
||||
GBAConfigLoad(&config);
|
||||
|
||||
struct GBAOptions opts = {};
|
||||
struct GBAOptions opts = {
|
||||
.idleOptimization = IDLE_LOOP_DETECT
|
||||
};
|
||||
GBAConfigLoadDefaults(&config, &opts);
|
||||
|
||||
struct GBAArguments args = {};
|
||||
if (!parseArguments(&args, &config, argc, argv, &subparser)) {
|
||||
usage(argv[0], PERF_USAGE);
|
||||
|
@ -69,7 +73,7 @@ int main(int argc, char** argv) {
|
|||
renderer.outputBuffer = malloc(256 * 256 * 4);
|
||||
renderer.outputBufferStride = 256;
|
||||
|
||||
struct GBAThread context = { };
|
||||
struct GBAThread context = {};
|
||||
_thread = &context;
|
||||
|
||||
if (!perfOpts.noVideo) {
|
||||
|
|
Loading…
Reference in New Issue