Merge branch 'optimization/idle-loop-detection'

This commit is contained in:
Jeffrey Pfau 2015-01-28 22:34:06 -08:00
commit 41559813b7
11 changed files with 170 additions and 9 deletions

View File

@ -18,6 +18,7 @@ Features:
- Support IPv6
- Save directory of last loaded file
- Support BPS patches
- Automatically detect and optimize out idle loops
- Configurable game overrides
- Support loading 7-Zip files
- Drag and drop game loading

View File

@ -212,6 +212,18 @@ void GBAConfigMap(const struct GBAConfig* config, struct GBAOptions* opts) {
_lookupIntValue(config, "fullscreen", &opts->fullscreen);
_lookupIntValue(config, "width", &opts->width);
_lookupIntValue(config, "height", &opts->height);
char* idleOptimization = 0;
if (_lookupCharValue(config, "idleOptimization", &idleOptimization)) {
if (strcasecmp(idleOptimization, "ignore") == 0) {
opts->idleOptimization = IDLE_LOOP_IGNORE;
} else if (strcasecmp(idleOptimization, "remove") == 0) {
opts->idleOptimization = IDLE_LOOP_REMOVE;
} else if (strcasecmp(idleOptimization, "detect") == 0) {
opts->idleOptimization = IDLE_LOOP_DETECT;
}
free(idleOptimization);
}
}
void GBAConfigLoadDefaults(struct GBAConfig* config, const struct GBAOptions* opts) {
@ -231,6 +243,18 @@ void GBAConfigLoadDefaults(struct GBAConfig* config, const struct GBAOptions* op
ConfigurationSetIntValue(&config->defaultsTable, 0, "height", opts->height);
ConfigurationSetIntValue(&config->defaultsTable, 0, "lockAspectRatio", opts->lockAspectRatio);
ConfigurationSetIntValue(&config->defaultsTable, 0, "resampleVideo", opts->resampleVideo);
switch (opts->idleOptimization) {
case IDLE_LOOP_IGNORE:
ConfigurationSetValue(&config->defaultsTable, 0, "idleOptimization", "ignore");
break;
case IDLE_LOOP_REMOVE:
ConfigurationSetValue(&config->defaultsTable, 0, "idleOptimization", "remove");
break;
case IDLE_LOOP_DETECT:
ConfigurationSetValue(&config->defaultsTable, 0, "idleOptimization", "detect");
break;
}
}
void GBAConfigFreeOpts(struct GBAOptions* opts) {

View File

@ -8,6 +8,8 @@
#include "util/common.h"
#include "gba.h"
#include "util/configuration.h"
struct GBAConfig {
@ -35,6 +37,8 @@ struct GBAOptions {
bool videoSync;
bool audioSync;
enum GBAIdleLoopOptimization idleOptimization;
};
void GBAConfigInit(struct GBAConfig*, const char* port);

View File

@ -567,6 +567,7 @@ void GBAIOWrite32(struct GBA* gba, uint32_t address, uint32_t value) {
}
uint16_t GBAIORead(struct GBA* gba, uint32_t address) {
gba->lastJump = -1; // IO reads need to invalidate detected idle loops
switch (address) {
case REG_TM0CNT_LO:
GBATimerUpdateRegister(gba, 0);

View File

@ -7,12 +7,15 @@
#include "macros.h"
#include "decoder.h"
#include "gba-gpio.h"
#include "gba-io.h"
#include "gba-serialize.h"
#include "hle-bios.h"
#include "util/memory.h"
#define IDLE_LOOP_THRESHOLD 10000
static uint32_t _popcount32(unsigned bits);
static uint32_t _deadbeef[2] = { 0xDEADBEEF, 0xFEEDFACE };
@ -114,18 +117,118 @@ void GBAMemoryReset(struct GBA* gba) {
}
}
/*
 * Statically walks the instructions starting at `address` to decide whether
 * they form a loop that branches straight back to `address` without storing
 * to memory, reading I/O, or loading through an address that can no longer
 * be derived from the register snapshot in gba->cachedRegisters.
 *
 * gba:     emulator state; taintedRegisters is reset here, and cachedRegisters
 *          is expected to already hold the register snapshot taken by the
 *          caller.
 * cpu:     core whose active memory region is decoded.
 * address: jump target under analysis.
 *
 * If an unconditional branch back to `address` is reached, gba->idleLoop is
 * set to `address` and gba->idleOptimization becomes IDLE_LOOP_REMOVE.
 * Every exit path sets gba->idleDetectionStep to -1. Only Thumb code is
 * analyzed; in any other execution mode the function gives up immediately.
 */
static void _analyzeForIdleLoop(struct GBA* gba, struct ARMCore* cpu, uint32_t address) {
	struct ARMInstructionInfo info;
	uint32_t nextAddress = address;
	memset(gba->taintedRegisters, 0, sizeof(gba->taintedRegisters));
	if (cpu->executionMode == MODE_THUMB) {
		while (true) {
			uint16_t opcode;
			LOAD_16(opcode, nextAddress & cpu->memory.activeMask, cpu->memory.activeRegion);
			ARMDecodeThumb(opcode, &info);
			switch (info.branchType) {
			case ARM_BRANCH_NONE:
				if (info.operandFormat & ARM_OPERAND_MEMORY_2) {
					// Stores, and accesses through a base register whose value is
					// no longer known, rule the loop out
					if (info.mnemonic == ARM_MN_STR || gba->taintedRegisters[info.memory.baseReg]) {
						gba->idleDetectionStep = -1;
						return;
					}
					uint32_t loadAddress = gba->cachedRegisters[info.memory.baseReg];
					uint32_t offset = 0;
					if (info.memory.format & ARM_MEMORY_IMMEDIATE_OFFSET) {
						offset = info.memory.offset.immediate;
					} else if (info.memory.format & ARM_MEMORY_REGISTER_OFFSET) {
						int reg = info.memory.offset.reg;
						// The cached value is only usable as an offset if the
						// register has not been overwritten with an unknown value
						if (gba->taintedRegisters[reg]) {
							gba->idleDetectionStep = -1;
							return;
						}
						offset = gba->cachedRegisters[reg];
					}
					if (info.memory.format & ARM_MEMORY_OFFSET_SUBTRACT) {
						loadAddress -= offset;
					} else {
						loadAddress += offset;
					}
					uint32_t loadRegion = loadAddress >> BASE_OFFSET;
					if (loadRegion == REGION_IO) {
						gba->idleDetectionStep = -1;
						return;
					}
					if (loadRegion < REGION_CART0 || loadRegion > REGION_CART2_EX) {
						// Value loaded from outside the cartridge regions is not tracked
						gba->taintedRegisters[info.op1.reg] = true;
					} else {
						// Cartridge loads are performed now so the destination
						// register can keep serving as a known base/offset
						switch (info.memory.width) {
						case 1:
							gba->cachedRegisters[info.op1.reg] = GBALoad8(cpu, loadAddress, 0);
							break;
						case 2:
							gba->cachedRegisters[info.op1.reg] = GBALoad16(cpu, loadAddress, 0);
							break;
						case 4:
							gba->cachedRegisters[info.op1.reg] = GBALoad32(cpu, loadAddress, 0);
							break;
						default:
							// Unhandled access width: the cached value would be stale,
							// so stop trusting the destination register
							gba->taintedRegisters[info.op1.reg] = true;
							break;
						}
					}
				} else if (info.operandFormat & ARM_OPERAND_AFFECTED_1) {
					// Non-memory instruction that writes its first operand
					gba->taintedRegisters[info.op1.reg] = true;
				}
				nextAddress += WORD_SIZE_THUMB;
				break;
			case ARM_BRANCH:
				// Branch target is relative to the instruction address plus two Thumb words
				if ((uint32_t) info.op1.immediate + nextAddress + WORD_SIZE_THUMB * 2 == address) {
					gba->idleLoop = address;
					gba->idleOptimization = IDLE_LOOP_REMOVE;
				}
				gba->idleDetectionStep = -1;
				return;
			default:
				// Any other kind of branch ends the analysis without a verdict
				gba->idleDetectionStep = -1;
				return;
			}
		}
	} else {
		gba->idleDetectionStep = -1;
	}
}
static void GBASetActiveRegion(struct ARMCore* cpu, uint32_t address) {
struct GBA* gba = (struct GBA*) cpu->master;
struct GBAMemory* memory = &gba->memory;
if (address == gba->busyLoop && memory->activeRegion != REGION_BIOS) {
GBAHalt(gba);
int newRegion = address >> BASE_OFFSET;
if (gba->idleOptimization >= IDLE_LOOP_REMOVE && memory->activeRegion != REGION_BIOS) {
if (address == gba->lastJump && address == gba->idleLoop) {
GBAHalt(gba);
} else if (gba->idleOptimization >= IDLE_LOOP_DETECT && newRegion == memory->activeRegion) {
if (address == gba->lastJump) {
switch (gba->idleDetectionStep) {
case 0:
memcpy(gba->cachedRegisters, cpu->gprs, sizeof(gba->cachedRegisters));
++gba->idleDetectionStep;
break;
case 1:
if (memcmp(gba->cachedRegisters, cpu->gprs, sizeof(gba->cachedRegisters))) {
gba->idleDetectionStep = -1;
++gba->idleDetectionFailures;
if (gba->idleDetectionFailures > IDLE_LOOP_THRESHOLD) {
gba->idleOptimization = IDLE_LOOP_IGNORE;
}
break;
}
_analyzeForIdleLoop(gba, cpu, address);
break;
}
} else {
gba->idleDetectionStep = 0;
}
}
}
int newRegion = address >> BASE_OFFSET;
gba->lastJump = address;
if (newRegion == memory->activeRegion) {
return;
}
if (memory->activeRegion == REGION_BIOS) {
memory->biosPrefetch = cpu->prefetch[1];
}

View File

@ -26,7 +26,7 @@ static const struct GBACartridgeOverride _overrides[] = {
{ "V49E", SAVEDATA_SRAM, GPIO_RUMBLE, -1 },
// Final Fantasy Tactics Advance
{ "AFXE", SAVEDATA_FLASH512, GPIO_NONE, 0x8000418 },
{ "AFXE", SAVEDATA_FLASH512, GPIO_NONE, 0x8000428 },
// Koro Koro Puzzle - Happy Panechu!
{ "KHPJ", SAVEDATA_EEPROM, GPIO_TILT, -1 },
@ -240,6 +240,9 @@ void GBAOverrideApply(struct GBA* gba, const struct GBACartridgeOverride* overri
}
if (override->idleLoop != 0xFFFFFFFF) {
gba->busyLoop = override->idleLoop;
gba->idleLoop = override->idleLoop;
if (gba->idleOptimization == IDLE_LOOP_DETECT) {
gba->idleOptimization = IDLE_LOOP_REMOVE;
}
}
}

View File

@ -120,6 +120,7 @@ static THREAD_ENTRY _GBAThreadRun(void* context) {
gba.sync = &threadContext->sync;
threadContext->gba = &gba;
gba.logLevel = threadContext->logLevel;
gba.idleOptimization = threadContext->idleOptimization;
#ifdef USE_PTHREADS
pthread_setspecific(_contextKey, threadContext);
#else
@ -260,6 +261,8 @@ void GBAMapOptionsToContext(const struct GBAOptions* opts, struct GBAThread* thr
if (opts->audioBuffers) {
threadContext->audioBuffers = opts->audioBuffers;
}
threadContext->idleOptimization = opts->idleOptimization;
}
void GBAMapArgumentsToContext(const struct GBAArguments* args, struct GBAThread* threadContext) {

View File

@ -72,6 +72,7 @@ struct GBAThread {
int activeKeys;
struct GBAAVStream* stream;
struct Configuration* overrides;
enum GBAIdleLoopOptimization idleOptimization;
bool hasOverride;
struct GBACartridgeOverride override;

View File

@ -76,7 +76,11 @@ static void GBAInit(struct ARMCore* cpu, struct ARMComponent* component) {
gba->biosChecksum = GBAChecksum(gba->memory.bios, SIZE_BIOS);
gba->busyLoop = -1;
gba->idleOptimization = IDLE_LOOP_REMOVE;
gba->idleLoop = -1;
gba->lastJump = 0;
gba->idleDetectionStep = 0;
gba->idleDetectionFailures = 0;
gba->performingDMA = false;
}

View File

@ -75,6 +75,12 @@ enum GBAComponent {
GBA_COMPONENT_MAX
};
/*
 * Idle loop handling policy.
 *
 * The numeric order is significant: GBASetActiveRegion compares the current
 * setting with `>= IDLE_LOOP_REMOVE` and `>= IDLE_LOOP_DETECT`, so each level
 * includes the behavior of the levels below it.
 */
enum GBAIdleLoopOptimization {
	/* Below IDLE_LOOP_REMOVE, so neither of the `>=` checks passes */
	IDLE_LOOP_IGNORE = -1,
	/* Halt when a jump lands on the known idle loop address */
	IDLE_LOOP_REMOVE = 0,
	/* Additionally run idle loop detection on repeated jumps */
	IDLE_LOOP_DETECT = 1
};
enum {
SP_BASE_SYSTEM = 0x03007F00,
SP_BASE_IRQ = 0x03007FA0,
@ -120,7 +126,6 @@ struct GBA {
int springIRQ;
uint32_t biosChecksum;
int* keySource;
uint32_t busyLoop;
struct GBARotationSource* rotationSource;
struct GBALuminanceSource* luminanceSource;
struct GBARTCSource* rtcSource;
@ -136,6 +141,14 @@ struct GBA {
const char* activeFile;
int logLevel;
enum GBAIdleLoopOptimization idleOptimization;
uint32_t idleLoop;
uint32_t lastJump;
int idleDetectionStep;
int idleDetectionFailures;
int32_t cachedRegisters[16];
bool taintedRegisters[16];
};
struct GBACartridge {

View File

@ -56,7 +56,11 @@ int main(int argc, char** argv) {
GBAConfigInit(&config, "perf");
GBAConfigLoad(&config);
struct GBAOptions opts = {};
struct GBAOptions opts = {
.idleOptimization = IDLE_LOOP_DETECT
};
GBAConfigLoadDefaults(&config, &opts);
struct GBAArguments args = {};
if (!parseArguments(&args, &config, argc, argv, &subparser)) {
usage(argv[0], PERF_USAGE);
@ -69,7 +73,7 @@ int main(int argc, char** argv) {
renderer.outputBuffer = malloc(256 * 256 * 4);
renderer.outputBufferStride = 256;
struct GBAThread context = { };
struct GBAThread context = {};
_thread = &context;
if (!perfOpts.noVideo) {