mirror of https://github.com/mgba-emu/mgba.git
ARM Dynarec: Add loop folding
This commit is contained in:
parent
361d9ceeec
commit
bc48dc84b0
|
@ -8,6 +8,7 @@
|
||||||
#include "arm/isa-thumb.h"
|
#include "arm/isa-thumb.h"
|
||||||
|
|
||||||
#define OP_ADDI 0x02800000
|
#define OP_ADDI 0x02800000
|
||||||
|
#define OP_B 0x0A000000
|
||||||
#define OP_BL 0x0B000000
|
#define OP_BL 0x0B000000
|
||||||
#define OP_CMP 0x01500000
|
#define OP_CMP 0x01500000
|
||||||
#define OP_LDMIA 0x08900000
|
#define OP_LDMIA 0x08900000
|
||||||
|
@ -45,6 +46,13 @@ static uint32_t emitADDI(unsigned dst, unsigned src, unsigned imm) {
|
||||||
return OP_ADDI | calculateAddrMode1(imm) | (dst << 12) | (src << 16);
|
return OP_ADDI | calculateAddrMode1(imm) | (dst << 12) | (src << 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static uint32_t emitB(void* base, void* target) {
|
||||||
|
uint32_t diff = (intptr_t) target - (intptr_t) base - WORD_SIZE_ARM * 2;
|
||||||
|
diff >>= 2;
|
||||||
|
diff &= 0x00FFFFFF;
|
||||||
|
return OP_B | diff;
|
||||||
|
}
|
||||||
|
|
||||||
static uint32_t emitBL(void* base, void* target) {
|
static uint32_t emitBL(void* base, void* target) {
|
||||||
uint32_t diff = (intptr_t) target - (intptr_t) base - WORD_SIZE_ARM * 2;
|
uint32_t diff = (intptr_t) target - (intptr_t) base - WORD_SIZE_ARM * 2;
|
||||||
diff >>= 2;
|
diff >>= 2;
|
||||||
|
@ -108,8 +116,9 @@ static uint32_t emitSUBS(unsigned dst, unsigned src1, unsigned src2) {
|
||||||
return OP_SUBS | (dst << 12) | (src1 << 16) | src2;
|
return OP_SUBS | (dst << 12) | (src1 << 16) | src2;
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t* updatePC(uint32_t* code, uint32_t oldAddress, uint32_t address) {
|
static uint32_t* updatePC(uint32_t* code, uint32_t address) {
|
||||||
*code++ = emitADDI(5, 5, address - oldAddress) | COND_AL;
|
*code++ = emitMOVW(5, address) | COND_AL;
|
||||||
|
*code++ = emitMOVT(5, address >> 16) | COND_AL;
|
||||||
*code++ = emitSTRI(5, 4, ARM_PC * sizeof(uint32_t)) | COND_AL;
|
*code++ = emitSTRI(5, 4, ARM_PC * sizeof(uint32_t)) | COND_AL;
|
||||||
return code;
|
return code;
|
||||||
}
|
}
|
||||||
|
@ -216,23 +225,27 @@ void ARMDynarecRecompileTrace(struct ARMCore* cpu, struct ARMDynarecTrace* trace
|
||||||
#endif
|
#endif
|
||||||
uint32_t* code = cpu->dynarec.buffer;
|
uint32_t* code = cpu->dynarec.buffer;
|
||||||
uint32_t address = trace->start;
|
uint32_t address = trace->start;
|
||||||
|
struct Label {
|
||||||
|
uint32_t* code;
|
||||||
|
uint32_t pc;
|
||||||
|
}* labels = cpu->dynarec.temporaryMemory;
|
||||||
if (trace->mode == MODE_ARM) {
|
if (trace->mode == MODE_ARM) {
|
||||||
return;
|
return;
|
||||||
} else {
|
} else {
|
||||||
trace->entry = (void (*)(struct ARMCore*)) code;
|
trace->entry = (void (*)(struct ARMCore*)) code;
|
||||||
*code++ = emitPUSH(0x4030) | COND_AL;
|
*code++ = emitPUSH(0x4030) | COND_AL;
|
||||||
*code++ = emitMOV(4, 0) | COND_AL;
|
*code++ = emitMOV(4, 0) | COND_AL;
|
||||||
*code++ = emitMOVW(5, address) | COND_AL;
|
*code++ = emitLDRI(5, 0, ARM_PC * sizeof(uint32_t)) | COND_AL;
|
||||||
*code++ = emitMOVT(5, address >> 16) | COND_AL;
|
|
||||||
uint32_t oldAddress = address;
|
|
||||||
struct ARMInstructionInfo info;
|
struct ARMInstructionInfo info;
|
||||||
while (true) {
|
while (true) {
|
||||||
uint16_t instruction = cpu->memory.load16(cpu, address, 0);
|
uint16_t instruction = cpu->memory.load16(cpu, address, 0);
|
||||||
|
struct Label* label = &labels[(address - trace->start) >> 1];
|
||||||
ARMDecodeThumb(instruction, &info);
|
ARMDecodeThumb(instruction, &info);
|
||||||
address += WORD_SIZE_THUMB;
|
address += WORD_SIZE_THUMB;
|
||||||
|
label->code = code;
|
||||||
|
label->pc = address + WORD_SIZE_THUMB;
|
||||||
if (needsUpdatePC(&info)) {
|
if (needsUpdatePC(&info)) {
|
||||||
code = updatePC(code, oldAddress, address + WORD_SIZE_THUMB);
|
code = updatePC(code, address + WORD_SIZE_THUMB);
|
||||||
oldAddress = address + WORD_SIZE_THUMB;
|
|
||||||
}
|
}
|
||||||
if (needsUpdatePrefetch(&info)) {
|
if (needsUpdatePrefetch(&info)) {
|
||||||
code = flushPrefetch(code, cpu->memory.load16(cpu, address, 0), cpu->memory.load16(cpu, address + WORD_SIZE_THUMB, 0));
|
code = flushPrefetch(code, cpu->memory.load16(cpu, address, 0), cpu->memory.load16(cpu, address + WORD_SIZE_THUMB, 0));
|
||||||
|
@ -242,14 +255,33 @@ void ARMDynarecRecompileTrace(struct ARMCore* cpu, struct ARMDynarecTrace* trace
|
||||||
*code = emitBL(code, _thumbTable[instruction >> 6]) | COND_AL;
|
*code = emitBL(code, _thumbTable[instruction >> 6]) | COND_AL;
|
||||||
++code;
|
++code;
|
||||||
if (info.branchType == ARM_BRANCH) {
|
if (info.branchType == ARM_BRANCH) {
|
||||||
|
struct Label* label = NULL;
|
||||||
|
uint32_t base = address + info.op1.immediate + WORD_SIZE_THUMB;
|
||||||
|
if (info.op1.immediate <= 0) {
|
||||||
|
if (base > trace->start) {
|
||||||
|
label = &labels[(base - trace->start) >> 1];
|
||||||
|
}
|
||||||
|
}
|
||||||
// Assume branch not taken
|
// Assume branch not taken
|
||||||
if (info.condition == ARM_CONDITION_AL) {
|
if (info.condition == ARM_CONDITION_AL) {
|
||||||
code = updateEvents(code, cpu);
|
code = updateEvents(code, cpu);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
*code++ = emitADDI(5, 5, address - oldAddress + WORD_SIZE_THUMB) | COND_AL;
|
*code++ = emitMOVW(5, address + WORD_SIZE_THUMB) | COND_AL;
|
||||||
oldAddress = address + WORD_SIZE_THUMB;
|
*code++ = emitMOVT(5, (address + WORD_SIZE_THUMB) >> 16) | COND_AL;
|
||||||
code = updateEvents(code, cpu);
|
*code++ = emitLDRI(1, 4, ARM_PC * sizeof(uint32_t)) | COND_AL;
|
||||||
|
*code++ = emitCMP(1, 5) | COND_AL;
|
||||||
|
if (!label || !label->code) {
|
||||||
|
*code++ = emitPOP(0x8030) | COND_NE;
|
||||||
|
} else {
|
||||||
|
uint32_t* l2 = code;
|
||||||
|
++code;
|
||||||
|
*code++ = emitMOV(5, 1) | COND_AL;
|
||||||
|
code = updateEvents(code, cpu);
|
||||||
|
*code = emitB(code, label->code) | COND_AL;
|
||||||
|
++code;
|
||||||
|
*l2 = emitB(l2, code) | COND_EQ;
|
||||||
|
}
|
||||||
} else if (needsUpdateEvents(&info)) {
|
} else if (needsUpdateEvents(&info)) {
|
||||||
code = updateEvents(code, cpu);
|
code = updateEvents(code, cpu);
|
||||||
}
|
}
|
||||||
|
@ -257,6 +289,7 @@ void ARMDynarecRecompileTrace(struct ARMCore* cpu, struct ARMDynarecTrace* trace
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
memset(labels, 0, sizeof(struct Label) * ((address - trace->start) >> 1));
|
||||||
code = flushPrefetch(code, cpu->memory.load16(cpu, address, 0), cpu->memory.load16(cpu, address + WORD_SIZE_THUMB, 0));
|
code = flushPrefetch(code, cpu->memory.load16(cpu, address, 0), cpu->memory.load16(cpu, address + WORD_SIZE_THUMB, 0));
|
||||||
*code++ = emitPOP(0x8030) | COND_AL;
|
*code++ = emitPOP(0x8030) | COND_AL;
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,7 +12,7 @@ void ARMDynarecInit(struct ARMCore* cpu) {
|
||||||
BumpAllocatorInit(&cpu->dynarec.traceAlloc, sizeof(struct ARMDynarecTrace));
|
BumpAllocatorInit(&cpu->dynarec.traceAlloc, sizeof(struct ARMDynarecTrace));
|
||||||
TableInit(&cpu->dynarec.armTraces, 0x2000, 0);
|
TableInit(&cpu->dynarec.armTraces, 0x2000, 0);
|
||||||
TableInit(&cpu->dynarec.thumbTraces, 0x2000, 0);
|
TableInit(&cpu->dynarec.thumbTraces, 0x2000, 0);
|
||||||
cpu->dynarec.buffer = executableMemoryMap(0x100000);
|
cpu->dynarec.buffer = executableMemoryMap(0x200000);
|
||||||
cpu->dynarec.temporaryMemory = anonymousMemoryMap(0x2000);
|
cpu->dynarec.temporaryMemory = anonymousMemoryMap(0x2000);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -20,7 +20,7 @@ void ARMDynarecDeinit(struct ARMCore* cpu) {
|
||||||
BumpAllocatorDeinit(&cpu->dynarec.traceAlloc);
|
BumpAllocatorDeinit(&cpu->dynarec.traceAlloc);
|
||||||
TableDeinit(&cpu->dynarec.armTraces);
|
TableDeinit(&cpu->dynarec.armTraces);
|
||||||
TableDeinit(&cpu->dynarec.thumbTraces);
|
TableDeinit(&cpu->dynarec.thumbTraces);
|
||||||
mappedMemoryFree(cpu->dynarec.buffer, 0x100000);
|
mappedMemoryFree(cpu->dynarec.buffer, 0x200000);
|
||||||
mappedMemoryFree(cpu->dynarec.temporaryMemory, 0x2000);
|
mappedMemoryFree(cpu->dynarec.temporaryMemory, 0x2000);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue