From 518d50c48ccc971f91fc7f708e27d3df3f13cf2f Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Wed, 11 May 2016 23:07:54 -0700 Subject: [PATCH] ARM Dynarec: First implementation --- CMakeLists.txt | 6 +- src/arm/arm.c | 18 ++- src/arm/arm.h | 20 +++ src/arm/dynarec-arm/dynarec-impl.c | 223 +++++++++++++++++++++++++++++ src/arm/dynarec.c | 69 +++++++++ src/arm/dynarec.h | 29 ++++ src/gba/memory.c | 10 ++ src/platform/posix/memory.c | 4 + src/util/memory.h | 1 + 9 files changed, 376 insertions(+), 4 deletions(-) create mode 100644 src/arm/dynarec-arm/dynarec-impl.c create mode 100644 src/arm/dynarec.c create mode 100644 src/arm/dynarec.h diff --git a/CMakeLists.txt b/CMakeLists.txt index b345e9596..3abe0c277 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -487,12 +487,15 @@ if(USE_EPOXY) set(CPACK_DEBIAN_PACKAGE_DEPENDS "${CPACK_DEBIAN_PACKAGE_DEPENDS},libepoxy0") endif() - set(FEATURE_DEFINES) foreach(FEATURE IN LISTS FEATURES) list(APPEND FEATURE_DEFINES "USE_${FEATURE}") endforeach() +if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^armv[67]") + file(GLOB ARM_DYNAREC_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/arm/dynarec-arm/*.c) +endif() + set(CORE_SRC) if(M_CORE_GB) add_definitions(-DM_CORE_GB) @@ -507,6 +510,7 @@ if(M_CORE_GBA) add_definitions(-DM_CORE_GBA) list(APPEND CORE_SRC ${ARM_SRC} + ${ARM_DYNAREC_SRC} ${CMAKE_CURRENT_SOURCE_DIR}/src/arm/debugger/debugger.c ${CMAKE_CURRENT_SOURCE_DIR}/src/arm/debugger/memory-debugger.c ${GBA_SRC} diff --git a/src/arm/arm.c b/src/arm/arm.c index 9fc35de71..2086f793b 100644 --- a/src/arm/arm.c +++ b/src/arm/arm.c @@ -5,9 +5,10 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ #include "arm.h" -#include "isa-arm.h" -#include "isa-inlines.h" -#include "isa-thumb.h" +#include "arm/dynarec.h" +#include "arm/isa-arm.h" +#include "arm/isa-inlines.h" +#include "arm/isa-thumb.h" static inline enum RegisterBank _ARMSelectBank(enum PrivilegeMode); @@ -69,6 +70,8 @@ static inline enum RegisterBank _ARMSelectBank(enum PrivilegeMode mode) { } void ARMInit(struct ARMCore* cpu) { + cpu->executor = ARM_DYNAREC; /* Always selects the dynarec. NOTE(review): the CMake hunk in this patch only adds the dynarec-arm sources when CMAKE_SYSTEM_PROCESSOR matches ^armv[67]; confirm other hosts still build and run. */ + ARMDynarecInit(cpu); /* Dynarec state is set up before the master component init runs. */ cpu->master->init(cpu, cpu->master); size_t i; for (i = 0; i < cpu->numComponents; ++i) { @@ -88,6 +91,7 @@ void ARMDeinit(struct ARMCore* cpu) { cpu->components[i]->deinit(cpu->components[i]); } } + ARMDynarecDeinit(cpu); /* Release dynarec state after the component deinit loop. */ } void ARMSetComponents(struct ARMCore* cpu, struct mCPUComponent* master, int extra, struct mCPUComponent** extras) { @@ -143,6 +147,9 @@ void ARMReset(struct ARMCore* cpu) { cpu->nextEvent = 0; cpu->halted = 0; + ARMDynarecDeinit(cpu); /* Reset tears the dynarec state down ... */ + ARMDynarecInit(cpu); /* ... and rebuilds it, so the trace tables and code buffer start out fresh. */ + cpu->irqh.reset(cpu); } @@ -294,6 +301,11 @@ void ARMRun(struct ARMCore* cpu) { } void ARMRunLoop(struct ARMCore* cpu) { + if (cpu->dynarec.inDynarec) { /* ARMDynarecCountTrace has selected a compiled trace to run. */ + cpu->dynarec.inDynarec = false; /* Cleared before the call; it is only set again by ARMDynarecCountTrace. */ + cpu->dynarec.currentEntry(cpu); /* Run the compiled trace. */ + return; /* The interpreter loops below are skipped for this call. 
*/ + } if (cpu->executionMode == MODE_THUMB) { while (cpu->cycles < cpu->nextEvent) { ThumbStep(cpu); diff --git a/src/arm/arm.h b/src/arm/arm.h index 72231f24c..cf41a3045 100644 --- a/src/arm/arm.h +++ b/src/arm/arm.h @@ -9,6 +9,8 @@ #include "util/common.h" #include "core/cpu.h" +#include "util/table.h" +#include "util/bump-allocator.h" enum { ARM_SP = 13, @@ -64,6 +66,11 @@ enum LSMDirection { LSM_DB = 3 }; +enum ARMExecutor { /* Execution engine stored in ARMCore.executor. */ + ARM_INTERPRETER = 0, + ARM_DYNAREC = 1, +}; + struct ARMCore; union PSR { @@ -131,6 +138,16 @@ struct ARMInterruptHandler { void (*hitStub)(struct ARMCore* cpu, uint32_t opcode); }; +struct ARMDynarec { /* Per-core dynarec state, embedded in struct ARMCore as the dynarec member. */ + bool inDynarec; /* Set by ARMDynarecCountTrace when currentEntry should run; ARMRunLoop clears it and calls currentEntry. */ + struct BumpAllocator traceAlloc; /* Allocator for struct ARMDynarecTrace records. */ + struct Table armTraces; /* ARM-mode traces, keyed by address >> 2. */ + struct Table thumbTraces; /* Thumb-mode traces, keyed by address >> 1. */ + void* buffer; /* Write position in the executable code buffer; advanced by ARMDynarecRecompileTrace. */ + void (*currentEntry)(struct ARMCore*); /* Entry point of the compiled trace to run next. */ + void* temporaryMemory; /* Anonymous 0x2000-byte mapping; only mapped and freed within this patch. */ +}; 
+ struct ARMCore { int32_t gprs[16]; union PSR cpsr; @@ -153,6 +170,9 @@ struct ARMCore { struct ARMMemory memory; struct ARMInterruptHandler irqh; + enum ARMExecutor executor; /* Which execution engine this core uses. */ + struct ARMDynarec dynarec; /* Dynarec bookkeeping for this core. */ + struct mCPUComponent* master; size_t numComponents; diff --git a/src/arm/dynarec-arm/dynarec-impl.c b/src/arm/dynarec-arm/dynarec-impl.c new file mode 100644 index 000000000..5e9ca506f --- /dev/null +++ b/src/arm/dynarec-arm/dynarec-impl.c @@ -0,0 +1,223 @@ +/* Copyright (c) 2013-2016 Jeffrey Pfau + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#include "arm/decoder.h" +#include "arm/dynarec.h" +#include "arm/isa-thumb.h" + +#define OP_ADDI 0x02800000 /* OP_*: base encodings of host ARM instructions; the emit helpers OR in operand fields and their callers OR in a COND_* value. */ +#define OP_BL 0x0B000000 +#define OP_CMP 0x01500000 +#define OP_LDMIA 0x08900000 +#define OP_LDRI 0x05100000 +#define OP_MOV 0x01A00000 +#define OP_MOVT 0x03400000 +#define OP_MOVW 0x03000000 +#define OP_POP 0x08BD0000 +#define OP_PUSH 0x092D0000 +#define OP_STMIA 0x08800000 +#define OP_STRI 0x05000000 +#define OP_SUBS 0x00500000 + +#define COND_EQ 0x00000000 /* COND_*: condition codes, occupying bits 31-28 of an instruction word. */ +#define COND_NE 0x10000000 +#define COND_MI 0x40000000 +#define COND_LE 0xD0000000 +#define COND_AL 0xE0000000 + +static uint32_t calculateAddrMode1(unsigned imm) { /* Encodes imm as an 8-bit value with a rotate count in bits 11-8; calls abort() if no even rotation of imm fits in 8 bits. ROR is defined outside this file, presumably rotate-right. */ + if (imm < 0x100) { + return imm; /* Fits as-is: rotate count 0. 
*/ + } + int i; + for (i = 0; i < 16; ++i) { + unsigned t = ROR(imm, i * 2); + if (t < 0x100) { + return t | ((16 - i) << 8); /* The field stores 16 - i, presumably because the instruction rotates the 8-bit value right by twice the field to rebuild imm. */ + } + } + abort(); +} + +static uint32_t emitADDI(unsigned dst, unsigned src, unsigned imm) { /* Encodes ADD dst, src, #imm (dst in bits 15-12, src in bits 19-16). */ + return OP_ADDI | calculateAddrMode1(imm) | (dst << 12) | (src << 16); +} + +static uint32_t emitBL(void* base, void* target) { /* Encodes BL to target for an instruction that will be stored at base. NOTE(review): the offset is truncated to 24 bits with no range check. */ + uint32_t diff = (intptr_t) target - (intptr_t) base - WORD_SIZE_ARM * 2; /* Offset is taken relative to base + 2 * WORD_SIZE_ARM. */ + diff >>= 2; /* Byte offset to word offset. */ + diff &= 0x00FFFFFF; + return OP_BL | diff; +} + +static uint32_t emitCMP(unsigned src1, unsigned src2) { /* Encodes CMP src1, src2. */ + return OP_CMP | src2 | (src1 << 16); +} + +static uint32_t 
emitLDMIA(unsigned base, unsigned mask) { /* Encodes LDMIA base, {mask}; mask is ORed into the low bits as the register list. */ + return OP_LDMIA | (base << 16) | mask; +} + +static uint32_t emitLDRI(unsigned reg, unsigned base, int offset) { /* Encodes LDR reg, [base, #offset]. */ + uint32_t op = OP_LDRI | (base << 16) | (reg << 12); + if (offset > 0) { + op |= 0x00800000 | offset; /* Bit 23 marks the offset as added. NOTE(review): offset is ORed in unmasked, so it must fit in 12 bits. */ + } else { + op |= -offset & 0xFFF; /* Bit 23 clear: the magnitude is subtracted. */ + } + return op; +} + +static uint32_t emitMOV(unsigned dst, unsigned src) { /* Encodes MOV dst, src. */ + return OP_MOV | (dst << 12) | src; +} + +static uint32_t emitMOVT(unsigned dst, uint16_t value) { /* Encodes MOVT dst, #value; the top 4 bits of value go to bits 19-16, the low 12 bits to bits 11-0. */ + return OP_MOVT | (dst << 12) | ((value & 0xF000) << 4) | (value & 0x0FFF); +} + +static uint32_t emitMOVW(unsigned dst, uint16_t value) { /* Encodes MOVW dst, #value; same immediate split as emitMOVT. */ + return OP_MOVW | (dst << 12) | ((value & 0xF000) << 4) | (value & 0x0FFF); +} + +static uint32_t emitPOP(unsigned mask) { /* Encodes POP {mask}. */ + return OP_POP | mask; +} + +static uint32_t emitPUSH(unsigned mask) { /* Encodes PUSH {mask}. */ + return OP_PUSH | mask; +} + +static uint32_t emitSTMIA(unsigned base, unsigned mask) { /* Encodes STMIA base, {mask}. */ + return OP_STMIA | (base << 16) | mask; +} + +static uint32_t emitSTRI(unsigned reg, unsigned base, int offset) { /* Encodes STR reg, [base, #offset]; offset handling mirrors emitLDRI. */ + uint32_t op = OP_STRI | (base << 16) | (reg << 12); + if (offset > 0) { + op |= 0x00800000 | offset; /* NOTE(review): unmasked, as in emitLDRI; offset must fit in 12 bits. */ + } else { + op |= -offset & 0xFFF; + } + return op; +} + +static uint32_t emitSUBS(unsigned dst, unsigned src1, unsigned src2) { /* Encodes SUBS dst, src1, src2. 
*/ + return OP_SUBS | (dst << 12) | (src1 << 16) | src2; +} + +static uint32_t* updatePC(uint32_t* code, uint32_t address) { /* Emits code that loads address into r5 and stores it at r4 + ARM_PC * sizeof(uint32_t); r4 holds cpu and gprs is the first member of struct ARMCore, so this writes cpu->gprs[ARM_PC]. Returns the advanced write pointer. */ + *code++ = emitMOVW(5, address) | COND_AL; /* Low half (emitMOVW takes a uint16_t). */ + *code++ = emitMOVT(5, address >> 16) | COND_AL; /* High half. */ + *code++ = emitSTRI(5, 4, ARM_PC * sizeof(uint32_t)) | COND_AL; + return code; +} + +static uint32_t* updateEvents(uint32_t* code, struct ARMCore* cpu) { /* Emits an event check followed by a PC check that leaves the trace early. Returns the advanced write pointer. */ + *code++ = emitADDI(0, 4, offsetof(struct ARMCore, cycles)) | COND_AL; /* r0 = &cpu->cycles. */ + *code++ = emitLDMIA(0, 6) | COND_AL; /* Mask 6: load r1 and r2 from the two words at cpu->cycles (the second is presumably nextEvent). */ + *code++ = emitSUBS(0, 2, 1) | COND_AL; /* r0 = r2 - r1, setting flags. */ + *code++ = emitMOV(0, 4) | COND_AL; /* r0 = cpu as the call argument. */ + *code = emitBL(code, cpu->irqh.processEvents) | COND_LE; /* Call processEvents only when r2 - r1 <= 0. code is not post-incremented here because emitBL needs the address of this instruction. */ + ++code; + *code++ = emitLDRI(1, 4, ARM_PC * sizeof(uint32_t)) | COND_AL; /* r1 = cpu->gprs[ARM_PC] as it stands now. */ + *code++ = emitCMP(1, 5) | COND_AL; /* Compare with r5, the PC value the trace expects. */ + *code++ = 
emitPOP(0x8030) | COND_NE; /* POPNE {r4, r5, pc}: leave the trace if the stored PC no longer equals r5. */ + return code; +} + +static uint32_t* flushPrefetch(uint32_t* code, uint32_t op0, uint32_t op1) { /* Emits EQ-predicated code that stores op0 and op1 to the two words starting at cpu->prefetch (r4 holds cpu). Returns the advanced write pointer. */ + *code++ = emitMOVW(1, op0) | COND_EQ; /* r1 = low 16 bits of op0 (emitMOVW takes a uint16_t). */ + if (op0 >= 0x10000) { + *code++ = emitMOVT(1, op0 >> 16) | COND_EQ; /* Upper half, emitted only when it is nonzero. */ + } + *code++ = emitMOVW(2, op1) | COND_EQ; /* r2 = low 16 bits of op1. */ + if (op1 >= 0x10000) { + *code++ = emitMOVT(2, op1 >> 16) | COND_EQ; + } + *code++ = emitADDI(0, 4, offsetof(struct ARMCore, prefetch)) | COND_EQ; /* r0 = &cpu->prefetch. */ + *code++ = emitSTMIA(0, 6) | COND_EQ; /* Store r1 and r2 there. */ + return code; +} + +static bool needsUpdateEvents(struct ARMInstructionInfo* info) { /* True when the decoded instruction has a memory operand, a branch type, or traps. */ + if (info->operandFormat & ARM_OPERAND_MEMORY) { + return true; + } + if (info->branchType || info->traps) { + return true; + } + return false; +} + +static bool needsUpdatePC(struct ARMInstructionInfo* info) { /* True when the instruction needs an event check, or when any of its four operands names ARM_PC as a register or shifter register. 
*/ + if (needsUpdateEvents(info)) { + return true; + } + if (info->operandFormat & ARM_OPERAND_REGISTER_1 && info->op1.reg == ARM_PC) { + return true; + } + if (info->operandFormat & ARM_OPERAND_REGISTER_2 && info->op2.reg == ARM_PC) { + return true; + } + if (info->operandFormat & ARM_OPERAND_REGISTER_3 && info->op3.reg == ARM_PC) { + return true; + } + if (info->operandFormat & ARM_OPERAND_REGISTER_4 && info->op4.reg == ARM_PC) { + return true; + } + if (info->operandFormat & ARM_OPERAND_SHIFT_REGISTER_1 && info->op1.shifterReg == ARM_PC) { + return true; + } + if (info->operandFormat & ARM_OPERAND_SHIFT_REGISTER_2 && info->op2.shifterReg == ARM_PC) { + return true; + } + if (info->operandFormat & ARM_OPERAND_SHIFT_REGISTER_3 && info->op3.shifterReg == ARM_PC) { + return true; + } + if (info->operandFormat & ARM_OPERAND_SHIFT_REGISTER_4 && info->op4.shifterReg == ARM_PC) { + return true; + } + return false; +} + +void ARMDynarecRecompileTrace(struct ARMCore* cpu, struct ARMDynarecTrace* trace) { /* Compiles the Thumb trace starting at trace->start into the code buffer and records its entry point in trace->entry. ARM-mode traces return early without touching trace->entry. */ +#ifndef NDEBUG + printf("%08X (%c)\n", trace->start, trace->mode == MODE_THUMB ? 
'T' : 'A'); +#endif + uint32_t* code = cpu->dynarec.buffer; /* Emit at the current write position. NOTE(review): nothing in this function checks how much of the 0x100000-byte mapping made by ARMDynarecInit is left. */ + uint32_t address = trace->start; + if (trace->mode == MODE_ARM) { + return; /* ARM mode is not compiled; ARMDynarecCountTrace checks trace->entry before using it. */ + } else { + trace->entry = (void (*)(struct ARMCore*)) code; + *code++ = emitPUSH(0x4030) | COND_AL; /* Prologue: PUSH {r4, r5, lr}. */ + *code++ = emitMOV(4, 0) | COND_AL; /* r4 = r0, presumably the cpu argument under the ARM calling convention. */ + *code++ = emitLDRI(5, 0, ARM_PC * sizeof(uint32_t)) | COND_AL; /* r5 = cpu->gprs[ARM_PC]. */ + struct ARMInstructionInfo info; + while (true) { + uint16_t instruction = cpu->memory.load16(cpu, address, 0); + ARMDecodeThumb(instruction, &info); + address += WORD_SIZE_THUMB; + if (needsUpdatePC(&info)) { + code = updatePC(code, address + WORD_SIZE_THUMB); /* address was already advanced, so this is the instruction address plus 2 * WORD_SIZE_THUMB. */ + } + *code++ = emitMOVW(1, instruction) | COND_AL; /* r1 = the raw Thumb opcode. */ + *code = emitBL(code, _thumbTable[instruction >> 6]) | COND_AL; /* Call the handler from _thumbTable, with r0 = cpu and r1 = opcode (handler signature presumably matches). */ + ++code; + if (needsUpdateEvents(&info)) { + code = updateEvents(code, cpu); + } + if (info.branchType || info.traps) { + break; /* A branch or trap ends the trace. */ + } + *code++ = emitMOV(0, 4) | COND_AL; /* Restore r0 = cpu for the next handler call. */ + } + code = flushPrefetch(code, cpu->memory.load16(cpu, address, 0), cpu->memory.load16(cpu, address + WORD_SIZE_THUMB, 0)); /* Runs only when the flags from the CMP emitted by updateEvents are EQ. */ + *code++ = emitPOP(0x8030) | COND_AL; /* Epilogue: POP {r4, r5, pc}. 
*/ + } + __clear_cache(trace->entry, code); + cpu->dynarec.buffer = code; /* The next trace is emitted after this one. */ +} diff --git a/src/arm/dynarec.c b/src/arm/dynarec.c new file mode 100644 index 000000000..e6e21ba24 --- /dev/null +++ b/src/arm/dynarec.c @@ -0,0 +1,69 @@ +/* Copyright (c) 2013-2016 Jeffrey Pfau + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ +#include "dynarec.h" + +#include "arm/arm.h" +#include "util/memory.h" + +#define ARM_DYNAREC_THRESHOLD 1 /* ARMDynarecCountTrace only counts a trace while its hits are below this; after that it compiles and selects it. */ + +void ARMDynarecInit(struct ARMCore* cpu) { /* Sets up the trace allocator, both trace tables, the code buffer and the temporary mapping. NOTE(review): neither mapping result is checked. */ + BumpAllocatorInit(&cpu->dynarec.traceAlloc, sizeof(struct ARMDynarecTrace)); + TableInit(&cpu->dynarec.armTraces, 0x2000, 0); + TableInit(&cpu->dynarec.thumbTraces, 0x2000, 0); + cpu->dynarec.buffer = executableMemoryMap(0x100000); /* Code buffer for compiled traces. */ + cpu->dynarec.temporaryMemory = anonymousMemoryMap(0x2000); +} + +void ARMDynarecDeinit(struct ARMCore* cpu) { /* Releases everything ARMDynarecInit set up; the sizes must match the ones used there. */ + BumpAllocatorDeinit(&cpu->dynarec.traceAlloc); + TableDeinit(&cpu->dynarec.armTraces); + TableDeinit(&cpu->dynarec.thumbTraces); + mappedMemoryFree(cpu->dynarec.buffer, 0x100000); /* NOTE(review): ARMDynarecRecompileTrace advances cpu->dynarec.buffer, so after any compile this is no longer the start of the mapping; confirm and keep the base pointer separately if so. */ + mappedMemoryFree(cpu->dynarec.temporaryMemory, 0x2000); +} + +static struct ARMDynarecTrace* ARMDynarecFindTrace(struct ARMCore* cpu, uint32_t address, enum ExecutionMode mode) { /* Returns the trace record for address in the table for mode, creating a record with zero hits and no entry point if none exists. */ + struct ARMDynarecTrace* trace; + if (mode == MODE_ARM) { + trace = TableLookup(&cpu->dynarec.armTraces, address >> 2); /* ARM table is keyed by address >> 2. 
*/ + if (!trace) { + trace = BumpAllocatorAlloc(&cpu->dynarec.traceAlloc); + TableInsert(&cpu->dynarec.armTraces, address >> 2, trace); + trace->hits = 0; + trace->entry = NULL; + trace->start = address; + trace->mode = mode; + } + } else { + trace = TableLookup(&cpu->dynarec.thumbTraces, address >> 1); /* Thumb table is keyed by address >> 1. */ + if (!trace) { + trace = BumpAllocatorAlloc(&cpu->dynarec.traceAlloc); + TableInsert(&cpu->dynarec.thumbTraces, address >> 1, trace); + trace->hits = 0; + trace->entry = NULL; + trace->start = address; + trace->mode = mode; + } + } + return trace; +} + +void ARMDynarecCountTrace(struct ARMCore* cpu, uint32_t address, enum ExecutionMode mode) { /* Records a jump to address; once the trace has reached ARM_DYNAREC_THRESHOLD hits it is compiled if needed and selected for ARMRunLoop. */ + struct ARMDynarecTrace* trace = ARMDynarecFindTrace(cpu, address, mode); + if (trace->hits < ARM_DYNAREC_THRESHOLD) { + ++trace->hits; + return; /* Below the threshold: count only. */ + } + if (!trace->entry) { + ARMDynarecRecompileTrace(cpu, trace); /* May leave entry unset, for example for ARM-mode traces. */ + } + if (trace->entry) { + cpu->dynarec.inDynarec = true; + cpu->nextEvent = cpu->cycles; /* Makes the cycles < nextEvent condition of the interpreter loop in ARMRunLoop false, presumably so the trace is picked up on the next ARMRunLoop call. */ + cpu->dynarec.currentEntry = trace->entry; + } +} diff 
--git a/src/arm/dynarec.h b/src/arm/dynarec.h new file mode 100644 index 000000000..68d141122 --- /dev/null +++ b/src/arm/dynarec.h @@ -0,0 +1,29 @@ +/* Copyright (c) 2013-2016 Jeffrey Pfau + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#ifndef ARM_DYNAREC_H +#define ARM_DYNAREC_H + +#include "util/common.h" + +#include "arm/arm.h" + +struct ARMCore; +enum ExecutionMode; + +struct ARMDynarecTrace { /* One record per trace start address and execution mode. */ + unsigned hits; /* Times counted so far; incremented by ARMDynarecCountTrace while below its threshold. */ + enum ExecutionMode mode; /* Mode the trace was first seen in. */ + uint32_t start; /* Address of the first instruction. */ + void (*entry)(struct ARMCore* cpu); /* Compiled code, or NULL while not compiled. */ +}; + +void ARMDynarecInit(struct ARMCore* cpu); /* Sets up cpu->dynarec. */ +void ARMDynarecDeinit(struct ARMCore* cpu); /* Releases what ARMDynarecInit set up. */ + +void ARMDynarecCountTrace(struct ARMCore* cpu, uint32_t address, enum ExecutionMode mode); /* Counts a jump to address and may select a compiled trace to run. */ +void ARMDynarecRecompileTrace(struct ARMCore* cpu, struct ARMDynarecTrace* trace); /* Compiles trace; the only definition in this patch is in dynarec-arm/dynarec-impl.c. 
*/ + +#endif diff --git a/src/gba/memory.c b/src/gba/memory.c index 63f2a43a8..945c4c31b 100644 --- a/src/gba/memory.c +++ b/src/gba/memory.c @@ -243,7 +243,11 @@ static void GBASetActiveRegion(struct ARMCore* cpu, uint32_t address) { gba->lastJump = address; memory->lastPrefetchedPc = 0; memory->lastPrefetchedLoads = 0; + cpu->dynarec.inDynarec = false; /* Drop any previously selected trace; ARMDynarecCountTrace below may select a new one. */ if (newRegion == memory->activeRegion) { + if (cpu->executor == ARM_DYNAREC && newRegion >= REGION_CART0) { /* Same-region jump: only targets at or above REGION_CART0 are counted. */ + ARMDynarecCountTrace(cpu, address,cpu->executionMode); /* NOTE(review): if the check below does not return, the call added at the end of this function may count the same address again; the lines between the hunks are not visible here, confirm. */ + } if (newRegion < REGION_CART0 || (address & (SIZE_CART0 - 1)) < memory->romSize) { return; } @@ -255,6 +259,7 @@ static void GBASetActiveRegion(struct ARMCore* cpu, uint32_t address) { if (memory->activeRegion == REGION_BIOS) { memory->biosPrefetch = cpu->prefetch[1]; } + bool readonly = false; /* Set only in the cartridge cases of the switch below. 
*/ memory->activeRegion = newRegion; switch (newRegion) { case REGION_BIOS: @@ -292,6 +297,7 @@ static void GBASetActiveRegion(struct ARMCore* cpu, uint32_t address) { case REGION_CART1_EX: case REGION_CART2: case REGION_CART2_EX: + readonly = 
true; /* Cartridge region: eligible for trace counting at the end of the function. */ cpu->memory.activeRegion = memory->rom; cpu->memory.activeMask = memory->romMask; if ((address & (SIZE_CART0 - 1)) < memory->romSize) { @@ -313,6 +319,10 @@ static void GBASetActiveRegion(struct ARMCore* cpu, uint32_t address) { cpu->memory.activeSeqCycles16 = memory->waitstatesSeq16[memory->activeRegion]; cpu->memory.activeNonseqCycles32 = memory->waitstatesNonseq32[memory->activeRegion]; cpu->memory.activeNonseqCycles16 = memory->waitstatesNonseq16[memory->activeRegion]; + + if (readonly && cpu->executor == ARM_DYNAREC) { /* Region switch into a cartridge region: count the jump target. */ + ARMDynarecCountTrace(cpu, address,cpu->executionMode); + } } #define LOAD_BAD \ diff --git a/src/platform/posix/memory.c b/src/platform/posix/memory.c index 47760275e..65cdce366 100644 --- a/src/platform/posix/memory.c +++ b/src/platform/posix/memory.c @@ -11,6 +11,10 @@ void* anonymousMemoryMap(size_t size) { return mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); } +void* executableMemoryMap(size_t size) { /* Same as anonymousMemoryMap with PROT_EXEC added. NOTE(review): the mmap result is returned unchecked (MAP_FAILED on error), and only the POSIX implementation is added by this patch. */ + return mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0); +} + void mappedMemoryFree(void* memory, size_t size) { munmap(memory, size); } diff --git a/src/util/memory.h b/src/util/memory.h index e78d87c7a..65ce0db21 100644 --- a/src/util/memory.h +++ b/src/util/memory.h @@ -9,6 +9,7 @@ #include "util/common.h" void* anonymousMemoryMap(size_t size); +void* executableMemoryMap(size_t size); /* Maps size bytes of anonymous memory that is readable, writable and executable; release with mappedMemoryFree. */ void mappedMemoryFree(void* memory, size_t size); #endif