From ca5f7a45ee5ba2d50dedda2692e0319985785572 Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Sun, 19 Oct 2014 03:01:26 -0700 Subject: [PATCH 01/34] Move string utilities out to separate file (really fixing Linux build this time) --- CMakeLists.txt | 11 +++++++---- src/debugger/parser.c | 18 ++++-------------- src/util/string.c | 25 +++++++++++++++++++++++++ src/util/string.h | 9 +++++++++ src/util/vfs.c | 11 +++-------- 5 files changed, 48 insertions(+), 26 deletions(-) create mode 100644 src/util/string.c create mode 100644 src/util/string.h diff --git a/CMakeLists.txt b/CMakeLists.txt index c11d73754..f28deab7e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -82,6 +82,13 @@ find_feature(USE_FFMPEG "libavcodec;libavformat;libavutil") find_feature(USE_PNG "PNG;ZLIB") find_feature(USE_LIBZIP "libzip") +include(CheckFunctionExists) +check_function_exists(strndup HAVE_STRNDUP) + +if(HAVE_STRNDUP) + add_definitions(-DHAVE_STRNDUP) +endif() + # Platform support if(WIN32) add_definitions(-D_WIN32_WINNT=0x0600) @@ -102,10 +109,6 @@ if(BUILD_BBB OR BUILD_RASPI) endif() endif() -if(CMAKE_SYSTEM_NAME STREQUAL "Linux") - list(APPEND OS_LIB bsd) -endif() - # Features set(DEBUGGER_SRC ${CMAKE_SOURCE_DIR}/src/debugger/debugger.c ${CMAKE_SOURCE_DIR}/src/debugger/memory-debugger.c) diff --git a/src/debugger/parser.c b/src/debugger/parser.c index 1eb9379f7..840819e05 100644 --- a/src/debugger/parser.c +++ b/src/debugger/parser.c @@ -1,16 +1,6 @@ #include "parser.h" -static inline char* _strndup(const char* start, size_t len) { -#ifdef HAVE_STRNDUP - return strndup(start, len); -#else - // This is suboptimal, but anything recent should have strndup - char* out = malloc((len + 1) * sizeof(char)); - strncpy(out, start, len); - out[len] = '\0'; - return out; -#endif -} +#include "util/string.h" static struct LexVector* _lexOperator(struct LexVector* lv, char operator) { struct LexVector* lvNext = malloc(sizeof(struct LexVector)); @@ -108,13 +98,13 @@ size_t lexExpression(struct 
LexVector* lv, const char* string, size_t length) { case '*': case '/': lv->token.type = TOKEN_IDENTIFIER_TYPE; - lv->token.identifierValue = _strndup(tokenStart, string - tokenStart - 1); + lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1); lv = _lexOperator(lv, token); state = LEX_ROOT; break; case ')': lv->token.type = TOKEN_IDENTIFIER_TYPE; - lv->token.identifierValue = _strndup(tokenStart, string - tokenStart - 1); + lv->token.identifierValue = strndup(tokenStart, string - tokenStart - 1); state = LEX_EXPECT_OPERATOR; break; default: @@ -298,7 +288,7 @@ size_t lexExpression(struct LexVector* lv, const char* string, size_t length) { break; case LEX_EXPECT_IDENTIFIER: lv->token.type = TOKEN_IDENTIFIER_TYPE; - lv->token.identifierValue = _strndup(tokenStart, string - tokenStart); + lv->token.identifierValue = strndup(tokenStart, string - tokenStart); break; case LEX_EXPECT_OPERATOR: lvNext = malloc(sizeof(struct LexVector)); diff --git a/src/util/string.c b/src/util/string.c new file mode 100644 index 000000000..e38c78be4 --- /dev/null +++ b/src/util/string.c @@ -0,0 +1,25 @@ +#include "util/string.h" + +#include + +#ifndef HAVE_STRNDUP +char* strndup(const char* start, size_t len) { + // This is suboptimal, but anything recent should have strndup + char* out = malloc((len + 1) * sizeof(char)); + strncpy(out, start, len); + out[len] = '\0'; + return out; +} +#endif + +char* strnrstr(const char* restrict haystack, const char* restrict needle, size_t len) { + char* last = 0; + const char* next = haystack; + size_t needleLen = strlen(needle); + for (; len >= needleLen; --len, ++next) { + if (strncmp(needle, next, needleLen) == 0) { + last = (char*) next; + } + } + return last; +} diff --git a/src/util/string.h b/src/util/string.h new file mode 100644 index 000000000..b78e20180 --- /dev/null +++ b/src/util/string.h @@ -0,0 +1,9 @@ +#ifndef UTIL_STRING_H +#define UTIL_STRING_H + +#include "util/common.h" + +char* strndup(const char* start, 
size_t len); +char* strnrstr(const char* restrict s1, const char* restrict s2, size_t len); + +#endif diff --git a/src/util/vfs.c b/src/util/vfs.c index 9c8b32332..ccd52877c 100644 --- a/src/util/vfs.c +++ b/src/util/vfs.c @@ -1,5 +1,7 @@ #include "util/vfs.h" +#include "util/string.h" + #include #include @@ -273,14 +275,7 @@ struct VFile* VDirOptionalOpenIncrementFile(struct VDir* dir, const char* realPa if (dotPoint) { len = (dotPoint - filename); } - const char* separator = 0; - const char* nextSeparator = filename; - size_t strstrlen = len; - while ((nextSeparator = strnstr(nextSeparator, infix, strstrlen))) { - strstrlen -= nextSeparator - separator - 1; - separator = nextSeparator; - ++nextSeparator; - } + const char* separator = strnrstr(filename, infix, len); if (!separator) { continue; } From 6b382caa0f2936df4918c05e8a414b9681523a9e Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Sun, 19 Oct 2014 03:45:11 -0700 Subject: [PATCH 02/34] Fix decoding of {STR/LDR}[B]T --- src/arm/decoder.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/arm/decoder.c b/src/arm/decoder.c index 0ac1c612b..9f6ef7b5e 100644 --- a/src/arm/decoder.c +++ b/src/arm/decoder.c @@ -227,10 +227,23 @@ static const char* _armAccessTypeStrings[] = { "", "", "", + "", "sb", "sh", - "" + "", + "", + "", + "", + "", + + "", + "tb", + "", + "", + "t", + "", + "", "" }; From 29337a60b52529a942912a6b4595f9d3092df08b Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Sun, 19 Oct 2014 05:19:19 -0700 Subject: [PATCH 03/34] Mark DMA registers as special to keep loading them from messing up state --- src/gba/gba-io.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gba/gba-io.c b/src/gba/gba-io.c index ac1e1ef83..f8b39b235 100644 --- a/src/gba/gba-io.c +++ b/src/gba/gba-io.c @@ -62,9 +62,9 @@ static const int _isSpecialRegister[REG_MAX >> 1] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, // DMA - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 
0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Timers From f1afeae74c64f2e1d1ffd43cf964e574bd97cc46 Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Sun, 19 Oct 2014 05:41:35 -0700 Subject: [PATCH 04/34] Properly reset after loading BIOS --- src/gba/gba-thread.c | 3 ++- src/gba/gba.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/gba/gba-thread.c b/src/gba/gba-thread.c index 446d35ded..e0ac4084a 100644 --- a/src/gba/gba-thread.c +++ b/src/gba/gba-thread.c @@ -114,7 +114,6 @@ static THREAD_ENTRY _GBAThreadRun(void* context) { GBACreate(&gba); ARMSetComponents(&cpu, &gba.d, numComponents, components); ARMInit(&cpu); - ARMReset(&cpu); threadContext->gba = &gba; gba.sync = &threadContext->sync; gba.logLevel = threadContext->logLevel; @@ -145,6 +144,8 @@ static THREAD_ENTRY _GBAThreadRun(void* context) { } } + ARMReset(&cpu); + if (threadContext->debugger) { threadContext->debugger->log = GBADebuggerLogShim; GBAAttachDebugger(&gba, threadContext->debugger); diff --git a/src/gba/gba.c b/src/gba/gba.c index e4d2793f4..fe1c71913 100644 --- a/src/gba/gba.c +++ b/src/gba/gba.c @@ -427,8 +427,8 @@ void GBALoadBIOS(struct GBA* gba, struct VFile* vf) { GBALog(gba, GBA_LOG_WARN, "BIOS checksum incorrect"); } gba->biosChecksum = checksum; - if ((gba->cpu->gprs[ARM_PC] >> BASE_OFFSET) == BASE_BIOS) { - gba->cpu->memory.setActiveRegion(gba->cpu, gba->cpu->gprs[ARM_PC]); + if (gba->memory.activeRegion == REGION_BIOS) { + gba->cpu->memory.activeRegion = gba->memory.bios; } // TODO: error check } From 10509fe69de2a6489bec11c60ec01b171157d489 Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Sun, 19 Oct 2014 06:24:09 -0700 Subject: [PATCH 05/34] Fix objwin on objs when objs are off in winout --- src/gba/renderers/video-software.c | 44 ++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git 
a/src/gba/renderers/video-software.c b/src/gba/renderers/video-software.c index f1b0d3b54..2622b27a4 100644 --- a/src/gba/renderers/video-software.c +++ b/src/gba/renderers/video-software.c @@ -624,7 +624,7 @@ static void _drawScanline(struct GBAVideoSoftwareRenderer* renderer, int y) { renderer->start = renderer->end; renderer->end = renderer->windows[w].endX; renderer->currentWindow = renderer->windows[w].control; - if (!GBAWindowControlIsObjEnable(renderer->currentWindow.packed)) { + if (!GBAWindowControlIsObjEnable(renderer->currentWindow.packed) && !GBARegisterDISPCNTIsObjwinEnable(renderer->dispcnt)) { continue; } int i; @@ -1605,25 +1605,39 @@ static void _postprocessSprite(struct GBAVideoSoftwareRenderer* renderer, unsign uint32_t flags = FLAG_TARGET_2 * renderer->target2Obj; int objwinSlowPath = GBARegisterDISPCNTIsObjwinEnable(renderer->dispcnt); - int objwinDisable = 0; + bool objwinDisable = false; + bool objwinOnly = false; if (objwinSlowPath) { objwinDisable = !GBAWindowControlIsObjEnable(renderer->objwin.packed); + // TODO: Fix this for current window when WIN0/1 are enabled + objwinOnly = !objwinDisable && !GBAWindowControlIsObjEnable(renderer->winout.packed); } - if (objwinSlowPath && objwinDisable) { - for (x = 0; x < VIDEO_HORIZONTAL_PIXELS; ++x, ++pixel) { - uint32_t color = renderer->spriteLayer[x] & ~FLAG_OBJWIN; - uint32_t current = *pixel; - if ((color & FLAG_UNWRITTEN) != FLAG_UNWRITTEN && !(current & FLAG_OBJWIN) && (color & FLAG_PRIORITY) >> OFFSET_PRIORITY == priority) { - _compositeBlendObjwin(renderer, pixel, color | flags, current); + if (objwinSlowPath) { + if (objwinDisable) { + for (x = 0; x < VIDEO_HORIZONTAL_PIXELS; ++x, ++pixel) { + uint32_t color = renderer->spriteLayer[x] & ~FLAG_OBJWIN; + uint32_t current = *pixel; + if ((color & FLAG_UNWRITTEN) != FLAG_UNWRITTEN && !(current & FLAG_OBJWIN) && (color & FLAG_PRIORITY) >> OFFSET_PRIORITY == priority) { + _compositeBlendObjwin(renderer, pixel, color | flags, current); + } } + 
return; + } else if (objwinOnly) { + for (x = 0; x < VIDEO_HORIZONTAL_PIXELS; ++x, ++pixel) { + uint32_t color = renderer->spriteLayer[x] & ~FLAG_OBJWIN; + uint32_t current = *pixel; + if ((color & FLAG_UNWRITTEN) != FLAG_UNWRITTEN && (current & FLAG_OBJWIN) && (color & FLAG_PRIORITY) >> OFFSET_PRIORITY == priority) { + _compositeBlendObjwin(renderer, pixel, color | flags, current); + } + } + return; } - } else { - for (x = 0; x < VIDEO_HORIZONTAL_PIXELS; ++x, ++pixel) { - uint32_t color = renderer->spriteLayer[x] & ~FLAG_OBJWIN; - uint32_t current = *pixel; - if ((color & FLAG_UNWRITTEN) != FLAG_UNWRITTEN && (color & FLAG_PRIORITY) >> OFFSET_PRIORITY == priority) { - _compositeBlendNoObjwin(renderer, pixel, color | flags, current); - } + } + for (x = 0; x < VIDEO_HORIZONTAL_PIXELS; ++x, ++pixel) { + uint32_t color = renderer->spriteLayer[x] & ~FLAG_OBJWIN; + uint32_t current = *pixel; + if ((color & FLAG_UNWRITTEN) != FLAG_UNWRITTEN && (color & FLAG_PRIORITY) >> OFFSET_PRIORITY == priority) { + _compositeBlendNoObjwin(renderer, pixel, color | flags, current); } } } From ba5c77523d628da07cda28eb17c0ab941b98418b Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Sun, 19 Oct 2014 18:56:55 -0700 Subject: [PATCH 06/34] Remove stale file --- src/platform/glsl-main.c | 103 --------------------------------------- 1 file changed, 103 deletions(-) delete mode 100644 src/platform/glsl-main.c diff --git a/src/platform/glsl-main.c b/src/platform/glsl-main.c deleted file mode 100644 index 951c4e3af..000000000 --- a/src/platform/glsl-main.c +++ /dev/null @@ -1,103 +0,0 @@ -#include "debugger.h" -#include "gba-thread.h" -#include "gba.h" -#include "renderers/video-glsl.h" -#include "sdl-events.h" - -#include -#ifdef __APPLE__ -#include -#else -#include -#endif - -#include -#include -#include -#include - -static int _GBASDLInit(void); -static void _GBASDLDeinit(void); -static void _GBASDLRunloop(struct GBAThread* context, struct GBAVideoGLSLRenderer* renderer); - -int main(int 
argc, char** argv) { - const char* fname = "test.rom"; - if (argc > 1) { - fname = argv[1]; - } - int fd = open(fname, O_RDONLY); - if (fd < 0) { - return 1; - } - - struct GBAThread context; - struct GBAVideoGLSLRenderer renderer; - - if (!_GBASDLInit()) { - return 1; - } - GBAVideoGLSLRendererCreate(&renderer); - - context.fd = fd; - context.renderer = &renderer.d; - GBAThreadStart(&context); - - _GBASDLRunloop(&context, &renderer); - - GBAThreadJoin(&context); - close(fd); - - _GBASDLDeinit(); - - return 0; -} - -static int _GBASDLInit() { - if (SDL_Init(SDL_INIT_VIDEO) < 0) { - return 0; - } - - GBASDLInitEvents(); - - SDL_GL_SetAttribute(SDL_GL_SWAP_CONTROL, 1); - SDL_GL_SetAttribute(SDL_GL_RED_SIZE, 8); - SDL_GL_SetAttribute(SDL_GL_GREEN_SIZE, 8); - SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8); - SDL_GL_SetAttribute(SDL_GL_DEPTH_SIZE, 16); - SDL_SetVideoMode(240, 160, 32, SDL_OPENGL); - - glViewport(0, 0, 240, 160); - - return 1; -} - -static void _GBASDLRunloop(struct GBAThread* context, struct GBAVideoGLSLRenderer* renderer) { - SDL_Event event; - - glEnable(GL_TEXTURE_2D); - while (context->state < THREAD_EXITING) { - GBAVideoGLSLRendererProcessEvents(renderer); - pthread_mutex_lock(&renderer->mutex); - if (renderer->d.framesPending) { - renderer->d.framesPending = 0; - pthread_mutex_unlock(&renderer->mutex); - - SDL_GL_SwapBuffers(); - - while (SDL_PollEvent(&event)) { - GBASDLHandleEvent(context, &event); - } - pthread_mutex_lock(&renderer->mutex); - pthread_cond_broadcast(&renderer->downCond); - } else { - pthread_cond_broadcast(&renderer->downCond); - pthread_cond_wait(&renderer->upCond, &renderer->mutex); - } - pthread_mutex_unlock(&renderer->mutex); - } -} - -static void _GBASDLDeinit() { - GBASDLDeinitEvents(); - SDL_Quit(); -} From d44bf73a81ba8c84021b201c50f378f19513542f Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Sun, 19 Oct 2014 21:13:32 -0700 Subject: [PATCH 07/34] Check if VFileOpen is actually passed a path before trying to open it --- 
src/util/vfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/util/vfs.c b/src/util/vfs.c index ccd52877c..137d68095 100644 --- a/src/util/vfs.c +++ b/src/util/vfs.c @@ -39,6 +39,9 @@ static struct VFile* _vdOpenFile(struct VDir* vd, const char* path, int mode); static const char* _vdeName(struct VDirEntry* vde); struct VFile* VFileOpen(const char* path, int flags) { + if (!path) { + return 0; + } int fd = open(path, flags, 0666); return VFileFromFD(fd); } From e7bd5f9ade967c2bea151633b9336144b246576b Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Sun, 19 Oct 2014 23:30:10 -0700 Subject: [PATCH 08/34] Remove loop in DMA to find a value we already know --- src/gba/gba-memory.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/gba/gba-memory.c b/src/gba/gba-memory.c index 07cd3824c..4bbcd1ec0 100644 --- a/src/gba/gba-memory.c +++ b/src/gba/gba-memory.c @@ -852,11 +852,8 @@ void GBAMemoryServiceDMA(struct GBA* gba, int number, struct GBADMA* info) { } info->nextSource = source; - int i; - for (i = 0; i < 4; ++i) { - if (memory->dma[i].nextEvent != INT_MAX) { - memory->dma[i].nextEvent += cycles; - } + if (info->nextEvent != INT_MAX) { + info->nextEvent += cycles; } cpu->cycles += cycles; } From f7b1cee66ee5438db8d483911b48acacd8ebefd9 Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Tue, 21 Oct 2014 00:45:06 -0700 Subject: [PATCH 09/34] Use branchType instead of branches in decoder for more expressive branch decoding --- src/arm/decoder-arm.c | 18 ++++++++++-------- src/arm/decoder-thumb.c | 18 +++++++++++------- src/arm/decoder.h | 15 +++++++++++---- 3 files changed, 32 insertions(+), 19 deletions(-) diff --git a/src/arm/decoder-arm.c b/src/arm/decoder-arm.c index ee299369a..906289e02 100644 --- a/src/arm/decoder-arm.c +++ b/src/arm/decoder-arm.c @@ -110,7 +110,7 @@ info->operandFormat &= ~ARM_OPERAND_2; \ } \ if (info->op1.reg == ARM_PC) { \ - info->branches = 1; \ + info->branchType = ARM_BRANCH_INDIRECT; \ }) #define 
DEFINE_ALU_DECODER_ARM(NAME, SKIPPED) \ @@ -157,7 +157,7 @@ OTHER_AFFECTED; \ info->affectsCPSR = S; \ if (info->op1.reg == ARM_PC) { \ - info->branches = 1; \ + info->branchType = ARM_BRANCH_INDIRECT; \ }) #define DEFINE_LONG_MULTIPLY_DECODER_EX_ARM(NAME, MNEMONIC, S) \ @@ -174,7 +174,7 @@ ARM_OPERAND_REGISTER_4; \ info->affectsCPSR = S; \ if (info->op1.reg == ARM_PC) { \ - info->branches = 1; \ + info->branchType = ARM_BRANCH_INDIRECT; \ }) #define DEFINE_MULTIPLY_DECODER_ARM(NAME, OTHER_AFFECTED) \ @@ -255,7 +255,9 @@ DEFINE_DECODER_ARM(NAME, MNEMONIC, \ info->memory.baseReg = (opcode >> 16) & 0xF; \ info->op1.immediate = opcode & 0x0000FFFF; \ - info->branches = info->op1.immediate & (1 << ARM_PC); \ + if (info->op1.immediate & (1 << ARM_PC)) { \ + info->branchType = ARM_BRANCH_INDIRECT; \ + } \ info->operandFormat = ARM_OPERAND_MEMORY_1; \ info->memory.format = ARM_MEMORY_REGISTER_BASE | \ ARM_MEMORY_WRITEBACK | \ @@ -348,18 +350,18 @@ DEFINE_DECODER_ARM(B, B, int32_t offset = opcode << 8; info->op1.immediate = offset >> 6; info->operandFormat = ARM_OPERAND_IMMEDIATE_1; - info->branches = 1;) + info->branchType = ARM_BRANCH;) DEFINE_DECODER_ARM(BL, BL, int32_t offset = opcode << 8; info->op1.immediate = offset >> 6; info->operandFormat = ARM_OPERAND_IMMEDIATE_1; - info->branches = 1;) + info->branchType = ARM_BRANCH_LINKED;) DEFINE_DECODER_ARM(BX, BX, info->op1.reg = opcode & 0x0000000F; info->operandFormat = ARM_OPERAND_REGISTER_1; - info->branches = 1;) + info->branchType = ARM_BRANCH_INDIRECT;) // End branch definitions @@ -441,7 +443,7 @@ static const ARMDecoder _armDecoderTable[0x1000] = { void ARMDecodeARM(uint32_t opcode, struct ARMInstructionInfo* info) { info->execMode = MODE_ARM; info->opcode = opcode; - info->branches = 0; + info->branchType = ARM_BRANCH_NONE; info->traps = 0; info->affectsCPSR = 0; info->condition = opcode >> 28; diff --git a/src/arm/decoder-thumb.c b/src/arm/decoder-thumb.c index b3c9e1d6e..f460d4677 100644 --- 
a/src/arm/decoder-thumb.c +++ b/src/arm/decoder-thumb.c @@ -135,7 +135,9 @@ DEFINE_DATA_FORM_5_DECODER_THUMB(MVN, MVN, ARM_OPERAND_AFFECTED_1) DEFINE_THUMB_DECODER(NAME, MNEMONIC, \ info->op1.reg = (opcode & 0x0007) | H1; \ info->op2.reg = ((opcode >> 3) & 0x0007) | H2; \ - info->branches = info->op1.reg == ARM_PC; \ + if (info->op1.reg == ARM_PC) { \ + info->branchType = ARM_BRANCH_INDIRECT; \ + } \ info->affectsCPSR = CPSR; \ info->operandFormat = ARM_OPERAND_REGISTER_1 | \ AFFECTED | \ @@ -221,7 +223,9 @@ DEFINE_LOAD_STORE_WITH_REGISTER_THUMB(STRH2, STR, STORE_CYCLES, ARM_ACCESS_HALFW DEFINE_THUMB_DECODER(NAME, MNEMONIC, \ info->memory.baseReg = RN; \ info->op1.immediate = (opcode & 0xFF) | ADDITIONAL_REG; \ - info->branches = info->op1.immediate & (1 << ARM_PC); \ + if (info->op1.immediate & (1 << ARM_PC)) { \ + info->branchType = ARM_BRANCH_INDIRECT; \ + } \ info->operandFormat = ARM_OPERAND_MEMORY_1; \ info->memory.format = ARM_MEMORY_REGISTER_BASE | \ ARM_MEMORY_WRITEBACK | \ @@ -237,7 +241,7 @@ DEFINE_LOAD_STORE_MULTIPLE_THUMB(STM) DEFINE_THUMB_DECODER(B ## COND, B, \ int8_t immediate = opcode; \ info->op1.immediate = immediate << 1; \ - info->branches = 1; \ + info->branchType = ARM_BRANCH; \ info->condition = ARM_CONDITION_ ## COND; \ info->operandFormat = ARM_OPERAND_IMMEDIATE_1;) @@ -279,7 +283,7 @@ DEFINE_THUMB_DECODER(B, B, int16_t immediate = (opcode & 0x07FF) << 5; info->op1.immediate = (((int32_t) immediate) >> 4); info->operandFormat = ARM_OPERAND_IMMEDIATE_1; - info->branches = 1;) + info->branchType = ARM_BRANCH;) DEFINE_THUMB_DECODER(BL1, BLH, int16_t immediate = (opcode & 0x07FF) << 5; @@ -289,12 +293,12 @@ DEFINE_THUMB_DECODER(BL1, BLH, DEFINE_THUMB_DECODER(BL2, BL, info->op1.immediate = (opcode & 0x07FF) << 1; info->operandFormat = ARM_OPERAND_IMMEDIATE_1; - info->branches = 1;) + info->branchType = ARM_BRANCH_LINKED;) DEFINE_THUMB_DECODER(BX, BX, info->op1.reg = (opcode >> 3) & 0xF; info->operandFormat = ARM_OPERAND_REGISTER_1; - 
info->branches = 1;) + info->branchType = ARM_BRANCH_INDIRECT;) DEFINE_THUMB_DECODER(SWI, SWI, info->op1.immediate = opcode & 0xFF; @@ -310,7 +314,7 @@ static const ThumbDecoder _thumbDecoderTable[0x400] = { void ARMDecodeThumb(uint16_t opcode, struct ARMInstructionInfo* info) { info->execMode = MODE_THUMB; info->opcode = opcode; - info->branches = 0; + info->branchType = ARM_BRANCH_NONE; info->traps = 0; info->affectsCPSR = 0; info->condition = ARM_CONDITION_AL; diff --git a/src/arm/decoder.h b/src/arm/decoder.h index 7fa702832..12ff699f7 100644 --- a/src/arm/decoder.h +++ b/src/arm/decoder.h @@ -108,6 +108,13 @@ enum ARMMemoryAccessType { ARM_ACCESS_TRANSLATED_BYTE = 17 }; +enum ARMBranchType { + ARM_BRANCH_NONE = 0, + ARM_BRANCH = 1, + ARM_BRANCH_INDIRECT = 2, + ARM_BRANCH_LINKED = 4 +}; + struct ARMMemoryAccess { uint8_t baseReg; uint8_t width; @@ -175,17 +182,17 @@ struct ARMInstructionInfo { struct ARMMemoryAccess memory; int operandFormat; unsigned execMode : 1; - bool branches : 1; bool traps : 1; bool affectsCPSR : 1; + unsigned branchType : 3; unsigned condition : 4; unsigned mnemonic : 6; - unsigned iCycles : 2; + unsigned iCycles : 3; unsigned cCycles : 4; - unsigned sDataCycles : 10; - unsigned nDataCycles : 10; unsigned sInstructionCycles : 4; unsigned nInstructionCycles : 4; + unsigned sDataCycles : 10; + unsigned nDataCycles : 10; }; void ARMDecodeARM(uint32_t opcode, struct ARMInstructionInfo* info); From 13e16d2e36e6734c20deacaa15614c0530e57739 Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Tue, 21 Oct 2014 04:00:15 -0700 Subject: [PATCH 10/34] Actually set C flags properly now --- CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f28deab7e..debddd475 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,8 +1,7 @@ cmake_minimum_required(VERSION 2.6) project(mGBA C) set(BINARY_NAME mgba CACHE INTERNAL "Name of output binaries") -set(CMAKE_C_FLAGS_DEBUG 
"${CMAKE_C_FLAGS_DEBUG} -g -Wall -Wextra -std=gnu99") -set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3 -Wall -Wextra -std=gnu99") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -std=gnu99") set(USE_CLI_DEBUGGER ON CACHE BOOL "Whether or not to enable the CLI-mode ARM debugger") set(USE_GDB_STUB ON CACHE BOOL "Whether or not to enable the GDB stub ARM debugger") set(USE_FFMPEG ON CACHE BOOL "Whether or not to enable FFmpeg support") From 501b6b621c6c27cbb5523f53172e00c426a8b445 Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Tue, 21 Oct 2014 22:07:18 -0700 Subject: [PATCH 11/34] Classic NES games jump to VRAM --- src/gba/gba-memory.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/gba/gba-memory.c b/src/gba/gba-memory.c index 4bbcd1ec0..7153f38ce 100644 --- a/src/gba/gba-memory.c +++ b/src/gba/gba-memory.c @@ -136,6 +136,10 @@ static void GBASetActiveRegion(struct ARMCore* cpu, uint32_t address) { cpu->memory.activeRegion = memory->iwram; cpu->memory.activeMask = SIZE_WORKING_IRAM - 1; break; + case BASE_VRAM: + cpu->memory.activeRegion = (uint32_t*) gba->video.renderer->vram; + cpu->memory.activeMask = 0x0000FFFF; + break; case BASE_CART0: case BASE_CART0_EX: case BASE_CART1: From b11528c69df86c3be49ac4715495d38b1309b53e Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Tue, 21 Oct 2014 22:08:20 -0700 Subject: [PATCH 12/34] Prevent deinitializing savegame data when the data is not masked --- src/gba/gba-savedata.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gba/gba-savedata.c b/src/gba/gba-savedata.c index e1839c82b..7a5dc97f7 100644 --- a/src/gba/gba-savedata.c +++ b/src/gba/gba-savedata.c @@ -70,6 +70,9 @@ void GBASavedataMask(struct GBASavedata* savedata, struct VFile* vf) { } void GBASavedataUnmask(struct GBASavedata* savedata) { + if (savedata->mapMode != MAP_READ) { + return; + } GBASavedataDeinit(savedata); savedata->vf = savedata->realVf; savedata->mapMode = MAP_WRITE; From c52edab71a0f3465c508b554130fdccb4108a654 Mon 
Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Tue, 21 Oct 2014 22:10:20 -0700 Subject: [PATCH 13/34] Add override for NES classic series games --- src/gba/gba.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/gba/gba.c b/src/gba/gba.c index fe1c71913..1e95023fd 100644 --- a/src/gba/gba.c +++ b/src/gba/gba.c @@ -651,6 +651,11 @@ void GBAIllegal(struct ARMCore* cpu, uint32_t opcode) { void _checkOverrides(struct GBA* gba, uint32_t id) { int i; gba->busyLoop = -1; + if ((id & 0xFF) == 'F') { + GBALog(gba, GBA_LOG_DEBUG, "Found Classic NES Series game, using EEPROM saves"); + GBASavedataInitEEPROM(&gba->memory.savedata); + return; + } for (i = 0; _overrides[i].id[0]; ++i) { const uint32_t* overrideId = (const uint32_t*) _overrides[i].id; if (*overrideId == id) { From 8903d3145849278cd087aabe72ebe4ad46b78464 Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Tue, 21 Oct 2014 22:12:57 -0700 Subject: [PATCH 14/34] Pepper logging throughout the savegame loading mechanism --- src/gba/gba-memory.c | 5 +++++ src/gba/gba.c | 1 + 2 files changed, 6 insertions(+) diff --git a/src/gba/gba-memory.c b/src/gba/gba-memory.c index 7153f38ce..af624d215 100644 --- a/src/gba/gba-memory.c +++ b/src/gba/gba-memory.c @@ -365,6 +365,7 @@ int8_t GBALoad8(struct ARMCore* cpu, uint32_t address, int* cycleCounter) { case REGION_CART_SRAM_MIRROR: wait = memory->waitstatesNonseq16[address >> BASE_OFFSET]; if (memory->savedata.type == SAVEDATA_NONE) { + GBALog(gba, GBA_LOG_INFO, "Detected SRAM savegame"); GBASavedataInitSRAM(&memory->savedata); } if (memory->savedata.type == SAVEDATA_SRAM) { @@ -476,6 +477,7 @@ void GBAStore16(struct ARMCore* cpu, uint32_t address, int16_t value, int* cycle break; case REGION_CART2_EX: if (memory->savedata.type == SAVEDATA_NONE) { + GBALog(gba, GBA_LOG_INFO, "Detected EEPROM savegame"); GBASavedataInitEEPROM(&memory->savedata); } GBASavedataWriteEEPROM(&memory->savedata, value, 1); @@ -532,8 +534,10 @@ void GBAStore8(struct ARMCore* cpu, uint32_t address, 
int8_t value, int* cycleCo case REGION_CART_SRAM_MIRROR: if (memory->savedata.type == SAVEDATA_NONE) { if (address == SAVEDATA_FLASH_BASE) { + GBALog(gba, GBA_LOG_INFO, "Detected Flash savegame"); GBASavedataInitFlash(&memory->savedata); } else { + GBALog(gba, GBA_LOG_INFO, "Detected SRAM savegame"); GBASavedataInitSRAM(&memory->savedata); } } @@ -817,6 +821,7 @@ void GBAMemoryServiceDMA(struct GBA* gba, int number, struct GBADMA* info) { --wordsRemaining; } else if (destRegion == REGION_CART2_EX) { if (memory->savedata.type == SAVEDATA_NONE) { + GBALog(gba, GBA_LOG_INFO, "Detected EEPROM savegame"); GBASavedataInitEEPROM(&memory->savedata); } word = cpu->memory.load16(cpu, source, 0); diff --git a/src/gba/gba.c b/src/gba/gba.c index 1e95023fd..02d074199 100644 --- a/src/gba/gba.c +++ b/src/gba/gba.c @@ -659,6 +659,7 @@ void _checkOverrides(struct GBA* gba, uint32_t id) { for (i = 0; _overrides[i].id[0]; ++i) { const uint32_t* overrideId = (const uint32_t*) _overrides[i].id; if (*overrideId == id) { + GBALog(gba, GBA_LOG_DEBUG, "Found override for game %s!", _overrides[i].id); switch (_overrides[i].type) { case SAVEDATA_FLASH512: case SAVEDATA_FLASH1M: From 03b5e2d78eb58a1aac3fccd4bd0e6fff5d13bc59 Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Tue, 21 Oct 2014 22:17:52 -0700 Subject: [PATCH 15/34] Change the GBA magic to work on even improperly compiled ROMs --- src/gba/gba.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gba/gba.c b/src/gba/gba.c index 02d074199..1654f61ea 100644 --- a/src/gba/gba.c +++ b/src/gba/gba.c @@ -14,8 +14,8 @@ const uint32_t GBA_ARM7TDMI_FREQUENCY = 0x1000000; const uint32_t GBA_COMPONENT_MAGIC = 0x1000000; -static const size_t GBA_ROM_MAGIC_OFFSET = 4; -static const uint8_t GBA_ROM_MAGIC[] = { 0x24, 0xFF, 0xAE, 0x51, 0x69, 0x9A, 0xA2, 0x21 }; +static const size_t GBA_ROM_MAGIC_OFFSET = 1; +static const uint8_t GBA_ROM_MAGIC[] = { 0x00, 0x00, 0xEA }; enum { SP_BASE_SYSTEM = 0x03FFFF00, From 
4bb8b7b1fdc48cfa37ea1ae3e44553bf8330ebaa Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Tue, 21 Oct 2014 22:24:32 -0700 Subject: [PATCH 16/34] Shutdown thread preemptively if the loaded file is not a ROM --- src/gba/gba-thread.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gba/gba-thread.c b/src/gba/gba-thread.c index e0ac4084a..5e1b0cf16 100644 --- a/src/gba/gba-thread.c +++ b/src/gba/gba-thread.c @@ -282,6 +282,7 @@ bool GBAThreadStart(struct GBAThread* threadContext) { } if (!threadContext->rom) { + threadContext->state = THREAD_SHUTDOWN; return false; } From 537480b940f5a030344b3e0ccd77752fb519a326 Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Tue, 21 Oct 2014 03:53:48 -0700 Subject: [PATCH 17/34] Load/Store multiple functions --- src/arm/arm.h | 13 ++- src/arm/isa-arm.c | 77 ++++--------- src/arm/isa-thumb.c | 78 +++++-------- src/debugger/memory-debugger.c | 3 - src/gba/gba-memory.c | 203 ++++++++++++++++++++++++++++++++- src/gba/gba-memory.h | 3 + 6 files changed, 265 insertions(+), 112 deletions(-) diff --git a/src/arm/arm.h b/src/arm/arm.h index 429a4bb67..11de34f49 100644 --- a/src/arm/arm.h +++ b/src/arm/arm.h @@ -48,6 +48,15 @@ enum RegisterBank { BANK_UNDEFINED = 5 }; +enum LSMDirection { + LSM_B = 1, + LSM_D = 2, + LSM_IA = 0, + LSM_IB = 1, + LSM_DA = 2, + LSM_DB = 3 +}; + struct ARMCore; union PSR { @@ -89,6 +98,9 @@ struct ARMMemory { void (*store16)(struct ARMCore*, uint32_t address, int16_t value, int* cycleCounter); void (*store8)(struct ARMCore*, uint32_t address, int8_t value, int* cycleCounter); + uint32_t (*loadMultiple)(struct ARMCore*, uint32_t baseAddress, int mask, enum LSMDirection direction, int* cycleCounter); + uint32_t (*storeMultiple)(struct ARMCore*, uint32_t baseAddress, int mask, enum LSMDirection direction, int* cycleCounter); + uint32_t* activeRegion; uint32_t activeMask; uint32_t activeSeqCycles32; @@ -98,7 +110,6 @@ struct ARMMemory { uint32_t activeUncachedCycles32; uint32_t activeUncachedCycles16; void 
(*setActiveRegion)(struct ARMCore*, uint32_t address); - int (*waitMultiple)(struct ARMCore*, uint32_t startAddress, int count); }; struct ARMInterruptHandler { diff --git a/src/arm/isa-arm.c b/src/arm/isa-arm.c index 3beab5a13..8a3c7f9c5 100644 --- a/src/arm/isa-arm.c +++ b/src/arm/isa-arm.c @@ -241,6 +241,8 @@ static inline void _immediate(struct ARMCore* cpu, uint32_t opcode) { #define ADDR_MODE_3_INDEX(U_OP, M) ADDR_MODE_2_INDEX(U_OP, M) #define ADDR_MODE_3_WRITEBACK(ADDR) ADDR_MODE_2_WRITEBACK(ADDR) +#define ADDR_MODE_4_WRITEBACK cpu->gprs[rn] = address + #define ARM_LOAD_POST_BODY \ ++currentCycles; \ if (rd == ARM_PC) { \ @@ -385,66 +387,35 @@ static inline void _immediate(struct ARMCore* cpu, uint32_t opcode) { #define ARM_MS_POST ARMSetPrivilegeMode(cpu, privilegeMode); -#define ADDR_MODE_4_DA uint32_t addr = cpu->gprs[rn] -#define ADDR_MODE_4_IA uint32_t addr = cpu->gprs[rn] -#define ADDR_MODE_4_DB uint32_t addr = cpu->gprs[rn] - 4 -#define ADDR_MODE_4_IB uint32_t addr = cpu->gprs[rn] + 4 -#define ADDR_MODE_4_DAW cpu->gprs[rn] = addr -#define ADDR_MODE_4_IAW cpu->gprs[rn] = addr -#define ADDR_MODE_4_DBW cpu->gprs[rn] = addr + 4 -#define ADDR_MODE_4_IBW cpu->gprs[rn] = addr - 4 - -#define ARM_M_INCREMENT(BODY) \ - for (m = rs, i = 0; m; m >>= 1, ++i) { \ - if (m & 1) { \ - BODY; \ - addr += 4; \ - total += 1; \ - } \ - } - -#define ARM_M_DECREMENT(BODY) \ - for (m = 0x8000, i = 15; m; m >>= 1, --i) { \ - if (rs & m) { \ - BODY; \ - addr -= 4; \ - total += 1; \ - } \ - } - -#define DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME, ADDRESS, WRITEBACK, LOOP, S_PRE, S_POST, BODY, POST_BODY) \ +#define DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME, LS, WRITEBACK, S_PRE, S_POST, DIRECTION, POST_BODY) \ DEFINE_INSTRUCTION_ARM(NAME, \ int rn = (opcode >> 16) & 0xF; \ int rs = opcode & 0x0000FFFF; \ - int m; \ - int i; \ - int total = 0; \ - ADDRESS; \ + uint32_t address = cpu->gprs[rn]; \ S_PRE; \ - LOOP(BODY); \ + address = cpu->memory. 
LS ## Multiple(cpu, address, rs, LSM_ ## DIRECTION, &currentCycles); \ S_POST; \ - currentCycles += cpu->memory.waitMultiple(cpu, addr, total); \ POST_BODY; \ WRITEBACK;) -#define DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_ARM(NAME, BODY, POST_BODY) \ - DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## DA, ADDR_MODE_4_DA, , ARM_M_DECREMENT, , , BODY, POST_BODY) \ - DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## DAW, ADDR_MODE_4_DA, ADDR_MODE_4_DAW, ARM_M_DECREMENT, , , BODY, POST_BODY) \ - DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## DB, ADDR_MODE_4_DB, , ARM_M_DECREMENT, , , BODY, POST_BODY) \ - DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## DBW, ADDR_MODE_4_DB, ADDR_MODE_4_DBW, ARM_M_DECREMENT, , , BODY, POST_BODY) \ - DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## IA, ADDR_MODE_4_IA, , ARM_M_INCREMENT, , , BODY, POST_BODY) \ - DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## IAW, ADDR_MODE_4_IA, ADDR_MODE_4_IAW, ARM_M_INCREMENT, , , BODY, POST_BODY) \ - DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## IB, ADDR_MODE_4_IB, , ARM_M_INCREMENT, , , BODY, POST_BODY) \ - DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## IBW, ADDR_MODE_4_IB, ADDR_MODE_4_IBW, ARM_M_INCREMENT, , , BODY, POST_BODY) \ - DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## SDA, ADDR_MODE_4_DA, , ARM_M_DECREMENT, ARM_MS_PRE, ARM_MS_POST, BODY, POST_BODY) \ - DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## SDAW, ADDR_MODE_4_DA, ADDR_MODE_4_DAW, ARM_M_DECREMENT, ARM_MS_PRE, ARM_MS_POST, BODY, POST_BODY) \ - DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## SDB, ADDR_MODE_4_DB, , ARM_M_DECREMENT, ARM_MS_PRE, ARM_MS_POST, BODY, POST_BODY) \ - DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## SDBW, ADDR_MODE_4_DB, ADDR_MODE_4_DBW, ARM_M_DECREMENT, ARM_MS_PRE, ARM_MS_POST, BODY, POST_BODY) \ - DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## SIA, ADDR_MODE_4_IA, , ARM_M_INCREMENT, ARM_MS_PRE, ARM_MS_POST, BODY, POST_BODY) \ -
DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## SIAW, ADDR_MODE_4_IA, ADDR_MODE_4_IAW, ARM_M_INCREMENT, ARM_MS_PRE, ARM_MS_POST, BODY, POST_BODY) \ - DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## SIB, ADDR_MODE_4_IB, , ARM_M_INCREMENT, ARM_MS_PRE, ARM_MS_POST, BODY, POST_BODY) \ - DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## SIBW, ADDR_MODE_4_IB, ADDR_MODE_4_IBW, ARM_M_INCREMENT, ARM_MS_PRE, ARM_MS_POST, BODY, POST_BODY) +#define DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_ARM(NAME, LS, POST_BODY) \ + DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## DA, LS, , , , DA, POST_BODY) \ + DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## DAW, LS, ADDR_MODE_4_WRITEBACK, , , DA, POST_BODY) \ + DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## DB, LS, , , , DB, POST_BODY) \ + DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## DBW, LS, ADDR_MODE_4_WRITEBACK, , , DB, POST_BODY) \ + DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## IA, LS, , , , IA, POST_BODY) \ + DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## IAW, LS, ADDR_MODE_4_WRITEBACK, , , IA, POST_BODY) \ + DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## IB, LS, , , , IB, POST_BODY) \ + DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## IBW, LS, ADDR_MODE_4_WRITEBACK, , , IB, POST_BODY) \ + DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## SDA, LS, , ARM_MS_PRE, ARM_MS_POST, DA, POST_BODY) \ + DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## SDAW, LS, ADDR_MODE_4_WRITEBACK, ARM_MS_PRE, ARM_MS_POST, DA, POST_BODY) \ + DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## SDB, LS, , ARM_MS_PRE, ARM_MS_POST, DB, POST_BODY) \ + DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## SDBW, LS, ADDR_MODE_4_WRITEBACK, ARM_MS_PRE, ARM_MS_POST, DB, POST_BODY) \ + DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## SIA, LS, , ARM_MS_PRE, ARM_MS_POST, IA, POST_BODY) \ + DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## SIAW, LS, ADDR_MODE_4_WRITEBACK, ARM_MS_PRE, 
ARM_MS_POST, IA, POST_BODY) \ + DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## SIB, LS, , ARM_MS_PRE, ARM_MS_POST, IB, POST_BODY) \ + DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_EX_ARM(NAME ## SIBW, LS, ADDR_MODE_4_WRITEBACK, ARM_MS_PRE, ARM_MS_POST, IB, POST_BODY) // Begin ALU definitions @@ -580,14 +551,14 @@ DEFINE_LOAD_STORE_T_INSTRUCTION_ARM(STRT, ARM_STORE_POST_BODY;) DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_ARM(LDM, - cpu->gprs[i] = cpu->memory.load32(cpu, addr & 0xFFFFFFFC, 0);, + load, ++currentCycles; if (rs & 0x8000) { ARM_WRITE_PC; }) DEFINE_LOAD_STORE_MULTIPLE_INSTRUCTION_ARM(STM, - cpu->memory.store32(cpu, addr, cpu->gprs[i], 0);, + store, currentCycles += cpu->memory.activeNonseqCycles32 - cpu->memory.activeSeqCycles32) DEFINE_INSTRUCTION_ARM(SWP, diff --git a/src/arm/isa-thumb.c b/src/arm/isa-thumb.c index 2e4d22994..5d5611463 100644 --- a/src/arm/isa-thumb.c +++ b/src/arm/isa-thumb.c @@ -289,39 +289,30 @@ DEFINE_LOAD_STORE_WITH_REGISTER_THUMB(STR2, cpu->memory.store32(cpu, cpu->gprs[r DEFINE_LOAD_STORE_WITH_REGISTER_THUMB(STRB2, cpu->memory.store8(cpu, cpu->gprs[rn] + cpu->gprs[rm], cpu->gprs[rd], &currentCycles); THUMB_STORE_POST_BODY;) DEFINE_LOAD_STORE_WITH_REGISTER_THUMB(STRH2, cpu->memory.store16(cpu, cpu->gprs[rn] + cpu->gprs[rm], cpu->gprs[rd], &currentCycles); THUMB_STORE_POST_BODY;) -#define DEFINE_LOAD_STORE_MULTIPLE_EX_THUMB(NAME, RN, ADDRESS, LOOP, BODY, OP, PRE_BODY, POST_BODY, WRITEBACK) \ +#define DEFINE_LOAD_STORE_MULTIPLE_EX_THUMB(NAME, RN, LS, DIRECTION, PRE_BODY, WRITEBACK) \ DEFINE_INSTRUCTION_THUMB(NAME, \ int rn = RN; \ UNUSED(rn); \ int rs = opcode & 0xFF; \ - int32_t address = ADDRESS; \ - int m; \ - int i; \ - int total = 0; \ + int32_t address = cpu->gprs[RN]; \ PRE_BODY; \ - for LOOP { \ - if (rs & m) { \ - BODY; \ - address OP 4; \ - ++total; \ - } \ - } \ - POST_BODY; \ - currentCycles += cpu->memory.waitMultiple(cpu, address, total); \ + address = cpu->memory. 
LS ## Multiple(cpu, address, rs, LSM_ ## DIRECTION, &currentCycles); \ WRITEBACK;) -#define DEFINE_LOAD_STORE_MULTIPLE_THUMB(NAME, BODY, WRITEBACK) \ - COUNT_CALL_3(DEFINE_LOAD_STORE_MULTIPLE_EX_THUMB, NAME ## _R, cpu->gprs[rn], (m = 0x01, i = 0; i < 8; m <<= 1, ++i), BODY, +=, , , WRITEBACK) +#define DEFINE_LOAD_STORE_MULTIPLE_THUMB(NAME, LS, DIRECTION, WRITEBACK) \ + COUNT_CALL_3(DEFINE_LOAD_STORE_MULTIPLE_EX_THUMB, NAME ## _R, LS, DIRECTION, , WRITEBACK) DEFINE_LOAD_STORE_MULTIPLE_THUMB(LDMIA, - cpu->gprs[i] = cpu->memory.load32(cpu, address, 0), + load, + IA, THUMB_LOAD_POST_BODY; if (!((1 << rn) & rs)) { cpu->gprs[rn] = address; }) DEFINE_LOAD_STORE_MULTIPLE_THUMB(STMIA, - cpu->memory.store32(cpu, address, cpu->gprs[i], 0), + store, + IA, THUMB_STORE_POST_BODY; cpu->gprs[rn] = address;) @@ -352,48 +343,37 @@ DEFINE_INSTRUCTION_THUMB(ADD7, cpu->gprs[ARM_SP] += (opcode & 0x7F) << 2) DEFINE_INSTRUCTION_THUMB(SUB4, cpu->gprs[ARM_SP] -= (opcode & 0x7F) << 2) DEFINE_LOAD_STORE_MULTIPLE_EX_THUMB(POP, - opcode & 0x00FF, - cpu->gprs[ARM_SP], - (m = 0x01, i = 0; i < 8; m <<= 1, ++i), - cpu->gprs[i] = cpu->memory.load32(cpu, address, 0), - +=, + ARM_SP, + load, + IA, , - THUMB_LOAD_POST_BODY;, + THUMB_LOAD_POST_BODY; cpu->gprs[ARM_SP] = address) DEFINE_LOAD_STORE_MULTIPLE_EX_THUMB(POPR, - opcode & 0x00FF, - cpu->gprs[ARM_SP], - (m = 0x01, i = 0; i < 8; m <<= 1, ++i), - cpu->gprs[i] = cpu->memory.load32(cpu, address, 0), - +=, - , - cpu->gprs[ARM_PC] = cpu->memory.load32(cpu, address, 0) & 0xFFFFFFFE; - address += 4; - THUMB_LOAD_POST_BODY;, + ARM_SP, + load, + IA, + rs |= 1 << ARM_PC, + THUMB_LOAD_POST_BODY; cpu->gprs[ARM_SP] = address; THUMB_WRITE_PC;) DEFINE_LOAD_STORE_MULTIPLE_EX_THUMB(PUSH, - opcode & 0x00FF, - cpu->gprs[ARM_SP] - 4, - (m = 0x80, i = 7; m; m >>= 1, --i), - cpu->memory.store32(cpu, address, cpu->gprs[i], 0), - -=, + ARM_SP, + store, + DB, , - THUMB_STORE_POST_BODY, - cpu->gprs[ARM_SP] = address + 4) + THUMB_STORE_POST_BODY; + cpu->gprs[ARM_SP] = address) 
DEFINE_LOAD_STORE_MULTIPLE_EX_THUMB(PUSHR, - opcode & 0x00FF, - cpu->gprs[ARM_SP] - 4, - (m = 0x80, i = 7; m; m >>= 1, --i), - cpu->memory.store32(cpu, address, cpu->gprs[i], 0), - -=, - cpu->memory.store32(cpu, address, cpu->gprs[ARM_LR], 0); - address -= 4;, - THUMB_STORE_POST_BODY, - cpu->gprs[ARM_SP] = address + 4) + ARM_SP, + store, + DB, + rs |= 1 << ARM_LR, + THUMB_STORE_POST_BODY; + cpu->gprs[ARM_SP] = address) DEFINE_INSTRUCTION_THUMB(ILL, ARM_ILL) DEFINE_INSTRUCTION_THUMB(BKPT, ARM_STUB) diff --git a/src/debugger/memory-debugger.c b/src/debugger/memory-debugger.c index adcc3313a..df934e1cd 100644 --- a/src/debugger/memory-debugger.c +++ b/src/debugger/memory-debugger.c @@ -43,7 +43,6 @@ CREATE_WATCHPOINT_SHIM(loadU8, 1, uint8_t, (struct ARMCore* cpu, uint32_t addres CREATE_WATCHPOINT_SHIM(store32, 4, void, (struct ARMCore* cpu, uint32_t address, int32_t value, int* cycleCounter), address, value, cycleCounter) CREATE_WATCHPOINT_SHIM(store16, 2, void, (struct ARMCore* cpu, uint32_t address, int16_t value, int* cycleCounter), address, value, cycleCounter) CREATE_WATCHPOINT_SHIM(store8, 1, void, (struct ARMCore* cpu, uint32_t address, int8_t value, int* cycleCounter), address, value, cycleCounter) -CREATE_SHIM(waitMultiple, int, (struct ARMCore* cpu, uint32_t startAddress, int count), startAddress, count) CREATE_SHIM(setActiveRegion, void, (struct ARMCore* cpu, uint32_t address), address) static bool _checkWatchpoints(struct DebugBreakpoint* watchpoints, uint32_t address, int width) { @@ -67,7 +66,6 @@ void ARMDebuggerInstallMemoryShim(struct ARMDebugger* debugger) { debugger->cpu->memory.load8 = ARMDebuggerShim_load8; debugger->cpu->memory.loadU8 = ARMDebuggerShim_loadU8; debugger->cpu->memory.setActiveRegion = ARMDebuggerShim_setActiveRegion; - debugger->cpu->memory.waitMultiple = ARMDebuggerShim_waitMultiple; } void ARMDebuggerRemoveMemoryShim(struct ARMDebugger* debugger) { @@ -80,5 +78,4 @@ void ARMDebuggerRemoveMemoryShim(struct ARMDebugger* debugger) { 
debugger->cpu->memory.load8 = debugger->originalMemory.load8; debugger->cpu->memory.loadU8 = debugger->originalMemory.loadU8; debugger->cpu->memory.setActiveRegion = debugger->originalMemory.setActiveRegion; - debugger->cpu->memory.waitMultiple = debugger->originalMemory.waitMultiple; } diff --git a/src/gba/gba-memory.c b/src/gba/gba-memory.c index af624d215..b86fea46d 100644 --- a/src/gba/gba-memory.c +++ b/src/gba/gba-memory.c @@ -9,7 +9,6 @@ #include "util/memory.h" static void GBASetActiveRegion(struct ARMCore* cpu, uint32_t region); -static int GBAWaitMultiple(struct ARMCore* cpu, uint32_t startAddress, int count); static void GBAMemoryServiceDMA(struct GBA* gba, int number, struct GBADMA* info); static const char GBA_BASE_WAITSTATES[16] = { 0, 0, 2, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4 }; @@ -27,9 +26,11 @@ void GBAMemoryInit(struct GBA* gba) { cpu->memory.loadU16 = GBALoadU16; cpu->memory.load8 = GBALoad8; cpu->memory.loadU8 = GBALoadU8; + cpu->memory.loadMultiple = GBALoadMultiple; cpu->memory.store32 = GBAStore32; cpu->memory.store16 = GBAStore16; cpu->memory.store8 = GBAStore8; + cpu->memory.storeMultiple = GBAStoreMultiple; gba->memory.bios = (uint32_t*) hleBios; gba->memory.fullBios = 0; @@ -67,7 +68,6 @@ void GBAMemoryInit(struct GBA* gba) { cpu->memory.activeUncachedCycles32 = 0; cpu->memory.activeUncachedCycles16 = 0; gba->memory.biosPrefetch = 0; - cpu->memory.waitMultiple = GBAWaitMultiple; } void GBAMemoryDeinit(struct GBA* gba) { @@ -558,12 +558,203 @@ void GBAStore8(struct ARMCore* cpu, uint32_t address, int8_t value, int* cycleCo } } -static int GBAWaitMultiple(struct ARMCore* cpu, uint32_t startAddress, int count) { +uint32_t GBALoadMultiple(struct ARMCore* cpu, uint32_t address, int mask, enum LSMDirection direction, int* cycleCounter) { struct GBA* gba = (struct GBA*) cpu->master; struct GBAMemory* memory = &gba->memory; - int wait = 1 + memory->waitstatesNonseq32[startAddress >> BASE_OFFSET]; - wait += (1 + 
memory->waitstatesSeq32[startAddress >> BASE_OFFSET]) * (count - 1); - return wait; + uint32_t value; + int wait; + int totalWait = 0; + char* waitstatesRegion = memory->waitstatesNonseq32; + + int i; + int offset = 4; + int popcount = 0; + if (direction & LSM_D) { + offset = -4; + for (i = 0; i < 16; ++i) { + if (mask & (1 << i)) { + ++popcount; + } + } + address -= (popcount << 2) - 4; + } + + if (direction & LSM_B) { + address += offset; + } + + address &= 0xFFFFFFFC; + + for (i = 0; i < 16; ++i) { + wait = 0; + if (~mask & (1 << i)) { + continue; + } + switch (address >> BASE_OFFSET) { + case REGION_BIOS: + if (memory->activeRegion == REGION_BIOS) { + if (address < SIZE_BIOS) { + LOAD_32(value, address, memory->bios); + } else { + value = 0; + } + } else { + value = memory->biosPrefetch; + } + break; + case REGION_WORKING_RAM: + LOAD_32(value, address & (SIZE_WORKING_RAM - 1), memory->wram); + wait = waitstatesRegion[REGION_WORKING_RAM]; + break; + case REGION_WORKING_IRAM: + LOAD_32(value, address & (SIZE_WORKING_IRAM - 1), memory->iwram); + break; + case REGION_IO: + value = GBAIORead(gba, (address & (SIZE_IO - 1)) & ~2) | (GBAIORead(gba, (address & (SIZE_IO - 1)) | 2) << 16); + break; + case REGION_PALETTE_RAM: + LOAD_32(value, address & (SIZE_PALETTE_RAM - 1), gba->video.palette); + break; + case REGION_VRAM: + LOAD_32(value, address & 0x0001FFFF, gba->video.renderer->vram); + break; + case REGION_OAM: + LOAD_32(value, address & (SIZE_OAM - 1), gba->video.oam.raw); + break; + case REGION_CART0: + case REGION_CART0_EX: + case REGION_CART1: + case REGION_CART1_EX: + case REGION_CART2: + case REGION_CART2_EX: + wait = waitstatesRegion[address >> BASE_OFFSET]; + if ((address & (SIZE_CART0 - 1)) < memory->romSize) { + LOAD_32(value, address & (SIZE_CART0 - 1), memory->rom); + } + break; + case REGION_CART_SRAM: + case REGION_CART_SRAM_MIRROR: + GBALog(gba, GBA_LOG_STUB, "Unimplemented memory Load32: 0x%08X", address); + break; + default: + GBALog(gba, 
GBA_LOG_GAME_ERROR, "Bad memory Load32: 0x%08X", address); + value = cpu->prefetch; + if (cpu->executionMode == MODE_THUMB) { + value |= value << 16; + } + break; + } + waitstatesRegion = memory->waitstatesSeq32; + cpu->gprs[i] = value; + totalWait += 1 + wait; + address += 4; + } + + if (cycleCounter) { + *cycleCounter += totalWait; + } + + if (direction & LSM_B) { + address -= offset; + } + + if (direction & LSM_D) { + address -= (popcount << 2) + 4; + } + + return address; +} + +uint32_t GBAStoreMultiple(struct ARMCore* cpu, uint32_t address, int mask, enum LSMDirection direction, int* cycleCounter) { + struct GBA* gba = (struct GBA*) cpu->master; + struct GBAMemory* memory = &gba->memory; + uint32_t value; + int wait; + int totalWait = 0; + char* waitstatesRegion = memory->waitstatesNonseq32; + + int i; + int offset = 4; + int popcount = 0; + if (direction & LSM_D) { + offset = -4; + for (i = 0; i < 16; ++i) { + if (mask & (1 << i)) { + ++popcount; + } + } + address -= (popcount << 2) - 4; + } + + if (direction & LSM_B) { + address += offset; + } + + address &= 0xFFFFFFFC; + + for (i = 0; i < 16; ++i) { + wait = 0; + if (~mask & (1 << i)) { + continue; + } + value = cpu->gprs[i]; + switch (address >> BASE_OFFSET) { + case REGION_WORKING_RAM: + STORE_32(value, address & (SIZE_WORKING_RAM - 1), memory->wram); + wait = waitstatesRegion[REGION_WORKING_RAM]; + break; + case REGION_WORKING_IRAM: + STORE_32(value, address & (SIZE_WORKING_IRAM - 1), memory->iwram); + break; + case REGION_IO: + GBAIOWrite32(gba, address & (SIZE_IO - 1), value); + break; + case REGION_PALETTE_RAM: + STORE_32(value, address & (SIZE_PALETTE_RAM - 1), gba->video.palette); + gba->video.renderer->writePalette(gba->video.renderer, (address & (SIZE_PALETTE_RAM - 1)) + 2, value >> 16); + gba->video.renderer->writePalette(gba->video.renderer, address & (SIZE_PALETTE_RAM - 1), value); + break; + case REGION_VRAM: + if ((address & OFFSET_MASK) < SIZE_VRAM) { + STORE_32(value, address & 0x0001FFFF, 
gba->video.renderer->vram); + } else if ((address & OFFSET_MASK) < 0x00020000) { + STORE_32(value, address & 0x00017FFF, gba->video.renderer->vram); + } + break; + case REGION_OAM: + STORE_32(value, address & (SIZE_OAM - 1), gba->video.oam.raw); + gba->video.renderer->writeOAM(gba->video.renderer, (address & (SIZE_OAM - 4)) >> 1); + gba->video.renderer->writeOAM(gba->video.renderer, ((address & (SIZE_OAM - 4)) >> 1) + 1); + break; + case REGION_CART0: + GBALog(gba, GBA_LOG_STUB, "Unimplemented memory Store32: 0x%08X", address); + break; + case REGION_CART_SRAM: + case REGION_CART_SRAM_MIRROR: + GBALog(gba, GBA_LOG_STUB, "Unimplemented memory Store32: 0x%08X", address); + break; + default: + GBALog(gba, GBA_LOG_GAME_ERROR, "Bad memory Store32: 0x%08X", address); + break; + } + waitstatesRegion = memory->waitstatesSeq32; + totalWait += 1 + wait; + address += 4; + } + + if (cycleCounter) { + *cycleCounter += totalWait; + } + + if (direction & LSM_B) { + address -= offset; + } + + if (direction & LSM_D) { + address -= (popcount << 2) + 4; + } + + return address; } void GBAAdjustWaitstates(struct GBA* gba, uint16_t parameters) { diff --git a/src/gba/gba-memory.h b/src/gba/gba-memory.h index 23bd0a41a..0c6b23647 100644 --- a/src/gba/gba-memory.h +++ b/src/gba/gba-memory.h @@ -149,6 +149,9 @@ void GBAStore32(struct ARMCore* cpu, uint32_t address, int32_t value, int* cycle void GBAStore16(struct ARMCore* cpu, uint32_t address, int16_t value, int* cycleCounter); void GBAStore8(struct ARMCore* cpu, uint32_t address, int8_t value, int* cycleCounter); +uint32_t GBALoadMultiple(struct ARMCore*, uint32_t baseAddress, int mask, enum LSMDirection direction, int* cycleCounter); +uint32_t GBAStoreMultiple(struct ARMCore*, uint32_t baseAddress, int mask, enum LSMDirection direction, int* cycleCounter); + void GBAAdjustWaitstates(struct GBA* gba, uint16_t parameters); void GBAMemoryWriteDMASAD(struct GBA* gba, int dma, uint32_t address); From 9abe700d37c5453eb735548d0a19bbfce9545279 
Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Tue, 21 Oct 2014 22:51:39 -0700 Subject: [PATCH 18/34] Use GCC popcount intrinsic for popcount --- src/gba/gba-memory.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/gba/gba-memory.c b/src/gba/gba-memory.c index b86fea46d..9bc184c57 100644 --- a/src/gba/gba-memory.c +++ b/src/gba/gba-memory.c @@ -571,11 +571,7 @@ uint32_t GBALoadMultiple(struct ARMCore* cpu, uint32_t address, int mask, enum L int popcount = 0; if (direction & LSM_D) { offset = -4; - for (i = 0; i < 16; ++i) { - if (mask & (1 << i)) { - ++popcount; - } - } + popcount = __builtin_popcount(mask); address -= (popcount << 2) - 4; } @@ -678,11 +674,7 @@ uint32_t GBAStoreMultiple(struct ARMCore* cpu, uint32_t address, int mask, enum int popcount = 0; if (direction & LSM_D) { offset = -4; - for (i = 0; i < 16; ++i) { - if (mask & (1 << i)) { - ++popcount; - } - } + popcount = __builtin_popcount(mask); address -= (popcount << 2) - 4; } From a57312d0cc299b2ad1d63a08df8149692222aa85 Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Tue, 21 Oct 2014 23:31:49 -0700 Subject: [PATCH 19/34] Refine and reuse STM/LDM code --- src/gba/gba-memory.c | 397 +++++++++++++++++++++++++------------------ 1 file changed, 236 insertions(+), 161 deletions(-) diff --git a/src/gba/gba-memory.c b/src/gba/gba-memory.c index 9bc184c57..e221d7ec8 100644 --- a/src/gba/gba-memory.c +++ b/src/gba/gba-memory.c @@ -163,42 +163,74 @@ static void GBASetActiveRegion(struct ARMCore* cpu, uint32_t address) { cpu->memory.activeUncachedCycles16 = memory->waitstatesNonseq16[memory->activeRegion]; } +#define LOAD_BIOS \ + if (memory->activeRegion == REGION_BIOS) { \ + if (address < SIZE_BIOS) { \ + LOAD_32(value, address, memory->bios); \ + } else { \ + value = 0; \ + } \ + } else { \ + value = memory->biosPrefetch; \ + } + +#define LOAD_WORKING_RAM \ + LOAD_32(value, address & (SIZE_WORKING_RAM - 1), memory->wram); \ + wait += 
waitstatesRegion[REGION_WORKING_RAM]; + +#define LOAD_WORKING_IRAM LOAD_32(value, address & (SIZE_WORKING_IRAM - 1), memory->iwram); +#define LOAD_IO value = GBAIORead(gba, (address & (SIZE_IO - 1)) & ~2) | (GBAIORead(gba, (address & (SIZE_IO - 1)) | 2) << 16); +#define LOAD_PALETTE_RAM LOAD_32(value, address & (SIZE_PALETTE_RAM - 1), gba->video.palette); +#define LOAD_VRAM LOAD_32(value, address & 0x0001FFFF, gba->video.renderer->vram); +#define LOAD_OAM LOAD_32(value, address & (SIZE_OAM - 1), gba->video.oam.raw); + +#define LOAD_CART \ + wait += waitstatesRegion[address >> BASE_OFFSET]; \ + if ((address & (SIZE_CART0 - 1)) < memory->romSize) { \ + LOAD_32(value, address & (SIZE_CART0 - 1), memory->rom); \ + } else { \ + value = (address >> 1) & 0xFFFF; \ + } + +#define LOAD_SRAM \ + GBALog(gba, GBA_LOG_STUB, "Unimplemented memory Load32: 0x%08X", address); \ + value = 0xDEADBEEF; + +#define LOAD_BAD \ + GBALog(gba, GBA_LOG_GAME_ERROR, "Bad memory Load32: 0x%08X", address); \ + value = cpu->prefetch; \ + if (cpu->executionMode == MODE_THUMB) { \ + value |= value << 16; \ + } + int32_t GBALoad32(struct ARMCore* cpu, uint32_t address, int* cycleCounter) { struct GBA* gba = (struct GBA*) cpu->master; struct GBAMemory* memory = &gba->memory; uint32_t value = 0; int wait = 0; + char* waitstatesRegion = memory->waitstatesNonseq32; switch (address >> BASE_OFFSET) { case REGION_BIOS: - if (memory->activeRegion == REGION_BIOS) { - if (address < SIZE_BIOS) { - LOAD_32(value, address, memory->bios); - } else { - value = 0; - } - } else { - value = memory->biosPrefetch; - } + LOAD_BIOS; break; case REGION_WORKING_RAM: - LOAD_32(value, address & (SIZE_WORKING_RAM - 1), memory->wram); - wait = memory->waitstatesNonseq32[REGION_WORKING_RAM]; + LOAD_WORKING_RAM; break; case REGION_WORKING_IRAM: - LOAD_32(value, address & (SIZE_WORKING_IRAM - 1), memory->iwram); + LOAD_WORKING_IRAM; break; case REGION_IO: - value = GBAIORead(gba, (address & (SIZE_IO - 1)) & ~2) | (GBAIORead(gba, 
(address & (SIZE_IO - 1)) | 2) << 16); + LOAD_IO; break; case REGION_PALETTE_RAM: - LOAD_32(value, address & (SIZE_PALETTE_RAM - 1), gba->video.palette); + LOAD_PALETTE_RAM; break; case REGION_VRAM: - LOAD_32(value, address & 0x0001FFFF, gba->video.renderer->vram); + LOAD_VRAM; break; case REGION_OAM: - LOAD_32(value, address & (SIZE_OAM - 1), gba->video.oam.raw); + LOAD_OAM; break; case REGION_CART0: case REGION_CART0_EX: @@ -206,21 +238,14 @@ int32_t GBALoad32(struct ARMCore* cpu, uint32_t address, int* cycleCounter) { case REGION_CART1_EX: case REGION_CART2: case REGION_CART2_EX: - wait = memory->waitstatesNonseq32[address >> BASE_OFFSET]; - if ((address & (SIZE_CART0 - 1)) < memory->romSize) { - LOAD_32(value, address & (SIZE_CART0 - 1), memory->rom); - } + LOAD_CART; break; case REGION_CART_SRAM: case REGION_CART_SRAM_MIRROR: - GBALog(gba, GBA_LOG_STUB, "Unimplemented memory Load32: 0x%08X", address); + LOAD_SRAM; break; default: - GBALog(gba, GBA_LOG_GAME_ERROR, "Bad memory Load32: 0x%08X", address); - value = cpu->prefetch; - if (cpu->executionMode == MODE_THUMB) { - value |= value << 16; - } + LOAD_BAD; break; } @@ -386,48 +411,81 @@ int8_t GBALoad8(struct ARMCore* cpu, uint32_t address, int* cycleCounter) { return value; } +#define STORE_WORKING_RAM \ + STORE_32(value, address & (SIZE_WORKING_RAM - 1), memory->wram); \ + wait += waitstatesRegion[REGION_WORKING_RAM]; + +#define STORE_WORKING_IRAM \ + STORE_32(value, address & (SIZE_WORKING_IRAM - 1), memory->iwram); + +#define STORE_IO \ + GBAIOWrite32(gba, address & (SIZE_IO - 1), value); + +#define STORE_PALETTE_RAM \ + STORE_32(value, address & (SIZE_PALETTE_RAM - 1), gba->video.palette); \ + gba->video.renderer->writePalette(gba->video.renderer, (address & (SIZE_PALETTE_RAM - 1)) + 2, value >> 16); \ + gba->video.renderer->writePalette(gba->video.renderer, address & (SIZE_PALETTE_RAM - 1), value); + +#define STORE_VRAM \ + if ((address & OFFSET_MASK) < SIZE_VRAM) { \ + STORE_32(value, address & 
0x0001FFFF, gba->video.renderer->vram); \ + } else if ((address & OFFSET_MASK) < 0x00020000) { \ + STORE_32(value, address & 0x00017FFF, gba->video.renderer->vram); \ + } + +#define STORE_OAM \ + STORE_32(value, address & (SIZE_OAM - 1), gba->video.oam.raw); \ + gba->video.renderer->writeOAM(gba->video.renderer, (address & (SIZE_OAM - 4)) >> 1); \ + gba->video.renderer->writeOAM(gba->video.renderer, ((address & (SIZE_OAM - 4)) >> 1) + 1); + +#define STORE_CART \ + GBALog(gba, GBA_LOG_STUB, "Unimplemented memory Store32: 0x%08X", address); + +#define STORE_SRAM \ + GBALog(gba, GBA_LOG_STUB, "Unimplemented memory Store32: 0x%08X", address); + +#define STORE_BAD \ + GBALog(gba, GBA_LOG_GAME_ERROR, "Bad memory Store32: 0x%08X", address); + void GBAStore32(struct ARMCore* cpu, uint32_t address, int32_t value, int* cycleCounter) { struct GBA* gba = (struct GBA*) cpu->master; struct GBAMemory* memory = &gba->memory; int wait = 0; + char* waitstatesRegion = memory->waitstatesNonseq32; switch (address >> BASE_OFFSET) { case REGION_WORKING_RAM: - STORE_32(value, address & (SIZE_WORKING_RAM - 1), memory->wram); - wait = memory->waitstatesNonseq32[REGION_WORKING_RAM]; + STORE_WORKING_RAM; break; case REGION_WORKING_IRAM: - STORE_32(value, address & (SIZE_WORKING_IRAM - 1), memory->iwram); + STORE_WORKING_IRAM break; case REGION_IO: - GBAIOWrite32(gba, address & (SIZE_IO - 1), value); + STORE_IO; break; case REGION_PALETTE_RAM: - STORE_32(value, address & (SIZE_PALETTE_RAM - 1), gba->video.palette); - gba->video.renderer->writePalette(gba->video.renderer, (address & (SIZE_PALETTE_RAM - 1)) + 2, value >> 16); - gba->video.renderer->writePalette(gba->video.renderer, address & (SIZE_PALETTE_RAM - 1), value); + STORE_PALETTE_RAM; break; case REGION_VRAM: - if ((address & OFFSET_MASK) < SIZE_VRAM) { - STORE_32(value, address & 0x0001FFFF, gba->video.renderer->vram); - } else if ((address & OFFSET_MASK) < 0x00020000) { - STORE_32(value, address & 0x00017FFF, 
gba->video.renderer->vram); - } + STORE_VRAM; break; case REGION_OAM: - STORE_32(value, address & (SIZE_OAM - 1), gba->video.oam.raw); - gba->video.renderer->writeOAM(gba->video.renderer, (address & (SIZE_OAM - 4)) >> 1); - gba->video.renderer->writeOAM(gba->video.renderer, ((address & (SIZE_OAM - 4)) >> 1) + 1); + STORE_OAM; break; case REGION_CART0: - GBALog(gba, GBA_LOG_STUB, "Unimplemented memory Store32: 0x%08X", address); + case REGION_CART0_EX: + case REGION_CART1: + case REGION_CART1_EX: + case REGION_CART2: + case REGION_CART2_EX: + STORE_CART; break; case REGION_CART_SRAM: case REGION_CART_SRAM_MIRROR: - GBALog(gba, GBA_LOG_STUB, "Unimplemented memory Store32: 0x%08X", address); + STORE_SRAM; break; default: - GBALog(gba, GBA_LOG_GAME_ERROR, "Bad memory Store32: 0x%08X", address); + STORE_BAD; break; } @@ -558,12 +616,24 @@ void GBAStore8(struct ARMCore* cpu, uint32_t address, int8_t value, int* cycleCo } } +#define LDM_LOOP_BEGIN \ + for (i = 0; i < 16; ++i) { \ + if (~mask & (1 << i)) { \ + continue; \ + } + +#define LDM_LOOP_END \ + waitstatesRegion = memory->waitstatesSeq32; \ + cpu->gprs[i] = value; \ + ++wait; \ + address += 4; \ + } + uint32_t GBALoadMultiple(struct ARMCore* cpu, uint32_t address, int mask, enum LSMDirection direction, int* cycleCounter) { struct GBA* gba = (struct GBA*) cpu->master; struct GBAMemory* memory = &gba->memory; uint32_t value; - int wait; - int totalWait = 0; + int wait = 0; char* waitstatesRegion = memory->waitstatesNonseq32; int i; @@ -581,73 +651,62 @@ uint32_t GBALoadMultiple(struct ARMCore* cpu, uint32_t address, int mask, enum L address &= 0xFFFFFFFC; - for (i = 0; i < 16; ++i) { - wait = 0; - if (~mask & (1 << i)) { - continue; - } - switch (address >> BASE_OFFSET) { - case REGION_BIOS: - if (memory->activeRegion == REGION_BIOS) { - if (address < SIZE_BIOS) { - LOAD_32(value, address, memory->bios); - } else { - value = 0; - } - } else { - value = memory->biosPrefetch; - } - break; - case REGION_WORKING_RAM: - 
LOAD_32(value, address & (SIZE_WORKING_RAM - 1), memory->wram); - wait = waitstatesRegion[REGION_WORKING_RAM]; - break; - case REGION_WORKING_IRAM: - LOAD_32(value, address & (SIZE_WORKING_IRAM - 1), memory->iwram); - break; - case REGION_IO: - value = GBAIORead(gba, (address & (SIZE_IO - 1)) & ~2) | (GBAIORead(gba, (address & (SIZE_IO - 1)) | 2) << 16); - break; - case REGION_PALETTE_RAM: - LOAD_32(value, address & (SIZE_PALETTE_RAM - 1), gba->video.palette); - break; - case REGION_VRAM: - LOAD_32(value, address & 0x0001FFFF, gba->video.renderer->vram); - break; - case REGION_OAM: - LOAD_32(value, address & (SIZE_OAM - 1), gba->video.oam.raw); - break; - case REGION_CART0: - case REGION_CART0_EX: - case REGION_CART1: - case REGION_CART1_EX: - case REGION_CART2: - case REGION_CART2_EX: - wait = waitstatesRegion[address >> BASE_OFFSET]; - if ((address & (SIZE_CART0 - 1)) < memory->romSize) { - LOAD_32(value, address & (SIZE_CART0 - 1), memory->rom); - } - break; - case REGION_CART_SRAM: - case REGION_CART_SRAM_MIRROR: - GBALog(gba, GBA_LOG_STUB, "Unimplemented memory Load32: 0x%08X", address); - break; - default: - GBALog(gba, GBA_LOG_GAME_ERROR, "Bad memory Load32: 0x%08X", address); - value = cpu->prefetch; - if (cpu->executionMode == MODE_THUMB) { - value |= value << 16; - } - break; - } - waitstatesRegion = memory->waitstatesSeq32; - cpu->gprs[i] = value; - totalWait += 1 + wait; - address += 4; + switch (address >> BASE_OFFSET) { + case REGION_WORKING_RAM: + LDM_LOOP_BEGIN; + LOAD_WORKING_RAM; + LDM_LOOP_END; + break; + case REGION_WORKING_IRAM: + LDM_LOOP_BEGIN; + LOAD_WORKING_IRAM; + LDM_LOOP_END; + break; + case REGION_IO: + LDM_LOOP_BEGIN; + LOAD_IO; + LDM_LOOP_END; + break; + case REGION_PALETTE_RAM: + LDM_LOOP_BEGIN; + LOAD_PALETTE_RAM; + LDM_LOOP_END; + break; + case REGION_VRAM: + LDM_LOOP_BEGIN; + LOAD_VRAM; + LDM_LOOP_END; + break; + case REGION_OAM: + LDM_LOOP_BEGIN; + LOAD_OAM; + LDM_LOOP_END; + break; + case REGION_CART0: + case REGION_CART0_EX: + 
case REGION_CART1: + case REGION_CART1_EX: + case REGION_CART2: + case REGION_CART2_EX: + LDM_LOOP_BEGIN; + LOAD_CART; + LDM_LOOP_END; + break; + case REGION_CART_SRAM: + case REGION_CART_SRAM_MIRROR: + LDM_LOOP_BEGIN; + LOAD_SRAM; + LDM_LOOP_END; + break; + default: + LDM_LOOP_BEGIN; + LOAD_BAD; + LDM_LOOP_END; + break; } if (cycleCounter) { - *cycleCounter += totalWait; + *cycleCounter += wait; } if (direction & LSM_B) { @@ -661,12 +720,24 @@ uint32_t GBALoadMultiple(struct ARMCore* cpu, uint32_t address, int mask, enum L return address; } +#define STM_LOOP_BEGIN \ + for (i = 0; i < 16; ++i) { \ + if (~mask & (1 << i)) { \ + continue; \ + } \ + value = cpu->gprs[i]; + +#define STM_LOOP_END \ + waitstatesRegion = memory->waitstatesSeq32; \ + ++wait; \ + address += 4; \ + } + uint32_t GBAStoreMultiple(struct ARMCore* cpu, uint32_t address, int mask, enum LSMDirection direction, int* cycleCounter) { struct GBA* gba = (struct GBA*) cpu->master; struct GBAMemory* memory = &gba->memory; uint32_t value; - int wait; - int totalWait = 0; + int wait = 0; char* waitstatesRegion = memory->waitstatesNonseq32; int i; @@ -684,58 +755,62 @@ uint32_t GBAStoreMultiple(struct ARMCore* cpu, uint32_t address, int mask, enum address &= 0xFFFFFFFC; - for (i = 0; i < 16; ++i) { - wait = 0; - if (~mask & (1 << i)) { - continue; - } - value = cpu->gprs[i]; - switch (address >> BASE_OFFSET) { - case REGION_WORKING_RAM: - STORE_32(value, address & (SIZE_WORKING_RAM - 1), memory->wram); - wait = waitstatesRegion[REGION_WORKING_RAM]; - break; - case REGION_WORKING_IRAM: - STORE_32(value, address & (SIZE_WORKING_IRAM - 1), memory->iwram); - break; - case REGION_IO: - GBAIOWrite32(gba, address & (SIZE_IO - 1), value); - break; - case REGION_PALETTE_RAM: - STORE_32(value, address & (SIZE_PALETTE_RAM - 1), gba->video.palette); - gba->video.renderer->writePalette(gba->video.renderer, (address & (SIZE_PALETTE_RAM - 1)) + 2, value >> 16); - gba->video.renderer->writePalette(gba->video.renderer, 
address & (SIZE_PALETTE_RAM - 1), value); - break; - case REGION_VRAM: - if ((address & OFFSET_MASK) < SIZE_VRAM) { - STORE_32(value, address & 0x0001FFFF, gba->video.renderer->vram); - } else if ((address & OFFSET_MASK) < 0x00020000) { - STORE_32(value, address & 0x00017FFF, gba->video.renderer->vram); - } - break; - case REGION_OAM: - STORE_32(value, address & (SIZE_OAM - 1), gba->video.oam.raw); - gba->video.renderer->writeOAM(gba->video.renderer, (address & (SIZE_OAM - 4)) >> 1); - gba->video.renderer->writeOAM(gba->video.renderer, ((address & (SIZE_OAM - 4)) >> 1) + 1); - break; - case REGION_CART0: - GBALog(gba, GBA_LOG_STUB, "Unimplemented memory Store32: 0x%08X", address); - break; - case REGION_CART_SRAM: - case REGION_CART_SRAM_MIRROR: - GBALog(gba, GBA_LOG_STUB, "Unimplemented memory Store32: 0x%08X", address); - break; - default: - GBALog(gba, GBA_LOG_GAME_ERROR, "Bad memory Store32: 0x%08X", address); - break; - } - waitstatesRegion = memory->waitstatesSeq32; - totalWait += 1 + wait; - address += 4; + switch (address >> BASE_OFFSET) { + case REGION_WORKING_RAM: + STM_LOOP_BEGIN; + STORE_WORKING_RAM; + STM_LOOP_END; + break; + case REGION_WORKING_IRAM: + STM_LOOP_BEGIN; + STORE_WORKING_IRAM; + STM_LOOP_END; + break; + case REGION_IO: + STM_LOOP_BEGIN; + STORE_IO; + STM_LOOP_END; + break; + case REGION_PALETTE_RAM: + STM_LOOP_BEGIN; + STORE_PALETTE_RAM; + STM_LOOP_END; + break; + case REGION_VRAM: + STM_LOOP_BEGIN; + STORE_VRAM; + STM_LOOP_END; + break; + case REGION_OAM: + STM_LOOP_BEGIN; + STORE_OAM; + STM_LOOP_END; + break; + case REGION_CART0: + case REGION_CART0_EX: + case REGION_CART1: + case REGION_CART1_EX: + case REGION_CART2: + case REGION_CART2_EX: + STM_LOOP_BEGIN; + STORE_CART; + STM_LOOP_END; + break; + case REGION_CART_SRAM: + case REGION_CART_SRAM_MIRROR: + STM_LOOP_BEGIN; + STORE_SRAM; + STM_LOOP_END; + break; + default: + STM_LOOP_BEGIN; + STORE_BAD; + STM_LOOP_END; + break; } if (cycleCounter) { - *cycleCounter += totalWait; + 
*cycleCounter += wait; } if (direction & LSM_B) { From f484ef9fa89e214378b01cbfdba09003fc84a8a1 Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Tue, 21 Oct 2014 23:42:24 -0700 Subject: [PATCH 20/34] Fix timing on palette RAM and VRAM 32-bit load/stores --- src/gba/gba-memory.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/gba/gba-memory.c b/src/gba/gba-memory.c index e221d7ec8..bebf0dfcd 100644 --- a/src/gba/gba-memory.c +++ b/src/gba/gba-memory.c @@ -180,8 +180,15 @@ static void GBASetActiveRegion(struct ARMCore* cpu, uint32_t address) { #define LOAD_WORKING_IRAM LOAD_32(value, address & (SIZE_WORKING_IRAM - 1), memory->iwram); #define LOAD_IO value = GBAIORead(gba, (address & (SIZE_IO - 1)) & ~2) | (GBAIORead(gba, (address & (SIZE_IO - 1)) | 2) << 16); -#define LOAD_PALETTE_RAM LOAD_32(value, address & (SIZE_PALETTE_RAM - 1), gba->video.palette); -#define LOAD_VRAM LOAD_32(value, address & 0x0001FFFF, gba->video.renderer->vram); + +#define LOAD_PALETTE_RAM \ + LOAD_32(value, address & (SIZE_PALETTE_RAM - 1), gba->video.palette); \ + ++wait; + +#define LOAD_VRAM \ + LOAD_32(value, address & 0x0001FFFF, gba->video.renderer->vram); \ + ++wait; + #define LOAD_OAM LOAD_32(value, address & (SIZE_OAM - 1), gba->video.oam.raw); #define LOAD_CART \ @@ -424,6 +431,7 @@ int8_t GBALoad8(struct ARMCore* cpu, uint32_t address, int* cycleCounter) { #define STORE_PALETTE_RAM \ STORE_32(value, address & (SIZE_PALETTE_RAM - 1), gba->video.palette); \ gba->video.renderer->writePalette(gba->video.renderer, (address & (SIZE_PALETTE_RAM - 1)) + 2, value >> 16); \ + ++wait; \ gba->video.renderer->writePalette(gba->video.renderer, address & (SIZE_PALETTE_RAM - 1), value); #define STORE_VRAM \ @@ -431,7 +439,8 @@ int8_t GBALoad8(struct ARMCore* cpu, uint32_t address, int* cycleCounter) { STORE_32(value, address & 0x0001FFFF, gba->video.renderer->vram); \ } else if ((address & OFFSET_MASK) < 0x00020000) { \ STORE_32(value, address & 0x00017FFF, 
gba->video.renderer->vram); \ - } + } \ + ++wait; #define STORE_OAM \ STORE_32(value, address & (SIZE_OAM - 1), gba->video.oam.raw); \ From 0050fb23c30c8b7192b77d3591ea8f5939702836 Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Wed, 22 Oct 2014 00:47:32 -0700 Subject: [PATCH 21/34] Use proper literal sizes for uint64_t --- src/platform/perf-main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/platform/perf-main.c b/src/platform/perf-main.c index 846540098..c2afbd57f 100644 --- a/src/platform/perf-main.c +++ b/src/platform/perf-main.c @@ -76,10 +76,10 @@ int main(int argc, char** argv) { } struct timeval tv; gettimeofday(&tv, 0); - uint64_t start = 1000000 * tv.tv_sec + tv.tv_usec; + uint64_t start = 1000000LL * tv.tv_sec + tv.tv_usec; _GBAPerfRunloop(&context, &frames, perfOpts.csv); gettimeofday(&tv, 0); - uint64_t end = 1000000 * tv.tv_sec + tv.tv_usec; + uint64_t end = 1000000LL * tv.tv_sec + tv.tv_usec; uint64_t duration = end - start; GBAThreadJoin(&context); From eecf70ee5818bd3ee461245b529feb64a7eaba63 Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Wed, 22 Oct 2014 22:00:08 -0700 Subject: [PATCH 22/34] Fix magic number for some games --- src/gba/gba.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gba/gba.c b/src/gba/gba.c index 1654f61ea..098da2a70 100644 --- a/src/gba/gba.c +++ b/src/gba/gba.c @@ -14,8 +14,8 @@ const uint32_t GBA_ARM7TDMI_FREQUENCY = 0x1000000; const uint32_t GBA_COMPONENT_MAGIC = 0x1000000; -static const size_t GBA_ROM_MAGIC_OFFSET = 1; -static const uint8_t GBA_ROM_MAGIC[] = { 0x00, 0x00, 0xEA }; +static const size_t GBA_ROM_MAGIC_OFFSET = 2; +static const uint8_t GBA_ROM_MAGIC[] = { 0x00, 0xEA }; enum { SP_BASE_SYSTEM = 0x03FFFF00, From 5340cb300b36d245949b7c389f11865ff4e122c7 Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Wed, 22 Oct 2014 22:00:19 -0700 Subject: [PATCH 23/34] Add more options to PGO --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index debddd475..ab4a3e6cb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,8 +60,8 @@ set(BUILD_PGO CACHE BOOL "Build with profiling-guided optimization") set(PGO_STAGE_2 CACHE BOOL "Rebuild for profiling-guided optimization after profiles have been generated") set(PGO_DIR "/tmp/gba-pgo/" CACHE PATH "Profiling-guided optimization profiles path") mark_as_advanced(BUILD_PGO PGO_STAGE_2 PGO_DIR) -set(PGO_PRE_FLAGS "-pg -fprofile-generate=${PGO_DIR}") -set(PGO_POST_FLAGS "-fprofile-use=${PGO_DIR}") +set(PGO_PRE_FLAGS "-pg -fprofile-generate=${PGO_DIR} -fprofile-arcs") +set(PGO_POST_FLAGS "-fprofile-use=${PGO_DIR} -fbranch-probabilities") if(BUILD_PGO AND NOT PGO_STAGE_2) set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${PGO_PRE_FLAGS}") From 1619b760e197bb7762f1d5270864231d2770fd47 Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Wed, 22 Oct 2014 22:01:11 -0700 Subject: [PATCH 24/34] Use better popcount than the GCC one... 
--- src/gba/gba-memory.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/gba/gba-memory.c b/src/gba/gba-memory.c index bebf0dfcd..1810c1372 100644 --- a/src/gba/gba-memory.c +++ b/src/gba/gba-memory.c @@ -8,6 +8,8 @@ #include "hle-bios.h" #include "util/memory.h" +static uint32_t _popcount32(unsigned bits); + static void GBASetActiveRegion(struct ARMCore* cpu, uint32_t region); static void GBAMemoryServiceDMA(struct GBA* gba, int number, struct GBADMA* info); @@ -650,7 +652,7 @@ uint32_t GBALoadMultiple(struct ARMCore* cpu, uint32_t address, int mask, enum L int popcount = 0; if (direction & LSM_D) { offset = -4; - popcount = __builtin_popcount(mask); + popcount = _popcount32(mask); address -= (popcount << 2) - 4; } @@ -754,7 +756,7 @@ uint32_t GBAStoreMultiple(struct ARMCore* cpu, uint32_t address, int mask, enum int popcount = 0; if (direction & LSM_D) { offset = -4; - popcount = __builtin_popcount(mask); + popcount = _popcount32(mask); address -= (popcount << 2) - 4; } @@ -1143,3 +1145,9 @@ void GBAMemoryDeserialize(struct GBAMemory* memory, struct GBASerializedState* s memcpy(memory->wram, state->wram, SIZE_WORKING_RAM); memcpy(memory->iwram, state->iwram, SIZE_WORKING_IRAM); } + +uint32_t _popcount32(unsigned bits) { + bits = bits - ((bits >> 1) & 0x55555555); + bits = (bits & 0x33333333) + ((bits >> 2) & 0x33333333); + return (((bits + (bits >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24; +} From 043bb75a6b5eb30e11d3e3fe109e7d69a33a923c Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Wed, 22 Oct 2014 22:01:48 -0700 Subject: [PATCH 25/34] Use cleaner mechanism for checking the register list bits --- src/gba/gba-memory.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gba/gba-memory.c b/src/gba/gba-memory.c index 1810c1372..1e384b8ee 100644 --- a/src/gba/gba-memory.c +++ b/src/gba/gba-memory.c @@ -629,7 +629,7 @@ void GBAStore8(struct ARMCore* cpu, uint32_t address, int8_t value, int* cycleCo #define 
LDM_LOOP_BEGIN \ for (i = 0; i < 16; ++i) { \ - if (~mask & (1 << i)) { \ + if (!(mask & (1 << i))) { \ continue; \ } @@ -733,7 +733,7 @@ uint32_t GBALoadMultiple(struct ARMCore* cpu, uint32_t address, int mask, enum L #define STM_LOOP_BEGIN \ for (i = 0; i < 16; ++i) { \ - if (~mask & (1 << i)) { \ + if (!(mask & (1 << i))) { \ continue; \ } \ value = cpu->gprs[i]; From d3b78b2fd2b874fa368524c13f0a667a7db244a5 Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Thu, 23 Oct 2014 01:41:52 -0700 Subject: [PATCH 26/34] Log level for SWI --- src/gba/gba-bios.c | 3 ++- src/gba/gba.h | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/gba/gba-bios.c b/src/gba/gba-bios.c index de33845d1..bfd39ced5 100644 --- a/src/gba/gba-bios.c +++ b/src/gba/gba-bios.c @@ -116,7 +116,8 @@ static void _Div(struct ARMCore* cpu, int32_t num, int32_t denom) { void GBASwi16(struct ARMCore* cpu, int immediate) { struct GBA* gba = (struct GBA*) cpu->master; - GBALog(gba, GBA_LOG_DEBUG, "SWI: %02x", immediate); + GBALog(gba, GBA_LOG_SWI, "SWI: %02X r0: %08X r1: %08X r2: %08X r3: %08X", + immediate, cpu->gprs[0], cpu->gprs[1], cpu->gprs[2], cpu->gprs[3]); if (gba->memory.fullBios) { ARMRaiseSWI(cpu); diff --git a/src/gba/gba.h b/src/gba/gba.h index 281667974..9ae2b0771 100644 --- a/src/gba/gba.h +++ b/src/gba/gba.h @@ -43,7 +43,10 @@ enum GBALogLevel { GBA_LOG_DEBUG = 0x10, GBA_LOG_STUB = 0x20, - GBA_LOG_GAME_ERROR = 0x100 + GBA_LOG_GAME_ERROR = 0x100, + GBA_LOG_SWI = 0x200, + + GBA_LOG_ALL = 0x33F }; enum GBAKey { From 768316a8d39665065507a29ccb2ea06ac4a26823 Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Thu, 23 Oct 2014 01:45:16 -0700 Subject: [PATCH 27/34] Fix two small consistency issues --- src/debugger/cli-debugger.c | 2 +- src/gba/gba-bios.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/debugger/cli-debugger.c b/src/debugger/cli-debugger.c index e1ad3f2ef..80b884890 100644 --- a/src/debugger/cli-debugger.c +++ 
b/src/debugger/cli-debugger.c @@ -201,7 +201,7 @@ static void _printBin(struct CLIDebugger* debugger, struct DebugVector* dv) { printf(" 0b"); int i = 32; while (i--) { - printf("%u", (dv->intValue >> i) & 1); + printf(" %u", (dv->intValue >> i) & 1); } } printf("\n"); diff --git a/src/gba/gba-bios.c b/src/gba/gba-bios.c index bfd39ced5..d5985a7f1 100644 --- a/src/gba/gba-bios.c +++ b/src/gba/gba-bios.c @@ -227,7 +227,7 @@ void GBASwi16(struct ARMCore* cpu, int immediate) { _MidiKey2Freq(gba); break; default: - GBALog(gba, GBA_LOG_STUB, "Stub software interrupt: %02x", immediate); + GBALog(gba, GBA_LOG_STUB, "Stub software interrupt: %02X", immediate); } } From 61915939b5953db7a04bb9ee254ff356566ebcb4 Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Thu, 23 Oct 2014 01:45:32 -0700 Subject: [PATCH 28/34] Log division by zero --- src/gba/gba-bios.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/gba/gba-bios.c b/src/gba/gba-bios.c index d5985a7f1..8587f7545 100644 --- a/src/gba/gba-bios.c +++ b/src/gba/gba-bios.c @@ -99,13 +99,15 @@ static void _MidiKey2Freq(struct GBA* gba) { cpu->gprs[0] = key / powf(2, (180.f - cpu->gprs[1] - cpu->gprs[2] / 256.f) / 12.f); } -static void _Div(struct ARMCore* cpu, int32_t num, int32_t denom) { +static void _Div(struct GBA* gba, int32_t num, int32_t denom) { + struct ARMCore* cpu = gba->cpu; if (denom != 0) { div_t result = div(num, denom); cpu->gprs[0] = result.quot; cpu->gprs[1] = result.rem; cpu->gprs[3] = abs(result.quot); } else { + GBALog(gba, GBA_LOG_GAME_ERROR, "Attempting to divide %i by zero!", num); // If abs(num) > 1, this should hang, but that would be painful to // emulate in HLE, and no game will get into a state where it hangs... cpu->gprs[0] = (num < 0) ? 
-1 : 1; @@ -138,10 +140,10 @@ void GBASwi16(struct ARMCore* cpu, int immediate) { ARMRaiseSWI(cpu); break; case 0x6: - _Div(cpu, cpu->gprs[0], cpu->gprs[1]); + _Div(gba, cpu->gprs[0], cpu->gprs[1]); break; case 0x7: - _Div(cpu, cpu->gprs[1], cpu->gprs[0]); + _Div(gba, cpu->gprs[1], cpu->gprs[0]); break; case 0x8: cpu->gprs[0] = sqrt(cpu->gprs[0]); From 344364695e4900668c085de57c02f2306f52d85c Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Thu, 23 Oct 2014 01:50:45 -0700 Subject: [PATCH 29/34] Fix parsing of numbers starting with 0 (and disregarding octal) --- src/debugger/parser.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/debugger/parser.c b/src/debugger/parser.c index 840819e05..a9b1949a0 100644 --- a/src/debugger/parser.c +++ b/src/debugger/parser.c @@ -253,6 +253,21 @@ size_t lexExpression(struct LexVector* lv, const char* string, size_t length) { lv->token.uintValue = next; state = LEX_EXPECT_OPERATOR; break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + next = token - '0'; + state = LEX_EXPECT_DECIMAL; + break; + default: + state = LEX_ERROR; } break; case LEX_EXPECT_OPERATOR: From a265cf45a92bb76e21d386c0974deb137267ec2e Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Fri, 24 Oct 2014 22:01:13 -0700 Subject: [PATCH 30/34] Fix build on Windows --- src/util/vfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/vfs.c b/src/util/vfs.c index 137d68095..665cd9cd7 100644 --- a/src/util/vfs.c +++ b/src/util/vfs.c @@ -136,7 +136,7 @@ static void* _vfdMap(struct VFile* vf, size_t size, int flags) { size = fileSize; } vfd->hMap = CreateFileMapping((HANDLE) _get_osfhandle(vfd->fd), 0, createFlags, 0, size & 0xFFFFFFFF, 0); - return MapViewOfFile(hMap, mapFiles, 0, 0, size); + return MapViewOfFile(vfd->hMap, mapFiles, 0, 0, size); } static void _vfdUnmap(struct VFile* vf, void* memory, size_t size) { From 
c7a1894a99aa7b96d1bef16110be2572002b5386 Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Sat, 25 Oct 2014 01:43:05 -0700 Subject: [PATCH 31/34] Fix pkg-config overwriting find_package results --- CMakeLists.txt | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ab4a3e6cb..877f29d2f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,12 +31,16 @@ function(find_feature FEATURE_NAME FEATURE_REQUIRES) return() endif() foreach(REQUIRE ${FEATURE_REQUIRES}) - find_package(${REQUIRE} QUIET) - pkg_search_module(${REQUIRE} ${REQUIRE}) - if (NOT ${REQUIRE}_FOUND) - message(WARNING "Requested module ${REQUIRE} missing for feature ${FEATURE_NAME}. Feature disabled.") - set(${FEATURE_NAME} OFF PARENT_SCOPE) - return() + if(NOT ${REQUIRE}_FOUND) + find_package(${REQUIRE} QUIET) + if(NOT ${REQUIRE}_FOUND) + pkg_search_module(${REQUIRE} ${REQUIRE}) + if (NOT ${REQUIRE}_FOUND) + message(WARNING "Requested module ${REQUIRE} missing for feature ${FEATURE_NAME}. 
Feature disabled.") + set(${FEATURE_NAME} OFF PARENT_SCOPE) + return() + endif() + endif() endif() string(TOUPPER ${REQUIRE} UREQUIRE) set(${UREQUIRE}_CFLAGS_OTHER ${${REQUIRE}_CFLAGS_OTHER} PARENT_SCOPE) @@ -78,7 +82,7 @@ add_definitions(-DBINARY_NAME="${BINARY_NAME}" -DPROJECT_NAME="${PROJECT_NAME}" # Feature dependencies find_feature(USE_CLI_DEBUGGER "libedit") find_feature(USE_FFMPEG "libavcodec;libavformat;libavutil") -find_feature(USE_PNG "PNG;ZLIB") +find_feature(USE_PNG "ZLIB;PNG") find_feature(USE_LIBZIP "libzip") include(CheckFunctionExists) From 5450bd8d59dbd475f765c87bf428a2adcaeece94 Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Sat, 25 Oct 2014 01:45:42 -0700 Subject: [PATCH 32/34] Force files into binary mode on Windows --- src/util/vfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/util/vfs.c b/src/util/vfs.c index 665cd9cd7..14b2af2fc 100644 --- a/src/util/vfs.c +++ b/src/util/vfs.c @@ -42,6 +42,9 @@ struct VFile* VFileOpen(const char* path, int flags) { if (!path) { return 0; } +#ifdef _WIN32 + flags |= O_BINARY; +#endif int fd = open(path, flags, 0666); return VFileFromFD(fd); } From 86003496b0af95e897ab4ceea86e8d32efe01d75 Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Fri, 24 Oct 2014 01:09:46 -0700 Subject: [PATCH 33/34] Handle out-of-bounds SRAM and ROM reads --- src/gba/gba-memory.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/gba/gba-memory.c b/src/gba/gba-memory.c index 1e384b8ee..2b817ab50 100644 --- a/src/gba/gba-memory.c +++ b/src/gba/gba-memory.c @@ -198,7 +198,9 @@ static void GBASetActiveRegion(struct ARMCore* cpu, uint32_t address) { if ((address & (SIZE_CART0 - 1)) < memory->romSize) { \ LOAD_32(value, address & (SIZE_CART0 - 1), memory->rom); \ } else { \ + GBALog(gba, GBA_LOG_GAME_ERROR, "Out of bounds ROM Load32: 0x%08X", address); \ value = (address >> 1) & 0xFFFF; \ + value |= value << 16; \ } #define LOAD_SRAM \ @@ -315,6 +317,9 @@ int16_t GBALoad16(struct ARMCore* cpu, uint32_t 
address, int* cycleCounter) { wait = memory->waitstatesNonseq16[address >> BASE_OFFSET]; if ((address & (SIZE_CART0 - 1)) < memory->romSize) { LOAD_16(value, address & (SIZE_CART0 - 1), memory->rom); + } else { + GBALog(gba, GBA_LOG_GAME_ERROR, "Out of bounds ROM Load16: 0x%08X", address); + value = (address >> 1) & 0xFFFF; \ } break; case REGION_CART2_EX: @@ -323,6 +328,9 @@ int16_t GBALoad16(struct ARMCore* cpu, uint32_t address, int* cycleCounter) { value = GBASavedataReadEEPROM(&memory->savedata); } else if ((address & (SIZE_CART0 - 1)) < memory->romSize) { LOAD_16(value, address & (SIZE_CART0 - 1), memory->rom); + } else { + GBALog(gba, GBA_LOG_GAME_ERROR, "Out of bounds ROM Load16: 0x%08X", address); + value = (address >> 1) & 0xFFFF; \ } break; case REGION_CART_SRAM: @@ -393,6 +401,9 @@ int8_t GBALoad8(struct ARMCore* cpu, uint32_t address, int* cycleCounter) { wait = memory->waitstatesNonseq16[address >> BASE_OFFSET]; if ((address & (SIZE_CART0 - 1)) < memory->romSize) { value = ((int8_t*) memory->rom)[address & (SIZE_CART0 - 1)]; + } else { + GBALog(gba, GBA_LOG_GAME_ERROR, "Out of bounds ROM Load8: 0x%08X", address); + value = (address >> 1) & 0xFF; \ } break; case REGION_CART_SRAM: @@ -406,6 +417,9 @@ int8_t GBALoad8(struct ARMCore* cpu, uint32_t address, int* cycleCounter) { value = memory->savedata.data[address & (SIZE_CART_SRAM - 1)]; } else if (memory->savedata.type == SAVEDATA_FLASH512 || memory->savedata.type == SAVEDATA_FLASH1M) { value = GBASavedataReadFlash(&memory->savedata, address); + } else { + GBALog(gba, GBA_LOG_GAME_ERROR, "Reading from non-existent SRAM: 0x%08X", address); + value = 7; } break; default: @@ -614,6 +628,8 @@ void GBAStore8(struct ARMCore* cpu, uint32_t address, int8_t value, int* cycleCo GBASavedataWriteFlash(&memory->savedata, address, value); } else if (memory->savedata.type == SAVEDATA_SRAM) { memory->savedata.data[address & (SIZE_CART_SRAM - 1)] = value; + } else { + GBALog(gba, GBA_LOG_GAME_ERROR, "Writing to 
non-existent SRAM: 0x%08X", address); } wait = memory->waitstatesNonseq16[REGION_CART_SRAM]; break; From 732e29e3c643b35fa27ba9df9db17303ca28018c Mon Sep 17 00:00:00 2001 From: Jeffrey Pfau Date: Fri, 24 Oct 2014 01:14:08 -0700 Subject: [PATCH 34/34] Fix argument number errors --- src/debugger/cli-debugger.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/debugger/cli-debugger.c b/src/debugger/cli-debugger.c index 80b884890..316e4acce 100644 --- a/src/debugger/cli-debugger.c +++ b/src/debugger/cli-debugger.c @@ -518,11 +518,10 @@ static bool _parse(struct CLIDebugger* debugger, const char* line, size_t count) _DVFree(dv); return false; } - } else { - printf("Wrong number of arguments"); } } else if (firstSpace) { - printf("Wrong number of arguments"); + printf("Wrong number of arguments\n"); + return false; } _debuggerCommands[i].command(debugger, dv); _DVFree(dv);