From 86f2be7260f9a9b51efd7c795c28cdcfda775742 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Sun, 14 Jul 2019 19:24:00 +0200 Subject: [PATCH] jit: add compile option --- CMakeLists.txt | 36 +++++++++++++++++++ src/ARM.cpp | 13 +++---- src/ARM.h | 6 ++++ src/ARMJIT_x64/ARMJIT_Compiler.cpp | 55 ++++++++++++++++-------------- src/ARMJIT_x64/ARMJIT_Compiler.h | 1 - src/CMakeLists.txt | 27 ++++++++------- src/CP15.cpp | 12 +++++-- src/Config.cpp | 4 +++ src/Config.h | 2 ++ src/NDS.cpp | 26 ++++++++++++++ src/dolphin/CodeBlock.h | 3 -- 11 files changed, 134 insertions(+), 51 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 885f0dd6..1e53c607 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,6 +14,42 @@ if (NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif() +include(CheckSymbolExists) +function(detect_architecture symbol arch) + if (NOT DEFINED ARCHITECTURE) + set(CMAKE_REQUIRED_QUIET 1) + check_symbol_exists("${symbol}" "" ARCHITECTURE_${arch}) + unset(CMAKE_REQUIRED_QUIET) + + # The output variable needs to be unique across invocations otherwise + # CMake's crazy scope rules will keep it defined + if (ARCHITECTURE_${arch}) + set(ARCHITECTURE "${arch}" PARENT_SCOPE) + set(ARCHITECTURE_${arch} 1 PARENT_SCOPE) + add_definitions(-DARCHITECTURE_${arch}=1) + endif() + endif() +endfunction() + +detect_architecture("__x86_64__" x86_64) +detect_architecture("__i386__" x86) +detect_architecture("__arm__" ARM) +detect_architecture("__aarch64__" ARM64) + +if (ARCHITECTURE STREQUAL x86_64) + option(ENABLE_JIT "Enable x64 JIT recompiler" ON) +endif() + +if (ENABLE_JIT) + add_definitions(-DJIT_ENABLED) +endif() + +if (CMAKE_BUILD_TYPE STREQUAL Release) + option(ENABLE_LTO "Enable link-time optimization" ON) +else() + option(ENABLE_LTO "Enable link-time optimization" OFF) +endif() + if (CMAKE_BUILD_TYPE STREQUAL Debug) add_compile_options(-Og) endif() diff --git a/src/ARM.cpp b/src/ARM.cpp index 1cd4bb27..bfe18902 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ 
-81,15 +81,8 @@ ARMv4::ARMv4() : ARM(1) // } -namespace ARMJIT {extern int instructionPopularityARM[ARMInstrInfo::ak_Count];} - void ARM::Reset() { - FILE* blabla = fopen("fhhg", "w"); - for (int i = 0; i < ARMInstrInfo::ak_Count; i++) - fprintf(blabla, "%d -> %dx\n", i, ARMJIT::instructionPopularityARM[i]); - fclose(blabla); - Cycles = 0; Halted = 0; @@ -591,6 +584,7 @@ void ARMv5::Execute() Halted = 0; } +#ifdef JIT_ENABLED void ARMv5::ExecuteJIT() { if (Halted) @@ -642,6 +636,7 @@ void ARMv5::ExecuteJIT() if (Halted == 2) Halted = 0; } +#endif void ARMv4::Execute() { @@ -720,6 +715,7 @@ void ARMv4::Execute() Halted = 0; } +#ifdef JIT_ENABLED void ARMv4::ExecuteJIT() { if (Halted) @@ -771,4 +767,5 @@ void ARMv4::ExecuteJIT() if (Halted == 2) Halted = 0; -} \ No newline at end of file +} +#endif \ No newline at end of file diff --git a/src/ARM.h b/src/ARM.h index 3b01ef33..c3e7f44d 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -52,7 +52,9 @@ public: } virtual void Execute() = 0; +#ifdef JIT_ENABLED /* macro set by CMake via -DJIT_ENABLED; ENABLE_JIT is only the CMake option name */ virtual void ExecuteJIT() = 0; +#endif bool CheckCondition(u32 code) { @@ -160,7 +162,9 @@ public: void DataAbort(); void Execute(); +#ifdef JIT_ENABLED void ExecuteJIT(); +#endif // all code accesses are forced nonseq 32bit u32 CodeRead32(u32 addr, bool branch); @@ -283,7 +287,9 @@ public: void JumpTo(u32 addr, bool restorecpsr = false); void Execute(); +#ifdef JIT_ENABLED void ExecuteJIT(); +#endif u16 CodeRead16(u32 addr) { diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp index fe238595..18cb27e9 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp +++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp @@ -4,7 +4,10 @@ #include +#include "../dolphin/CommonFuncs.h" + #ifdef _WIN32 +#include #else #include #include @@ -32,8 +35,6 @@ const int RegisterCache::NativeRegsAvailable = #endif ; -int instructionPopularityARM[ARMInstrInfo::ak_Count]; - /* We'll repurpose this .bss memory @@ -42,29 +43,33 @@ u8 CodeMemory[1024 * 1024 * 32]; Compiler::Compiler() { 
-#ifdef _WIN32 -#else - u64 pagesize = sysconf(_SC_PAGE_SIZE); -#endif + { + #ifdef _WIN32 + SYSTEM_INFO sysInfo; + GetSystemInfo(&sysInfo); - u8* pageAligned = (u8*)(((u64)CodeMemory & ~(pagesize - 1)) + pagesize); - u64 alignedSize = (((u64)CodeMemory + sizeof(CodeMemory)) & ~(pagesize - 1)) - (u64)pageAligned; + u64 pageSize = (u64)sysInfo.dwPageSize; + #else + u64 pageSize = sysconf(_SC_PAGE_SIZE); + #endif -#ifdef _WIN32 -#else - mprotect(pageAligned, alignedSize, PROT_EXEC | PROT_READ | PROT_WRITE); -#endif + u8* pageAligned = (u8*)(((u64)CodeMemory & ~(pageSize - 1)) + pageSize); + u64 alignedSize = (((u64)CodeMemory + sizeof(CodeMemory)) & ~(pageSize - 1)) - (u64)pageAligned; - region = pageAligned; - region_size = alignedSize; - total_region_size = region_size; + #ifdef _WIN32 + DWORD dummy; + VirtualProtect(pageAligned, alignedSize, PAGE_EXECUTE_READWRITE, &dummy); + #else + mprotect(pageAligned, alignedSize, PROT_EXEC | PROT_READ | PROT_WRITE); + #endif + + region = pageAligned; + region_size = alignedSize; + total_region_size = region_size; + } ClearCodeSpace(); - SetCodePtr(pageAligned); - - memset(instructionPopularityARM, 0, sizeof(instructionPopularityARM)); - for (int i = 0; i < 3; i++) { for (int j = 0; j < 2; j++) @@ -118,7 +123,7 @@ Compiler::Compiler() SetJumpTarget(und); MOV(32, R(ABI_PARAM3), MComplex(RCPU, ABI_PARAM2, SCALE_4, offsetof(ARM, R_UND))); RET(); - } + } { // RSCRATCH mode // ABI_PARAM2 reg n @@ -163,7 +168,10 @@ Compiler::Compiler() RET(); } - ResetStart = (void*)GetWritableCodePtr(); + // move the region forward to prevent overwriting the generated functions + region_size -= GetWritableCodePtr() - region; + total_region_size = region_size; + region = GetWritableCodePtr(); } void Compiler::LoadCPSR() @@ -338,7 +346,7 @@ const Compiler::CompileFunc T_Comp[ARMInstrInfo::tk_Count] = { void Compiler::Reset() { - SetCodePtr((u8*)ResetStart); + ClearCodeSpace(); } CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], 
int instrsCount) @@ -375,9 +383,6 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs ? T_Comp[CurInstr.Info.Kind] : A_Comp[CurInstr.Info.Kind]; - if (!Thumb) - instructionPopularityARM[CurInstr.Info.Kind] += comp == NULL; - if (comp == NULL || i == instrsCount - 1) { MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(R15)); diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.h b/src/ARMJIT_x64/ARMJIT_Compiler.h index cd58012b..0ce7d8d2 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.h +++ b/src/ARMJIT_x64/ARMJIT_Compiler.h @@ -132,7 +132,6 @@ public: return Gen::R(RegCache.Mapping[reg]); } - void* ResetStart; void* MemoryFuncs9[3][2]; void* MemoryFuncs7[3][2][2]; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 75fa42c8..bfc0ad97 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -49,20 +49,23 @@ add_library(core STATIC WifiAP.cpp tiny-AES-c/aes.c - - ARMJIT.cpp - ARMJIT_x64/ARMJIT_Compiler.cpp - ARMJIT_x64/ARMJIT_ALU.cpp - ARMJIT_x64/ARMJIT_LoadStore.cpp - ARMJIT_x64/ARMJIT_Branch.cpp - - dolphin/CommonFuncs.cpp - dolphin/x64ABI.cpp - dolphin/x64CPUDetect.cpp - dolphin/x64Emitter.cpp - dolphin/MemoryUtil.cpp ) +if (ENABLE_JIT) + target_sources(core PRIVATE + ARMJIT.cpp + ARMJIT_x64/ARMJIT_Compiler.cpp + ARMJIT_x64/ARMJIT_ALU.cpp + ARMJIT_x64/ARMJIT_LoadStore.cpp + ARMJIT_x64/ARMJIT_Branch.cpp + + dolphin/CommonFuncs.cpp + dolphin/x64ABI.cpp + dolphin/x64CPUDetect.cpp + dolphin/x64Emitter.cpp + ) +endif() + if (WIN32) target_link_libraries(core ole32 comctl32 ws2_32 opengl32) else() diff --git a/src/CP15.cpp b/src/CP15.cpp index 3e1c08b1..5b5f935a 100644 --- a/src/CP15.cpp +++ b/src/CP15.cpp @@ -813,7 +813,9 @@ void ARMv5::DataWrite8(u32 addr, u8 val) { DataCycles = 1; *(u8*)&ITCM[addr & 0x7FFF] = val; +#ifdef JIT_ENABLED ARMJIT::cache.ARM9_ITCM[(addr & 0x7FFF) >> 1] = NULL; +#endif return; } if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize)) @@ -835,7 +837,9 @@ void ARMv5::DataWrite16(u32 addr, u16 val) { DataCycles = 1; 
*(u16*)&ITCM[addr & 0x7FFF] = val; +#ifdef JIT_ENABLED ARMJIT::cache.ARM9_ITCM[(addr & 0x7FFF) >> 1] = NULL; +#endif return; } if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize)) @@ -857,8 +861,10 @@ void ARMv5::DataWrite32(u32 addr, u32 val) { DataCycles = 1; *(u32*)&ITCM[addr & 0x7FFF] = val; +#ifdef JIT_ENABLED ARMJIT::cache.ARM9_ITCM[(addr & 0x7FFF) >> 1] = NULL; ARMJIT::cache.ARM9_ITCM[((addr + 2) & 0x7FFF) >> 1] = NULL; +#endif return; } if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize)) @@ -880,8 +886,10 @@ void ARMv5::DataWrite32S(u32 addr, u32 val) { DataCycles += 1; *(u32*)&ITCM[addr & 0x7FFF] = val; - ARMJIT::cache.ARM9_ITCM[(addr & 0x7FFF) / 2] = NULL; - ARMJIT::cache.ARM9_ITCM[(addr & 0x7FFF) / 2 + 1] = NULL; +#ifdef JIT_ENABLED + ARMJIT::cache.ARM9_ITCM[(addr & 0x7FFF) >> 1] = NULL; + ARMJIT::cache.ARM9_ITCM[((addr & 0x7FFF) >> 1) + 1] = NULL; +#endif return; } if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize)) diff --git a/src/Config.cpp b/src/Config.cpp index 5c0892ad..33bab75f 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -37,8 +37,10 @@ char DSiBIOS7Path[1024]; char DSiFirmwarePath[1024]; char DSiNANDPath[1024]; +#ifdef JIT_ENABLED bool JIT_Enable = false; int JIT_MaxBlockSize = 12; +#endif ConfigEntry ConfigFile[] = { @@ -51,8 +53,10 @@ ConfigEntry ConfigFile[] = {"DSiFirmwarePath", 1, DSiFirmwarePath, 0, "", 1023}, {"DSiNANDPath", 1, DSiNANDPath, 0, "", 1023}, +#ifdef JIT_ENABLED {"JIT_Enable", 0, &JIT_Enable, 0, NULL, 0}, {"JIT_MaxBlockSize", 0, &JIT_MaxBlockSize, 10, NULL, 0}, +#endif {"", -1, NULL, 0, NULL, 0} }; diff --git a/src/Config.h b/src/Config.h index 9dda157e..9296335b 100644 --- a/src/Config.h +++ b/src/Config.h @@ -51,8 +51,10 @@ extern char DSiBIOS7Path[1024]; extern char DSiFirmwarePath[1024]; extern char DSiNANDPath[1024]; +#ifdef JIT_ENABLED extern bool JIT_Enable; extern int JIT_MaxBlockSize; +#endif } diff --git a/src/NDS.cpp b/src/NDS.cpp index cb85d13e..7636a07b 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ 
-169,7 +169,9 @@ bool Init() ARM9 = new ARMv5(); ARM7 = new ARMv4(); +#ifdef JIT_ENABLED ARMJIT::Init(); +#endif DMAs[0] = new DMA(0, 0); DMAs[1] = new DMA(0, 1); @@ -203,7 +205,9 @@ void DeInit() delete ARM9; delete ARM7; +#ifdef JIT_ENABLED ARMJIT::DeInit(); +#endif for (int i = 0; i < 8; i++) delete DMAs[i]; @@ -566,7 +570,9 @@ void Reset() KeyCnt = 0; RCnt = 0; +#ifdef JIT_ENABLED ARMJIT::InvalidateBlockCache(); +#endif NDSCart::Reset(); GBACart::Reset(); @@ -794,10 +800,12 @@ bool DoSavestate(Savestate* file) GPU::SetPowerCnt(PowerControl9); } +#ifdef JIT_ENABLED if (!file->Saving) { ARMJIT::InvalidateBlockCache(); } +#endif return true; } @@ -923,9 +931,11 @@ u32 RunFrame() } else { +#ifdef JIT_ENABLED if (EnableJIT) ARM9->ExecuteJIT(); else +#endif ARM9->Execute(); } @@ -949,9 +959,11 @@ u32 RunFrame() } else { +#ifdef JIT_ENABLED if (EnableJIT) ARM7->ExecuteJIT(); else +#endif ARM7->Execute(); } @@ -984,9 +996,11 @@ u32 RunFrame() u32 RunFrame() { +#ifdef JIT_ENABLED if (Config::JIT_Enable) return RunFrame(); else +#endif return RunFrame(); } @@ -1998,7 +2012,9 @@ u32 ARM9Read32(u32 addr) void ARM9Write8(u32 addr, u8 val) { +#ifdef JIT_ENABLED ARMJIT::Invalidate16(0, addr); +#endif switch (addr & 0xFF000000) { @@ -2050,7 +2066,9 @@ void ARM9Write8(u32 addr, u8 val) void ARM9Write16(u32 addr, u16 val) { +#ifdef JIT_ENABLED ARMJIT::Invalidate16(0, addr); +#endif switch (addr & 0xFF000000) { @@ -2118,7 +2136,9 @@ void ARM9Write16(u32 addr, u16 val) void ARM9Write32(u32 addr, u32 val) { +#ifdef JIT_ENABLED ARMJIT::Invalidate32(0, addr); +#endif switch (addr & 0xFF000000) { @@ -2414,7 +2434,9 @@ u32 ARM7Read32(u32 addr) void ARM7Write8(u32 addr, u8 val) { +#ifdef JIT_ENABLED ARMJIT::Invalidate16(1, addr); +#endif switch (addr & 0xFF800000) { @@ -2475,7 +2497,9 @@ void ARM7Write8(u32 addr, u8 val) void ARM7Write16(u32 addr, u16 val) { +#ifdef JIT_ENABLED ARMJIT::Invalidate16(1, addr); +#endif switch (addr & 0xFF800000) { @@ -2546,7 +2570,9 @@ void ARM7Write16(u32 
addr, u16 val) void ARM7Write32(u32 addr, u32 val) { +#ifdef JIT_ENABLED ARMJIT::Invalidate32(1, addr); +#endif switch (addr & 0xFF800000) { diff --git a/src/dolphin/CodeBlock.h b/src/dolphin/CodeBlock.h index 31a8d931..e71cf6d5 100644 --- a/src/dolphin/CodeBlock.h +++ b/src/dolphin/CodeBlock.h @@ -9,7 +9,6 @@ #include "Assert.h" #include "../types.h" -#include "MemoryUtil.h" namespace Common { @@ -41,8 +40,6 @@ public: CodeBlock() = default; virtual ~CodeBlock() { - if (region) - FreeCodeSpace(); } CodeBlock(const CodeBlock&) = delete; CodeBlock& operator=(const CodeBlock&) = delete;