From f12a70f4b7e23213ff4230d855b843f2547479fd Mon Sep 17 00:00:00 2001 From: rogerman Date: Sat, 23 Apr 2022 20:00:30 -0700 Subject: [PATCH] Add a brand new JIT CPU emulation engine for ARM! Special thanks to user @byte4byte from byte4byte.com for making this new feature! - If you have an ARM processor, expect anywhere between 10% to 50% improvement to CPU emulation performance for most games. - But it isn't quite for prime time just yet. There is a crashing bug related to munmap() at emit_core.cpp:93 that causes the app to crash when resetting the emulator after a game has already been run. - The Cocoa port now enables the new ARM JIT engine for Macs running Apple Silicon processors, but only for dev+ builds. This is due to the crashing bug noted above. --- .../frontend/cocoa/ClientExecutionControl.cpp | 2 +- .../project.pbxproj | 22 + desmume/src/frontend/cocoa/DeSmuME_Prefix.pch | 2 +- .../translations/English.lproj/MainMenu.xib | 356 +- desmume/src/utils/arm_jit/arm64_emit.cpp | 1880 +++++++ desmume/src/utils/arm_jit/arm_emit.cpp | 1310 +++++ desmume/src/utils/arm_jit/arm_jit_arm.cpp | 4370 +++++++++++++++++ desmume/src/utils/arm_jit/emit_core.cpp | 208 + 8 files changed, 8051 insertions(+), 99 deletions(-) create mode 100755 desmume/src/utils/arm_jit/arm64_emit.cpp create mode 100755 desmume/src/utils/arm_jit/arm_emit.cpp create mode 100755 desmume/src/utils/arm_jit/arm_jit_arm.cpp create mode 100755 desmume/src/utils/arm_jit/emit_core.cpp diff --git a/desmume/src/frontend/cocoa/ClientExecutionControl.cpp b/desmume/src/frontend/cocoa/ClientExecutionControl.cpp index e80e92d49..fd88d0292 100644 --- a/desmume/src/frontend/cocoa/ClientExecutionControl.cpp +++ b/desmume/src/frontend/cocoa/ClientExecutionControl.cpp @@ -197,7 +197,7 @@ const char* ClientExecutionControl::GetCPUEmulationEngineName() void ClientExecutionControl::SetCPUEmulationEngineByID(CPUEmulationEngineID engineID) { -#if !defined(__i386__) && !defined(__x86_64__) +#if !defined(__i386__) && !defined(__x86_64__) && !defined(__aarch64__) engineID = CPUEmulationEngineID_Interpreter; #endif diff --git a/desmume/src/frontend/cocoa/DeSmuME (Latest).xcodeproj/project.pbxproj b/desmume/src/frontend/cocoa/DeSmuME (Latest).xcodeproj/project.pbxproj index 81f7309ea..20ab8b62a 100755 --- a/desmume/src/frontend/cocoa/DeSmuME (Latest).xcodeproj/project.pbxproj +++ b/desmume/src/frontend/cocoa/DeSmuME (Latest).xcodeproj/project.pbxproj @@ -1376,6 +1376,9 @@ AB405690169F5DCC0016AC3E /* x86operand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB405674169F5DCC0016AC3E /* x86operand.cpp */; }; AB405693169F5DCC0016AC3E /* x86util.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB405676169F5DCC0016AC3E /* x86util.cpp */; }; AB407F371A6206FB00313213 /* xbrz.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB47B52C18A3F722009A42AF /* xbrz.cpp */; }; + AB46E54F2814F42500A4E3D6 /* arm_jit_arm.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB46E54D2814F40800A4E3D6 /* arm_jit_arm.cpp */; }; + AB46E5502814F42700A4E3D6 /* arm_jit_arm.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB46E54D2814F40800A4E3D6 /* arm_jit_arm.cpp */; }; + AB46E5522814F43300A4E3D6 /* arm_jit_arm.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB46E54D2814F40800A4E3D6 /* arm_jit_arm.cpp */; }; AB47B52F18A45C35009A42AF /* xbrz.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AB47B52C18A3F722009A42AF /* xbrz.cpp */; }; AB4979BD27F2E97A00E8A244 /* KeyNames.plist in Resources */ = {isa = PBXBuildFile; fileRef = AB02475B13886BF300E9F9AB /* KeyNames.plist */; }; AB4979BE27F2E97A00E8A244 /* Icon_MicrophoneGray_256x256.png in Resources */ = {isa = PBXBuildFile; fileRef = ABB0FBD81A9FD0260060C55A /* Icon_MicrophoneGray_256x256.png */; }; @@ -3597,6 +3600,10 @@ AB405675169F5DCC0016AC3E /* x86operand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = x86operand.h; sourceTree = ""; }; AB405676169F5DCC0016AC3E /* x86util.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = x86util.cpp; sourceTree = ""; }; AB405677169F5DCC0016AC3E /* x86util.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = x86util.h; sourceTree = ""; }; + AB46E54B2814F40800A4E3D6 /* arm64_emit.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = arm64_emit.cpp; sourceTree = ""; }; + AB46E54C2814F40800A4E3D6 /* arm_emit.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = arm_emit.cpp; sourceTree = ""; }; + AB46E54D2814F40800A4E3D6 /* arm_jit_arm.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = arm_jit_arm.cpp; sourceTree = ""; }; + AB46E54E2814F40800A4E3D6 /* emit_core.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = emit_core.cpp; sourceTree = ""; }; AB47B52B18A3F722009A42AF /* xbrz.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = xbrz.h; sourceTree = ""; }; AB47B52C18A3F722009A42AF /* xbrz.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = xbrz.cpp; sourceTree = ""; }; AB497B1227F2E97A00E8A244 /* DeSmuME.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = DeSmuME.app; sourceTree = BUILT_PRODUCTS_DIR; }; @@ -4939,6 +4946,17 @@ path = OptimizationProfiles; sourceTree = ""; }; + AB46E54A2814F40800A4E3D6 /* arm_jit */ = { + isa = PBXGroup; + children = ( + AB46E54D2814F40800A4E3D6 /* arm_jit_arm.cpp */, + AB46E54E2814F40800A4E3D6 /* emit_core.cpp */, + AB46E54C2814F40800A4E3D6 /* arm_emit.cpp */, + AB46E54B2814F40800A4E3D6 /* arm64_emit.cpp */, + ); + path = arm_jit; + sourceTree = ""; + }; AB796D7415CDD20E00C59155 /* AsmJit */ = { isa = PBXGroup; children = ( @@ -5332,6 +5350,7 @@ ABD1FF1C1345ACBF00AF11D1 /* utils */ = { isa = PBXGroup; children = ( + AB46E54A2814F40800A4E3D6 /* arm_jit */, AB796D7415CDD20E00C59155 /* AsmJit */, ABD1FF211345ACBF00AF11D1 /* decrypt */, ABD1FF2E1345ACBF00AF11D1 /* libfat */, @@ -7398,6 +7417,7 @@ 8C43E83027E3CD0100A35F65 /* wifi.cpp in Sources */, 8C43E83127E3CD0100A35F65 /* xstring.cpp in Sources */, 8C43E83227E3CD0100A35F65 /* main.m in Sources */, + AB46E54F2814F42500A4E3D6 /* arm_jit_arm.cpp in Sources */, 8C43E83327E3CD0100A35F65 /* rthreads.c in Sources */, 8C43E83427E3CD0100A35F65 /* cocoa_cheat.mm in Sources */, 8C43E83527E3CD0100A35F65 /* cocoa_core.mm in Sources */, @@ -7489,6 +7509,7 @@ 8C43E92327E3CD4C00A35F65 /* ftbase.c in Sources */, 8C43E92427E3CD4C00A35F65 /* ftsynth.c in Sources */, 8C43E92527E3CD4C00A35F65 /* ftbitmap.c in Sources */, + AB46E5502814F42700A4E3D6 /* arm_jit_arm.cpp in Sources */, 8C43E92627E3CD4C00A35F65 /* ftgxval.c in Sources */, 8C43E92727E3CD4C00A35F65 /* ClientAVCaptureObject.cpp in Sources */, 8C43E92827E3CD4C00A35F65 /* AAFilter.cpp in Sources */, @@ -7795,6 +7816,7 @@ 8CCD84A127E40B730024BDD5 /* MacMetalDisplayView.mm in Sources */, 8CCD84A227E40B730024BDD5 /* MacOGLDisplayView.mm in Sources */, 8CCD84A327E40B730024BDD5 /* cocoa_firmware.mm in Sources */, + AB46E5522814F43300A4E3D6 /* arm_jit_arm.cpp in Sources */, 8CCD84A427E40B730024BDD5 /* cocoa_input.mm in Sources */, 8CCD84A527E40B730024BDD5 /* cocoa_output.mm in Sources */, 8CCD84A627E40B730024BDD5 /* smooth.c in Sources */, diff --git a/desmume/src/frontend/cocoa/DeSmuME_Prefix.pch b/desmume/src/frontend/cocoa/DeSmuME_Prefix.pch index ff540d037..e7c124cec 100644 --- a/desmume/src/frontend/cocoa/DeSmuME_Prefix.pch +++ b/desmume/src/frontend/cocoa/DeSmuME_Prefix.pch @@ -36,7 +36,7 @@ #define GL_SILENCE_DEPRECATION -#if defined(__i386__) || defined(__x86_64__) +#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) #define HAVE_JIT #endif diff --git a/desmume/src/frontend/cocoa/translations/English.lproj/MainMenu.xib b/desmume/src/frontend/cocoa/translations/English.lproj/MainMenu.xib index 7e10ec7ba..ff95ce583 100644 --- a/desmume/src/frontend/cocoa/translations/English.lproj/MainMenu.xib +++ b/desmume/src/frontend/cocoa/translations/English.lproj/MainMenu.xib @@ -50980,86 +50980,6 @@ xcjGRsbDx0HHv8g9yLzJOsm5yjjKt8s2y7bMNcy1zTXNtc42zrbPN8+40DnQutE80b7SP9LB00TTxtRJ 8065 - - - enabled: isAppRunningOnIntel - - - - - - enabled: isAppRunningOnIntel - enabled - isAppRunningOnIntel - 2 - - - 8067 - - - - enabled: isAppRunningOnIntel - - - - - - enabled: isAppRunningOnIntel - enabled - isAppRunningOnIntel - 2 - - - 8068 - - - - enabled: isAppRunningOnIntel - - - - - - enabled: isAppRunningOnIntel - enabled - isAppRunningOnIntel - 2 - - - 8069 - - - - enabled: isAppRunningOnIntel - - - - - - enabled: isAppRunningOnIntel - enabled - isAppRunningOnIntel - 2 - - - 8070 - - - - enabled: isAppRunningOnIntel - - - - - - enabled: isAppRunningOnIntel - enabled - isAppRunningOnIntel - 2 - - - 8071 - selectedTag: selection.cpuEmulationEngine @@ -51080,22 +51000,6 @@ xcjGRsbDx0HHv8g9yLzJOsm5yjjKt8s2y7bMNcy1zTXNtc42zrbPN8+40DnQutE80b7SP9LB00TTxtRJ 8072 - - - enabled: isAppRunningOnIntel - - - - - - enabled: isAppRunningOnIntel - enabled - isAppRunningOnIntel - 2 - - - 8073 - selectedTag: values.Emulation_CPUEmulationEngine @@ -61002,6 +60906,264 @@ xcjGRsbDx0HHv8g9yLzJOsm5yjjKt8s2y7bMNcy1zTXNtc42zrbPN8+40DnQutE80b7SP9LB00TTxtRJ 12275 + + + enabled: isAppRunningOnPowerPC + + + + + + enabled: isAppRunningOnPowerPC + enabled + isAppRunningOnPowerPC + + NSValueTransformerName + NSNegateBoolean + + 2 + + + 12277 + + + + enabled: isAppRunningOnPowerPC + + + + + + enabled: isAppRunningOnPowerPC + enabled + isAppRunningOnPowerPC + + NSValueTransformerName + NSNegateBoolean + + 2 + + + 12279 + + + + enabled: isAppRunningOnPowerPC + + + + + + enabled: isAppRunningOnPowerPC + enabled + isAppRunningOnPowerPC + + NSValueTransformerName + NSNegateBoolean + + 2 + + + 12281 + + + + enabled: isAppRunningOnPowerPC + + + + + + enabled: isAppRunningOnPowerPC + enabled + isAppRunningOnPowerPC + + NSValueTransformerName + NSNegateBoolean + + 2 + + + 12283 + + + + enabled: isAppRunningOnPowerPC + + + + + + enabled: isAppRunningOnPowerPC + enabled + isAppRunningOnPowerPC + + NSValueTransformerName + NSNegateBoolean + + 2 + + + 12285 + + + + enabled: isAppRunningOnPowerPC + + + + + + enabled: isAppRunningOnPowerPC + enabled + isAppRunningOnPowerPC + + NSValueTransformerName + NSNegateBoolean + + 2 + + + 12287 + + + + enabled2: isDeveloperPlusBuild + + + + + + enabled2: isDeveloperPlusBuild + enabled2 + isDeveloperPlusBuild + + + + + + + + 2 + + + 12288 + + + + enabled2: isDeveloperPlusBuild + + + + + + enabled2: isDeveloperPlusBuild + enabled2 + isDeveloperPlusBuild + + + + + + + + 2 + + + 12289 + + + + enabled2: isDeveloperPlusBuild + + + + + + enabled2: isDeveloperPlusBuild + enabled2 + isDeveloperPlusBuild + + + + + + + + 2 + + + 12290 + + + + enabled2: isDeveloperPlusBuild + + + + + + enabled2: isDeveloperPlusBuild + enabled2 + isDeveloperPlusBuild + + + + + + + + 2 + + + 12291 + + + + enabled2: isDeveloperPlusBuild + + + + + + enabled2: isDeveloperPlusBuild + enabled2 + isDeveloperPlusBuild + + + + + + + + 2 + + + 12292 + + + + enabled2: isDeveloperPlusBuild + + + + + + enabled2: isDeveloperPlusBuild + enabled2 + isDeveloperPlusBuild + + + + + + + + 2 + + + 12293 + @@ -88712,7 +88874,7 @@ xcjGRsbDx0HHv8g9yLzJOsm5yjjKt8s2y7bMNcy1zTXNtc42zrbPN8+40DnQutE80b7SP9LB00TTxtRJ - 12275 + 12293 diff --git a/desmume/src/utils/arm_jit/arm64_emit.cpp b/desmume/src/utils/arm_jit/arm64_emit.cpp new file mode 100755 index 000000000..5f37bb37d --- /dev/null +++ b/desmume/src/utils/arm_jit/arm64_emit.cpp @@ -0,0 +1,1880 @@ +/* + Copyright (C) 2022 Patrick Herlihy - https://byte4byte.com + Copyright (C) 2022 DeSmuME team + + This file is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This file is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the this software. If not, see . +*/ + +#include +#include +#include +#include + +#define R11 11 +#define R10 10 +#define FP 11 +#define LR 14 +#define PC 15 +#define SP 13 +#define R0 0 +#define R1 1 +#define R2 2 +#define R3 3 +#define R4 4 +#define R5 5 +#define R6 6 +#define R7 7 +#define R8 8 +#define R9 9 + +#define SHIFT_REG R0 + +#define WZR 31 +#define XZR WZR + +struct s_bytes; +typedef struct s_bytes t_bytes; + +typedef unsigned int (*JittedFunc)(); + +static void output_w32(t_bytes *bytes, unsigned int word); +unsigned int genlabel(); + +static u_int genimm(u_int imm,u_int *encoded) +{ + if(imm==0) {*encoded=0;return 1;} + int i=32; + while(i>0) + { + if(imm<256) { + *encoded=((i&30)<<7)|imm; + return 1; + } + imm=(imm>>2)|(imm<<30);i-=2; + } + return 0; +} + + +// This function returns true if the argument is a non-empty +// sequence of ones starting at the least significant bit with the remainder +// zero. +static uint32_t is_mask(uint64_t value) { + return value && ((value + 1) & value) == 0; +} + +// This function returns true if the argument contains a +// non-empty sequence of ones with the remainder zero. +static uint32_t is_shifted_mask(uint64_t Value) { + return Value && is_mask((Value - 1) | Value); +} + +uint32_t count_leading_zeros(uint64_t value) +{ +#ifdef _MSC_VER + uint32_t leading_zero_low = 0; + uint32_t leading_zero_high = 0; + if(!_BitScanReverse(&leading_zero_low, (uint32_t)value)) + leading_zero_low = 32; + else + leading_zero_low = 31 - leading_zero_low; + + if(!_BitScanReverse(&leading_zero_high, (uint32_t)(value>>32))) + leading_zero_high = 32; + else + leading_zero_high = 31 - leading_zero_high; + + if(leading_zero_high == 32) + return leading_zero_low + leading_zero_high; + else + return leading_zero_high; +#else /* ARM64 */ + return __builtin_clzll(value); +#endif +} + +static void emit_cmp(t_bytes *out, int rs,int rt); + +uint32_t count_trailing_zeros(uint64_t value) +{ +#ifdef _MSC_VER + uint32_t trailing_zero_low = 0; + uint32_t trailing_zero_high = 0; + if(!_BitScanForward(&trailing_zero_low, (uint32_t)value)) + trailing_zero_low = 32; + + if(!_BitScanForward(&trailing_zero_high, (uint32_t)(value>>32))) + trailing_zero_high = 32; + + if(trailing_zero_low == 32) + return trailing_zero_low + trailing_zero_high; + else + return trailing_zero_low; +#else /* ARM64 */ + return __builtin_ctzll(value); +#endif +} + +// Determine if an immediate value can be encoded +// as the immediate operand of a logical instruction for the given register +// size. If so, return 1 with "encoding" set to the encoded value in +// the form N:immr:imms. +static uint32_t genimm2(uint64_t imm, uint32_t regsize, uint32_t * encoded) { + // First, determine the element size. + uint32_t size = regsize; + do { + size /= 2; + uint64_t mask = (1ULL << size) - 1; + + if ((imm & mask) != ((imm >> size) & mask)) { + size *= 2; + break; + } + } while (size > 2); + + // Second, determine the rotation to make the element be: 0^m 1^n. + uint32_t trailing_one, trailing_zero; + uint64_t mask = ((uint64_t)-1LL) >> (64 - size); + imm &= mask; + + if (is_shifted_mask(imm)) { + trailing_zero = count_trailing_zeros(imm); + assert(trailing_zero < 64); + trailing_one = count_trailing_zeros(~(imm >> trailing_zero)); + } else { + imm |= ~mask; + if (!is_shifted_mask(~imm)) + return 0; + + uint32_t leading_one = count_leading_zeros(~imm); + trailing_zero = 64 - leading_one; + trailing_one = leading_one + count_trailing_zeros(~imm) - (64 - size); + } + + // Encode in immr the number of RORs it would take to get *from* 0^m 1^n + // to our target value, where trailing_zero is the number of RORs to go the opposite + // direction. + assert(size > trailing_zero); + uint32_t immr = (size - trailing_zero) & (size - 1); + + // If size has a 1 in the n'th bit, create a value that has zeroes in + // bits [0, n] and ones above that. + uint64_t Nimms = ~(size-1) << 1; + + // Or the trailing_one value into the low bits, which must be below the Nth bit + // bit mentioned above. + Nimms |= (trailing_one-1); + + // Extract the seventh bit and toggle it to create the N field. + uint32_t N = ((Nimms >> 6) & 1) ^ 1; + + *encoded = (N << 12) | (immr << 6) | (Nimms & 0x3f); + return 1; +} + +static void emit_branch_label(t_bytes *out, u_int id, int cond); +static void emit_test(t_bytes *out, int rs, int rt); +static void emit_cmpimm(t_bytes *out, int reg, int val); +static void set_carry_flag(t_bytes *out); +static void clear_carry_flag(t_bytes *out); +static void emit_andimm(t_bytes *out,int rs,int imm,int rt); +static void emit_label(t_bytes *bytes, u_int id); +static void get_carry_flag(t_bytes *out, int reg); + +static void emit_lsls_reg(t_bytes *out, int reg, int nreg); +static void emit_lsrs_reg(t_bytes *out, int reg, int nreg); +static void emit_asrs_reg(t_bytes *out, int reg, int nreg); +static void emit_rors_reg(t_bytes *out, int reg, int nreg); +static void emit_rrxs_reg(t_bytes *out, int reg, int nreg); +static void emit_rrx_reg(t_bytes *out, int reg, int nreg); +static void emit_ror_reg(t_bytes *out, int reg, int nreg); + +static void emit_lsr_reg(t_bytes *out, int reg, int nreg); +static void emit_testimm(t_bytes *out, int rs,int imm); + +static void output_w32_FROM_PC(t_bytes *bytes, unsigned int word); +static void emit_movimm(t_bytes *out, uintptr_t imm,u_int rt); +static void emit_movimm2pad(t_bytes *out, u_int imm,u_int rt); +static void emit_addnop(t_bytes *out, u_int r); + +static void output_w32(t_bytes *bytes, unsigned int word); + +static void output_w32_full(t_bytes *bytes, unsigned int word, int type, int label_id, int cond); + +static void emit_lsr_reg_quick(t_bytes *out, int reg, int nreg); + + +static void emit_setnz(t_bytes *out, int reg); +static void emit_movimm(t_bytes *out, uintptr_t imm,u_int rt); +static void emit_mov(t_bytes *out, int rs,int rt); +static void emit_jmpreg(t_bytes *out, u_int r); + +static void emit_brk(t_bytes *out); + +static void emit_lsl(t_bytes *out, int reg, int n) { + emit_movimm(out, n, SHIFT_REG); + output_w32(out, 0x1AC02000|SHIFT_REG<<16|reg<<5|reg); +} + +static void emit_lsr(t_bytes *out, int reg, int n) { + if (n < 32) { + output_w32(out, 0x53000000|n<<16|0x1f<<10|reg<<5|reg); + } + else { + emit_movimm(out, n, SHIFT_REG); + output_w32(out, 0x1ac02400|SHIFT_REG<<16|reg<<5|reg); + } +} + +static void emit_lsr64(t_bytes *out, int reg, int n) { + if (n < 32) { + output_w32(out, 0xD340FC00|n<<16|0x1f<<10|reg<<5|reg); + } + else { + emit_movimm(out, n, SHIFT_REG); + output_w32(out, 0x9AC02400|SHIFT_REG<<16|reg<<5|reg); + } +} + +static void emit_asr(t_bytes *out, int reg, int num) +{ + if (num < 32) { + output_w32(out, 0x13000000|num<<16|0x1f<<10|reg<<5|reg); + } + else { + emit_movimm(out, num, SHIFT_REG); + output_w32(out, 0x1ac02800|SHIFT_REG<<16|reg<<5|reg); + } +} + +static void emit_asr64(t_bytes *out, int reg, int num) +{ + emit_movimm(out, num, SHIFT_REG); + output_w32(out, 0x9AC02800|SHIFT_REG<<16|reg<<5|reg); +} + +static void emit_asr_reg(t_bytes *out, int reg, int nreg) { + + int done=genlabel(); + + emit_cmpimm(out, nreg, 0); + emit_branch_label(out, done, 0); // eq 0 + + output_w32(out, 0x1ac02800|nreg<<16|reg<<5|reg); // do shift + + emit_label(out, (int)done); +} + + +static void emit_ror(t_bytes *out, int reg, int n) { + if (n < 32) { + output_w32(out, 0x13800000|reg<<16|n<<10|reg<<5|reg); + } + else { + emit_movimm(out, n, SHIFT_REG); + emit_ror_reg(out, reg, SHIFT_REG); + } +} +static void emit_rrx(t_bytes *out, int reg, int n) { + + emit_movimm(out, n, SHIFT_REG); + emit_ror_reg(out, reg, SHIFT_REG); +} +static void emit_lsl_reg(t_bytes *out, int reg, int nreg) { + int setb=genlabel(); + int done=genlabel(); + + emit_cmpimm(out, nreg, 32); + emit_branch_label(out, setb, 10); // greater than or equal to 32 + + output_w32(out, 0x1ac02000|nreg<<16|reg<<5|reg); // do shift + emit_branch_label(out, done, 0xE); + + emit_label(out, (int)setb); + emit_movimm(out, 0, reg); + + emit_label(out, (int)done); +} + +static void emit_lsl_reg_quick(t_bytes *out, int reg, int nreg) { + output_w32(out, 0x1ac02000|nreg<<16|reg<<5|reg); +} + +static void emit_lsr_reg_quick(t_bytes *out, int reg, int nreg) { + output_w32(out, 0x1ac02400|nreg<<16|reg<<5|reg); +} + +static void emit_lsr_reg(t_bytes *out, int reg, int nreg) +{ + int setb=genlabel(); + int done=genlabel(); + + emit_cmpimm(out, nreg, 32); + emit_branch_label(out, setb, 10); // greater than or equal to 32 + + output_w32(out, 0x1ac02400|nreg<<16|reg<<5|reg); // do shift + emit_branch_label(out, done, 0xE); + + emit_label(out, (int)setb); + emit_movimm(out, 0, reg); + + emit_label(out, (int)done); +} + +static void emit_andsimm(t_bytes *out,int rs,int imm,int rt); +static void emit_ror_reg(t_bytes *out, int reg, int nreg) { + + int done=genlabel(); + + emit_cmpimm(out, nreg, 0); + emit_branch_label(out, done, 0); // 0 + emit_andsimm(out, nreg, 0x1F, nreg); + emit_branch_label(out, done, 0); // 0 + + output_w32(out, 0x1AC02C00|reg|reg<<5|nreg<<16); // do shift + + emit_label(out, (int)done); +} + +static void emit_lsls(t_bytes *out, int reg, int n) { + emit_movimm(out, n, SHIFT_REG); + emit_lsls_reg(out, reg, SHIFT_REG); +} +static void emit_lsrs(t_bytes *out, int reg, int n) { + emit_movimm(out, n, SHIFT_REG); + emit_lsrs_reg(out, reg, SHIFT_REG); +} +static void emit_asrs(t_bytes *out, int reg, int n) { + emit_movimm(out, n, SHIFT_REG); + emit_asrs_reg(out, reg, SHIFT_REG); +} +static void emit_rors(t_bytes *out, int reg, int n) { + emit_movimm(out, n, SHIFT_REG); + emit_rors_reg(out, reg, SHIFT_REG); +} +static void emit_rrxs(t_bytes *out, int reg, int n) { + emit_movimm(out, n, SHIFT_REG); + emit_rrxs_reg(out, reg, SHIFT_REG); +} +static void emit_lsls_reg(t_bytes *out, int reg, int nreg) { + + + int nonzero=genlabel(); + int done=genlabel(); + int eq32=genlabel(); + int gt32=genlabel(); + + emit_cmpimm(out, nreg, 0); + emit_branch_label(out, nonzero, 0x1); + + /* eq 0 */ + + get_carry_flag(out, 5); + emit_branch_label(out, done, 0xE); + + /* end eq 0 */ + + emit_label(out, (int)nonzero); + + emit_mov(out, reg, 20); + + /* non zero */ + emit_cmpimm(out, nreg, 32); + emit_branch_label(out, eq32, 0x0); // nreg == 32 + emit_branch_label(out, gt32, 12); // gt 32 + + // get last shifted bit + emit_andimm(out, nreg, 0xff, 19); + emit_movimm(out, 32, 18); + output_w32(out, 0x4B130252); // sub w18, w18, w19 + + // perform shifft + output_w32(out, 0x1AC02000|nreg<<16|reg<<5|reg); + + //emit_test(out, 20, 18); + emit_movimm(out, 1, 22); + emit_lsl_reg_quick(out, 22, 18); + emit_test(out, 20, 22); + + + emit_setnz(out, 5); + emit_branch_label(out, done, 0xE); + + /* end non zero */ + + emit_label(out, (int)eq32); + + /* equal 32 */ + + emit_movimm(out, 1, 22); + emit_movimm(out, 31, 18); + emit_lsl_reg_quick(out, 22, 18); // get BIT31 for carry + emit_test(out, 20, 22); // test BIT31(reg 22) of original reg 20 + + emit_setnz(out, 5); + emit_branch_label(out, done, 0xE); + + /* end equal 32 */ + + emit_label(out, (int)gt32); + + /* gt 32 */ + + emit_movimm(out, 0, reg); + + emit_movimm(out, 0, 5); + + // fall into clrb + + /* end gt 32 */ + + emit_label(out, (int)done); + emit_cmpimm(out, reg, 0); +} +static void emit_lsrs_reg(t_bytes *out, int reg, int nreg) { + + int nonzero=genlabel(); + int done=genlabel(); + int eq32=genlabel(); + int gt32=genlabel(); + + emit_cmpimm(out, nreg, 0); + emit_branch_label(out, nonzero, 0x1); + + /* eq 0 */ + + get_carry_flag(out, 5); + emit_branch_label(out, done, 0xE); + + /* end eq 0 */ + + emit_label(out, (int)nonzero); + + emit_mov(out, reg, 20); + + /* non zero */ + emit_cmpimm(out, nreg, 32); + emit_branch_label(out, eq32, 0x0); // nreg == 32 + emit_branch_label(out, gt32, 12); // gt 32 + + // get last shifted bit + emit_andimm(out, nreg, 0xff, 19); + emit_movimm(out, 1, 18); + output_w32(out, 0x4B120272); // sub w18, w19, w18 + + // perform shifft + output_w32(out, 0x1AC02400|nreg<<16|reg<<5|reg); + + emit_movimm(out, 1, 22); + emit_lsl_reg_quick(out, 22, 18); + emit_test(out, 20, 22); + + emit_setnz(out, 5); + emit_branch_label(out, done, 0xE); + + /* end non zero */ + + emit_label(out, (int)eq32); + + /* equal 32 */ + + emit_movimm(out, 1, 22); + emit_movimm(out, 31, 18); + emit_lsl_reg_quick(out, 22, 18); // get BIT31 for carry + emit_test(out, 20, 22); // test BIT31(reg 22) of original reg 20 + + emit_setnz(out, 5); + emit_branch_label(out, done, 0xE); + + /* end equal 32 */ + + emit_label(out, (int)gt32); + + /* gt 32 */ + + emit_movimm(out, 0, reg); + // fall into clrb + + /* end gt 32 */ + + emit_label(out, (int)done); + + emit_cmpimm(out, reg, 0); +} +static void emit_asrs_reg(t_bytes *out, int reg, int nreg) { + + int nonzero=genlabel(); + int done=genlabel(); + int gteq32=genlabel(); + + emit_cmpimm(out, nreg, 0); + emit_branch_label(out, nonzero, 0x1); + + /* eq 0 */ + + get_carry_flag(out, 5); + emit_branch_label(out, done, 0xE); + + /* end eq 0 */ + + emit_label(out, (int)nonzero); + + emit_mov(out, reg, 20); + + /* non zero */ + emit_cmpimm(out, nreg, 32); + emit_branch_label(out, gteq32, 10); // gteq 32 + + // get last shifted bit + emit_andimm(out, nreg, 0xff, 19); + emit_movimm(out, 1, 18); + output_w32(out, 0x4B120272); // sub w18, w19, w18 + + // perform shifft + output_w32(out, 0x1AC02800|nreg<<16|reg<<5|reg); + + emit_movimm(out, 1, 22); + emit_lsl_reg_quick(out, 22, 18); + emit_test(out, 20, 22); + + emit_setnz(out, 5); + emit_branch_label(out, done, 0xE); + + /* end non zero */ + + emit_label(out, (int)gteq32); + + /* equal 32 */ + + // perform shifft + output_w32(out, 0x1AC02800|nreg<<16|reg<<5|reg); + + emit_movimm(out, 1, 22); + emit_movimm(out, 31, 18); + emit_lsl_reg_quick(out, 22, 18); // get BIT31 for carry + emit_test(out, 20, 22); // test BIT31(reg 22) of original reg 20 + + + emit_setnz(out, 5); + + emit_label(out, (int)done); + + emit_cmpimm(out, reg, 0); + +} +static void emit_rors_reg(t_bytes *out, int reg, int nreg) { + + + int nonzero=genlabel(); + int done=genlabel(); + int and0=genlabel(); + + emit_mov(out, reg, 20); + + emit_cmpimm(out, nreg, 0); + emit_branch_label(out, nonzero, 0x1); + + get_carry_flag(out, 5); + emit_branch_label(out, done, 0xE); + + /* eq 0 */ + + /* end eq 0 */ + + emit_label(out, (int)nonzero); + + /* non zero */ + emit_andimm(out, nreg, 0x1F, 19); + emit_cmpimm(out, 19, 0); + emit_branch_label(out, and0, 0x0); // eq 0 + + // get last shifted bit + emit_movimm(out, 1, 18); + output_w32(out, 0x4B120272); // sub w18, w19, w18 + + // perform shifft + output_w32(out, 0x1AC02C00|nreg<<16|reg<<5|reg); + + //emit_test(out, 20, 18); + emit_movimm(out, 1, 22); + emit_lsl_reg_quick(out, 22, 18); + emit_test(out, 20, 22); + + emit_setnz(out, 5); + emit_branch_label(out, done, 0xE); + //emit_branch_label(out, setb, 0x1); + //emit_branch_label(out, clrb, 0xE); + + + /* end non zero */ + + emit_label(out, (int)and0); + + /* equal 32 */ + + // perform shifft + //output_w32(out, 0x1AC02800|nreg<<16|reg<<5|reg); + emit_mov(out, 20, reg); + + emit_movimm(out, 1, 22); + emit_movimm(out, 31, 18); + emit_lsl_reg_quick(out, 22, 18); // get BIT31 for carry + emit_test(out, 20, 22); // test BIT31(reg 22) of original reg 20 + + +emit_setnz(out, 5); + emit_branch_label(out, done, 0xE); + + emit_label(out, (int)done); + emit_cmpimm(out, reg, 0); +} +static void emit_rrxs_reg(t_bytes *out, int reg, int nreg) { + emit_rors_reg(out, reg, nreg); +} + +static void emit_mov(t_bytes *out, int rs,int rt) +{ + output_w32(out, 0x2a000000|rs<<16|WZR<<5|rt); +} + +static void emit_mov64(t_bytes *out, int rs,int rt) +{ + output_w32(out, 0xaa000000|rs<<16|WZR<<5|rt); +} + +static void emit_add(t_bytes *out, int rs1,int rs2,int rt) +{ + output_w32(out, 0x0b000000|rs2<<16|rs1<<5|rt); +} + +static void emit_adds(t_bytes *out, int rs1,int rs2,int rt) +{ + output_w32(out, 0x2b000000|rs2<<16|rs1<<5|rt); +} + +static void emit_add64(t_bytes *out, int rs1,int rs2,int rt) +{ + output_w32(out, 0x8b000000|rs2<<16|rs1<<5|rt); +} + +static void emit_adc(t_bytes *out, int rs1,int rs2,int rt) +{ + output_w32(out, 0x1a000000|rs2<<16|rs1<<5|rt); +} + +static void emit_adcs(t_bytes *out, int rs1,int rs2,int rt) +{ + output_w32(out, 0x3a000000|rs2<<16|rs1<<5|rt); +} + +static void emit_sub(t_bytes *out, int rs1,int rs2,int rt) +{ + output_w32(out, 0x4b000000|rs2<<16|rs1<<5|rt); +} + +static void emit_subs(t_bytes *out, int rs1,int rs2,int rt) +{ + output_w32(out, 0x6b000000|rs2<<16|rs1<<5|rt); +} + +static void emit_sbc(t_bytes *out, int rs1,int rs2,int rt) +{ + output_w32(out, 0x5a000000|rs2<<16|rs1<<5|rt); +} + +static void emit_sbcs(t_bytes *out, int rs1,int rs2,int rt) +{ + output_w32(out, 0x7a000000|rs2<<16|rs1<<5|rt); +} + +static void emit_neg(t_bytes *out, int rs, int rt) +{ + output_w32(out, 0x4b000000|rs<<16|WZR<<5|rt); +} + +static void emit_negs(t_bytes *out, int rs, int rt) +{ + output_w32(out, 0x6b000000|rs<<16|WZR<<5|rt); +} + +static void emit_mvns(t_bytes *out, int rs,int rt) +{ + output_w32(out, 0x2a200000|rs<<16|WZR<<5|rt); + emit_cmp(out, rt, rt); +} + +static void emit_zeroreg(t_bytes *out, int rt) +{ + output_w32(out, 0x52800000|rt); +} +static void emit_movn(t_bytes *out, u_int imm,u_int rt) +{ + output_w32(out, 0x12800000|imm<<5|rt); +} + +static void emit_movimms(t_bytes *out, u_int imm,u_int rt) +{ + emit_movimm(out, imm, rt); + emit_cmpimm(out, rt, 0); +} + +static void emit_movz(t_bytes *out, u_int imm,u_int rt) +{ + output_w32(out, 0x52800000|imm<<5|rt); +} +static void emit_movn_lsl16(t_bytes *out, u_int imm,u_int rt) +{ + output_w32(out, 0x12a00000|imm<<5|rt); +} +static void emit_movz_lsl16(t_bytes *out, u_int imm,u_int rt) +{ + output_w32(out, 0x52a00000|imm<<5|rt); +} +static void emit_movk(t_bytes *out, u_int imm,u_int rt) +{ + output_w32(out, 0x72800000|imm<<5|rt); +} +static void emit_movk_lsl16(t_bytes *out, u_int imm,u_int rt) +{ + output_w32(out, 0x72a00000|imm<<5|rt); +} + +static void emit_movs(t_bytes *out, int rs,int rt) { + output_w32(out, 0x2A0003E0|rs<<5|rt); + emit_cmp(out, rt, rt); +} + +static void emit_mov_cond(t_bytes *out, int rs,int rt, int cond) +{ + int setb=genlabel(); + int done=genlabel(); + + emit_branch_label(out, setb, cond); + emit_branch_label(out, done, 0xE); + emit_label(out, (int)setb); + emit_mov(out, rs, rt); + emit_label(out, (int)done); +} + +static void emit_movk_lslnum(t_bytes *out, u_int imm,u_int rt, int lsl) +{ + uint32_t lsl_bit; + + switch (lsl) { + case 16: + lsl_bit = 0x200000; + break; + case 32: + lsl_bit = 0x400000; + break; + case 48: + lsl_bit = 0x600000; + break; + default: + lsl_bit = 0; + break; + } + + output_w32(out, 0x72800000|imm<<5|rt|lsl_bit); +} + +static void emit_movk_lslnum64(t_bytes *out, u_int imm,u_int rt, int lsl) +{ + uint32_t lsl_bit; + + switch (lsl) { + case 16: + lsl_bit = 0x200000; + break; + case 32: + lsl_bit = 0x400000; + break; + case 48: + lsl_bit = 0x600000; + break; + default: + lsl_bit = 0; + break; + } + + output_w32(out, 0x80000000|0x72800000|imm<<5|rt|lsl_bit); +} + +static void emit_movz64(t_bytes *out, u_int imm,u_int rt) +{ + output_w32(out, 0x80000000|0x52800000|imm<<5|rt); +} + +static void emit_ptr(t_bytes *out, uintptr_t ptr, u_int rt) { + emit_movz64(out, ptr&0xffff,rt); + if ((ptr>>16)&0xffff) emit_movk_lslnum64(out, (ptr>>16)&0xffff,rt, 16); + if ((ptr>>32)&0xffff) emit_movk_lslnum64(out, (ptr>>32)&0xffff,rt, 32); + if ((ptr>>48)&0xffff) emit_movk_lslnum64(out, (ptr>>48)&0xffff,rt, 48); +} + +static void emit_movimm(t_bytes *out, uintptr_t imm,u_int rt) +{ + uint32_t armval=0; + if(imm<65536) { + emit_movz(out, imm,rt); + }else if((~imm)<65536) { + emit_movn(out, ~imm,rt); + }else if((imm&0xffff)==0) { + emit_movz_lsl16(out, (imm>>16)&0xffff,rt); + }else if(((~imm)&0xffff)==0) { + emit_movn_lsl16(out, (~imm>>16)&0xffff,rt); + }else if(genimm2((uint64_t)imm,32,&armval)) { + output_w32(out, 0x32000000|armval<<10|WZR<<5|rt); + }else{ + emit_movz_lsl16(out, (imm>>16)&0xffff,rt); + emit_movk(out, imm&0xffff,rt); + } +} + +static void emit_test(t_bytes *out, int rs, int rt) +{ + output_w32(out, 0x6a000000|rt<<16|rs<<5|WZR); +} + +static void emit_not(t_bytes *out, int rs,int rt) +{ + output_w32(out, 0x2a200000|rs<<16|WZR<<5|rt); +} + +static void emit_and(t_bytes *out, u_int rs1,u_int rs2,u_int rt) +{ + output_w32(out, 0x0a000000|rs2<<16|rs1<<5|rt); +} + +static void emit_or(t_bytes *out, u_int rs1,u_int rs2,u_int rt) +{ + output_w32(out, 0x2a000000|rs2<<16|rs1<<5|rt); +} + +static void emit_or64(t_bytes *out, u_int rs1,u_int rs2,u_int rt) +{ + output_w32(out, 0xAA000000|rs2<<16|rs1<<5|rt); +} + +static void emit_or_and_set_flags(t_bytes *out, int rs1,int rs2,int rt) +{ + output_w32(out, 0x2A000000|rs2<<16|rs1<<5|rt); + emit_cmpimm(out, rt, 0); +} + +static void emit_xor(t_bytes *out, u_int rs1,u_int rs2,u_int rt) +{ + output_w32(out, 0x4a000000|rs2<<16|rs1<<5|rt); +} + +static void emit_ands(t_bytes *out, u_int rs1,u_int rs2,u_int rt) +{ + output_w32(out, 0x6a000000|rs2<<16|rs1<<5|rt); +} + +static void emit_ors(t_bytes *out, u_int rs1,u_int rs2,u_int rt) +{ + emit_or_and_set_flags(out, rs1,rs2,rt); +} +static void emit_xors(t_bytes *out, u_int rs1,u_int rs2,u_int rt) +{ + output_w32(out, 0x4a000000|rs2<<16|rs1<<5|rt); + emit_cmpimm(out, rt, 0); +} + +static void emit_addimm64(t_bytes *out, u_int rs,int imm,u_int rt) +{ + emit_movimm(out, imm, 16); + emit_add64(out, rs, 16, rt); +} + + +static void emit_addimm(t_bytes *out, u_int rs,int imm,u_int rt) +{ + if(imm!=0) { + //assert(imm>-16777216&&imm<16777216); + if(imm<0&&imm>-4096) { + output_w32(out, 0x51000000|((-imm)&0xfff)<<10|rs<<5|rt); + }else if(imm>0&&imm<4096) { + output_w32(out, 0x11000000|(imm&0xfff)<<10|rs<<5|rt); + }else if(imm<0) { + output_w32(out, 0x51400000|(((-imm)>>12)&0xfff)<<10|rs<<5|rt); + if((-imm&0xfff)!=0) { + output_w32(out, 0x51000000|((-imm)&0xfff)<<10|rt<<5|rt); + } + }else { + output_w32(out, 0x11400000|((imm>>12)&0xfff)<<10|rs<<5|rt); + if((imm&0xfff)!=0) { + output_w32(out, 0x11000000|(imm&0xfff)<<10|rt<<5|rt); + } + } + } + else if(rs!=rt) emit_mov(out, rs,rt); +} + +static void emit_addsimm(t_bytes *out, u_int rs,int imm,u_int rt) +{ + emit_movimm(out, imm, 16); + emit_adds(out, rs, 16, rt); +} + +static void emit_cmp(t_bytes *out, int rs,int rt) +{ + output_w32(out, 0x6b000000|rt<<16|rs<<5|WZR); +} + +static void emit_smulbb(t_bytes *out, u_int rs1, u_int rs2) { + output_w32(out, 0x93403C11|rs1<<5); // SBFX x17, x0, #0, #16 + output_w32(out, 0x93403C12|rs2<<5); // SBFX x18, x0, #0, #16 + + // mul + output_w32(out, 0x1B007C00|(18<<16)|(17<<5)|rs1); +} + +static void emit_smulbt(t_bytes *out, u_int rs1, u_int rs2) { + output_w32(out, 0x93403C11|rs1<<5); // SBFX x17, x0, #0, #16 + output_w32(out, 0x93507C12|rs2<<5); // SBFX x18, x0, #16, #16 + // mu; + output_w32(out, 0x1B007C00|(18<<16)|(17<<5)|rs1); +} + +static void emit_smultb(t_bytes *out, u_int rs1, u_int rs2) { + output_w32(out, 0x93507C11|rs1<<5); //SBFX x17, rs1, #16, #16 + output_w32(out, 0x93403C12|rs2<<5); // SBFX x18, x0, #0, #16 + + // mul + output_w32(out, 0x1B007C00|(18<<16)|(17<<5)|rs1); +} + +static void emit_smultt(t_bytes *out, u_int rs1, u_int rs2) { + output_w32(out, 0x93507C11|rs1<<5); // SBFX x17, rs1, #16, #16 + output_w32(out, 0x93507C12|rs2<<5); // SBFX x18, rs2, #16, #16 + + // mul + output_w32(out, 0x1B007C00|(18<<16)|(17<<5)|rs1); +} +static void emit_smulwt(t_bytes *out, u_int rs1, u_int rs2) { + output_w32(out, 0x93507C12|rs2<<5); // SBFX x18, x0, #16, #16 + + // mul + output_w32(out, 0x1B007C00|(18<<16)|(rs1<<5)|rs1); +} +static void emit_smulwb(t_bytes *out, u_int rs1, u_int rs2) { + output_w32(out, 0x93403C12|rs2<<5); // SBFX x18, x0, #0, #16 + + // mul + output_w32(out, 0x1B007C00|(18<<16)|(rs1<<5)|rs1); +} + +static void emit_smlawb(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + output_w32(out, 0x93403C12|rs2<<5); // SBFX x18, x0, #0, #16 + + output_w32(out, 0x1B007C00|(18<<16)|(rs1<<5)|hi); + emit_adds(out, hi, lo, hi); // sets carry flag - need to use in SET_Q +} +static void emit_smlawt(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + output_w32(out, 0x93507C12|rs2<<5); // SBFX x18, x0, #16, #16 + + output_w32(out, 0x1B007C00|(18<<16)|(rs1<<5)|hi); + emit_adds(out, hi, lo, hi); // sets carry flag - need to use in SET_Q +} + +static void emit_smlabb(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + output_w32(out, 0x93403C11|rs1<<5); // SBFX x17, x0, #0, #16 + output_w32(out, 0x93403C12|rs2<<5); // SBFX x18, x0, #0, #16 + + output_w32(out, 0x1B007C00|(18<<16)|(17<<5)|hi); + emit_adds(out, hi, lo, hi); // sets carry flag - need to use in SET_Q +} +static void emit_smlabt(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + output_w32(out, 0x93403C11|rs1<<5); // SBFX x17, x0, #0, #16 + output_w32(out, 0x93507C12|rs2<<5); // SBFX x18, x0, #16, #16 + + output_w32(out, 0x1B007C00|(18<<16)|(17<<5)|hi); + emit_adds(out, hi, lo, hi); // sets carry flag - need to use in SET_Q +} +static void emit_smlatb(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + output_w32(out, 0x93507C11|rs1<<5); // SBFX x17, x0, #16, #16 + output_w32(out, 0x93403C12|rs2<<5); // sbfx x18, x0, #0, #16 + + output_w32(out, 0x1B007C00|(18<<16)|(17<<5)|hi); + emit_adds(out, hi, lo, hi); // sets carry flag - need to use in SET_Q +} +static void emit_smlatt(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + output_w32(out, 0x93507C11|rs1<<5); // SBFX x17, rs1, #16, #16 + output_w32(out, 0x93507C12|rs2<<5); // SBFX x18, rs2, #16, #16 + + output_w32(out, 0x1B007C00|(18<<16)|(17<<5)|hi); + emit_adds(out, hi, lo, hi); // sets carry flag - need to use in SET_Q +} + +static void emit_smlalbb(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + emit_mov(out, hi, 17); + output_w32(out, 0xB3607C11|lo<<5); // BFI x17, xhi, #32, #32 + + output_w32(out, 0x93403C13|rs1<<5); //SBFX x19, rs1, #0, #16 + output_w32(out, 0x93403C14|rs2<<5); //SBFX x20, rs2, #0, #16 + + output_w32(out, 0x9B204411|(20<<16)|(19<<5)); // SMADDL x17, w#, w#, x17 + + output_w32(out, 0xD360FE20|lo); // lsr lo, x17, #32 + emit_mov(out, 17, hi); +} +static void emit_smlalbt(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + emit_mov(out, hi, 17); + output_w32(out, 0xB3607C11|lo<<5); // BFI x17, xhi, #32, #32 + + output_w32(out, 0x93403C13|rs1<<5); //SBFX x19, rs1, #0, #16 + output_w32(out, 0x93507C14|rs2<<5); //SBFX x20, rs2, #16, #16 + + output_w32(out, 0x9B204411|(20<<16)|(19<<5)); // SMADDL x17, w#, w#, x17 + + output_w32(out, 0xD360FE20|lo); // lsr lo, x17, #32 + emit_mov(out, 17, hi); +} +static void emit_smlaltb(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + emit_mov(out, hi, 17); + output_w32(out, 0xB3607C11|lo<<5); // BFI x17, xhi, #32, #32 + + output_w32(out, 0x93507C13|rs1<<5); //sbfx x19, rs1, #16, #16 + output_w32(out, 0x93403C14|rs2<<5); //sbfx x20, rs2, #0, #16 + + output_w32(out, 0x9B204411|(20<<16)|(19<<5)); // SMADDL x17, w#, w#, x17 + + output_w32(out, 0xD360FE20|lo); // lsr lo, x17, #32 + emit_mov(out, 17, hi); +} +static void emit_smlaltt(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + emit_mov(out, hi, 17); + output_w32(out, 0xB3607C11|lo<<5); // BFI x17, xhi, #32, #32 + + output_w32(out, 0x93507C13|rs1<<5); //sbfx x19, rs1, #16, #16 + output_w32(out, 0x93507C14|rs2<<5); //sbfx x20, rs2, #16, #16 + + output_w32(out, 0x9B204411|(20<<16)|(19<<5)); // SMADDL x17, w#, w#, x17 + + output_w32(out, 0xD360FE20|lo); // lsr lo, x17, #32 + emit_mov(out, 17, hi); +} + +static void emit_mul(t_bytes *out, u_int rs1,u_int rs2) +{ + output_w32(out, 0x1B007C00|(rs2<<16)|(rs1<<5)|rs1); +} +static void emit_muls(t_bytes *out, u_int rs1,u_int rs2) +{ + output_w32(out, 0x1B007C00|(rs2<<16)|(rs1<<5)|rs1); + emit_cmpimm(out, rs1, 0); +} + +static void emit_mla(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + output_w32(out, 0x1B000000|(lo<<10)|(rs2<<16)|(rs1<<5)|hi); +} +static void emit_mlas(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + output_w32(out, 0x1B000000|(lo<<10)|(rs2<<16)|(rs1<<5)|hi); + emit_cmpimm(out, hi, 0); +} +static void emit_cmpx64(t_bytes *out, int rs) { + output_w32(out, 0xF100001F|rs<<5); +} +static void emit_umull(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + output_w32(out, 0x9BA07C11|(rs2<<16)|(rs1<<5)); // UMADDL x17, w#, w#, XZR + + output_w32(out, 0xD360FE20|lo); // lsr lo, x17, #32 + emit_mov(out, 17, hi); +} +static void emit_umulls(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + output_w32(out, 0x9BA07C11|(rs2<<16)|(rs1<<5)); // UMADDL x17, w#, w#, XZR + emit_cmpx64(out, 17); + + output_w32(out, 0xD360FE20|lo); // lsr lo, x17, #32 + emit_mov(out, 17, hi); +} +static void emit_umlal(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + emit_mov(out, hi, 17); + output_w32(out, 0xB3607C11|lo<<5); // BFI x17, xhi, #32, #32 + + output_w32(out, 0x9BA04411|(rs2<<16)|(rs1<<5)); // UMADDL x17, w#, w#, x17 + + output_w32(out, 0xD360FE20|lo); // lsr lo, x17, #32 + emit_mov(out, 17, hi); +} +static void emit_umlals(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + emit_mov(out, hi, 17); + output_w32(out, 0xB3607C11|lo<<5); // BFI x17, xhi, #32, #32 + + output_w32(out, 0x9BA04411|(rs2<<16)|(rs1<<5)); // UMADDL x17, w#, w#, x17 + emit_cmpx64(out, 17); + + output_w32(out, 0xD360FE20|lo); // lsr lo, x17, #32 + emit_mov(out, 17, hi); +} +static void emit_smull(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + output_w32(out, 0x9B207C11|(rs2<<16)|(rs1<<5)); // SMADDL x17, w#, w#, XZR + + output_w32(out, 0xD360FE20|lo); // lsr lo, x17, #32 + emit_mov(out, 17, hi); +} +static void emit_smulls(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + + output_w32(out, 0x9B207C11|(rs2<<16)|(rs1<<5)); // SMADDL x17, w#, w#, XZR + emit_cmpx64(out, 17); + + output_w32(out, 0xD360FE20|lo); // lsr lo, x17, #32 + emit_mov(out, 17, hi); +} + +static void emit_smlal(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + emit_mov(out, hi, 17); + output_w32(out, 0xB3607C11|lo<<5); // BFI x17, xhi, #32, #32 + + output_w32(out, 0x9B204411|(rs2<<16)|(rs1<<5)); // SMADDL x17, w#, w#, x17 + + output_w32(out, 0xD360FE20|lo); // lsr lo, x17, #32 + emit_mov(out, 17, hi); +} +static void emit_smlals(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + emit_mov(out, hi, 17); + output_w32(out, 0xB3607C11|lo<<5); // BFI x17, xhi, #32, #32 + + output_w32(out, 0x9B204411|(rs2<<16)|(rs1<<5)); // SMADDL x17, w#, w#, x17 + emit_cmpx64(out, 17); + + output_w32(out, 0xD360FE20|lo); // lsr lo, x17, #32 + emit_mov(out, 17, hi); +} + +static void emit_clz(t_bytes *out, int rs,int rt) +{ + output_w32(out,0x5AC01000|rs<<5|rt); +} + +static void emit_teq(t_bytes *out, int rs, int rt) +{ + output_w32(out, 0x4A000000|16|rs<<5|rt<<16); + emit_cmpimm(out, 16, 0); +} + +static u_int gencondjmp(intptr_t offset) +{ + return (offset>>2)&0x7ffff; +} + +#define MOD_JUMP(ptr, cond, lbl_bytes, jmp_bytes) *(ptr) = 0x54000000|gencondjmp(lbl_bytes - jmp_bytes)<<5|(cond); +#include "emit_core.cpp" + +static void myfunc(const char *str) { + puts(str); +} + +static void emit_ldr(t_bytes *out, int rs, int rt) { + output_w32(out, 0xB9400000|rs<<5|rt); +} + +static void emit_ldr64(t_bytes *out, int rs, int rt) { + output_w32(out, 0xF9400000|rs<<5|rt); +} + +static void emit_str(t_bytes *out, int rs, int rt) { + output_w32(out, 0xB9000000|rs<<5|rt); +} + +static void emit_str64(t_bytes *out, int rs, int rt) { + output_w32(out, 0x40000000|0xB9000000|rs<<5|rt); +} + +static void emit_str_cond(t_bytes *out, int rs, int rt, int cond) { + int setb=genlabel(); + int done=genlabel(); + + emit_branch_label(out, setb, cond); + emit_branch_label(out, done, 0xE); + + emit_label(out, (int)setb); + emit_str(out, rs, rt); + + emit_label(out, (int)done); +} + +static void emit_ldrb(t_bytes *out, int rs, int rt) { + output_w32(out, 0x39400000|rs<<5|rt); // LDRB R0, [R0] <-- armconverter.com +} + +static void emit_ldrsb(t_bytes *out, int rs, int rt) { + output_w32(out, 0x39C00000|rs<<5|rt); // LDRSB R0, [R0] <-- armconverter.com +} + +static void emit_strb(t_bytes *out, int rs, int rt) { + output_w32(out, 0x39000000|rs<<5|rt); +} + +static void emit_ldrsw(t_bytes *out, int rs, int rt) { + output_w32(out, 0x79C00000|rs<<5|rt); // LDRSB R0, [R0] <-- armconverter.com +} + +static void emit_ldrs(t_bytes *out, int rs, int rt) { + emit_ldr(out, rs, rt); +} + +static void emit_ldrw(t_bytes *out, int rs, int rt) { + output_w32(out, 0x79400000|rs<<5|rt); // LDRB R0, [R0] <-- armconverter.com +} + +static void emit_strw(t_bytes *out, int rs, int rt) { + output_w32(out, 0x79000000|rs<<5|rt); +} + +static void emit_str_ptr_cond(t_bytes *out, uintptr_t ptr, int reg, int cond) { + emit_ptr(out, (uintptr_t)ptr, R10); + emit_str_cond(out, R10, reg, cond); +} + +static void emit_write_ptr32(t_bytes *out, uintptr_t ptr, u_int val) { + emit_ptr(out, (uintptr_t)ptr, R2); + emit_movimm(out, val, R4); + emit_str(out, R2, R4); +} + +static void emit_write_ptr(t_bytes *out, uintptr_t ptr, uintptr_t val) { + emit_ptr(out, ptr, R2); + emit_ptr(out, val, R4); + emit_str64(out, R2, R4); +} + +static void emit_write_ptr32_regptrTO_immFROM(t_bytes *out, int regptr, int val) { /* reg contains ptr */ + emit_movimm(out, (uintptr_t)val, R10); + emit_str(out, regptr, R10); +} +static void emit_write_ptr32_regptrTO_regFROM(t_bytes *out, int regptr, int reg) { /* reg contains ptr */ + emit_str(out, regptr, reg); +} +static void emit_write_ptr32_regptrTO_regptrFROM(t_bytes *out, int regptrto, int regptrfrom) { /* reg contains ptr */ + emit_ldr(out, regptrfrom, regptrfrom); + emit_str(out, regptrto, regptrfrom); +} +static void emit_read_ptr32_regptrFROM_regTO(t_bytes *out, int regptr, int regout) { + emit_ldr(out, regptr, regout); +} + +static void emit_write_ptr32_reg(t_bytes *out, uintptr_t ptr, int reg) { + //LOGE("pointer = %u", ptr); + emit_ptr(out, (uintptr_t)ptr, R8); + emit_str(out, R8, reg); +} + +static void emit_writeBYTE_ptr32_regptrTO_immFROM(t_bytes *out, int regptr, int val) { /* reg contains ptr */ + emit_movimm(out, (uintptr_t)val, R9); + emit_strb(out, regptr, R9); +} +static void emit_writeBYTE_ptr32_regptrTO_regFROM(t_bytes *out, int regptr, int reg) { /* reg contains ptr */ + emit_strb(out, regptr, reg); +} + +static void emit_writeBYTE_ptr32_regptrTO_regptrFROM(t_bytes *out, int regptrto, int regptrfrom) { /* reg contains ptr */ + emit_ldrb(out, regptrfrom, regptrfrom); + emit_strb(out, regptrto, regptrfrom); +} +static void emit_readBYTE_ptr32_regptrFROM_regTO(t_bytes *out, int regptr, int regout) { + emit_ldrb(out, regptr, regout); +} + +static void emit_writeWORD_ptr32_regptrTO_immFROM(t_bytes *out, int regptr, int val) { /* reg contains ptr */ + emit_movimm(out, (uintptr_t)val, R9); + emit_strw(out, regptr, R9); +} +static void emit_writeWORD_ptr32_regptrTO_regFROM(t_bytes *out, int regptr, int reg) { /* reg contains ptr */ + emit_strw(out, regptr, reg); +} +static void emit_writeWORD_ptr32_regptrTO_regptrFROM(t_bytes *out, int regptrto, int regptrfrom) { /* reg contains ptr */ + emit_ldrw(out, regptrfrom, regptrfrom); + emit_strw(out, regptrto, regptrfrom); +} +static void emit_readWORD_ptr32_regptrFROM_regTO(t_bytes *out, int regptr, int regout) { + emit_ldrw(out, regptr, regout); +} + +static void emit_add_ptr32_regptrTO_regFROM(t_bytes *out, int regptr, int reg) { /* reg contains ptr */ + emit_ldr(out, regptr, R10); + emit_add(out, R10, reg, R10); + emit_str(out, regptr, R10); +} +static void emit_adds_ptr32_regptrTO_regFROM(t_bytes *out, int regptr, int reg) { /* reg contains ptr */ + emit_ldr(out, regptr, R10); + emit_adds(out, R10, reg, R10); + emit_str(out, regptr, R10); +} +static void emit_add_ptr32_regptrTO_immFROM(t_bytes *out, int regptr, int val) { /* reg contains ptr */ + emit_movimm(out, (uintptr_t)val, R9); + emit_ldr(out, regptr, R10); + emit_add(out, R10, R9, R10); + emit_str(out, regptr, R10); +} + +static void emit_adds_ptr32_regptrTO_immFROM(t_bytes *out, int regptr, int val) { /* reg contains ptr */ + emit_movimm(out, (uintptr_t)val, R9); + emit_ldr(out, regptr, R10); + emit_adds(out, R10, R9, R10); + emit_str(out, regptr, R10); +} + +static void emit_sub_ptr32_regptrTO_regFROM(t_bytes *out, int regptr, int reg) { /* reg contains ptr */ + emit_ldr(out, regptr, R10); + emit_sub(out, R10, reg, R10); + emit_str(out, regptr, R10); +} +static void emit_subs_ptr32_regptrTO_regFROM(t_bytes *out, int regptr, int reg) { /* reg contains ptr */ + emit_ldr(out, regptr, R10); + emit_subs(out, R10, reg, R10); + emit_str(out, regptr, R10); +} +static void emit_sub_ptr32_regptrTO_immFROM(t_bytes *out, int regptr, int val) { /* reg contains ptr */ + emit_movimm(out, (uintptr_t)val, R9); + emit_ldr(out, regptr, R10); + emit_sub(out, R10, R9, R10); + emit_str(out, regptr, R10); +} +static void emit_subs_ptr32_regptrTO_immFROM(t_bytes *out, int regptr, int val) { /* reg contains ptr */ + emit_movimm(out, (uintptr_t)val, R9); + emit_ldr(out, regptr, R10); + emit_subs(out, R10, R9, R10); + emit_str(out, regptr, R10); +} + +static void emit_neg_ptr32_regptr(t_bytes *out, int regptr) { /* reg contains ptr */ + emit_ldr(out, regptr, R10); + emit_neg(out, R10, R10); + emit_str(out, regptr, R10); +} + +static void emit_negs_ptr32_regptr(t_bytes *out, int regptr) { /* reg contains ptr */ + emit_ldr(out, regptr, R10); + emit_negs(out, R10, R10); + emit_str(out, regptr, R10); +} + +static void emit_ror_ptr32_regptr(t_bytes *out, int regptr, int shift) { + emit_ldr(out, regptr, R10); + emit_ror(out, R10, shift); + emit_str(out, regptr, R10); +} + +static void emit_rors_ptr32_regptr(t_bytes *out, int regptr, int shift) { + emit_ldr(out, regptr, R10); + emit_rors(out, R10, shift); + emit_str(out, regptr, R10); +} + +static void emit_rors_ptr32_regptr_reg(t_bytes *out, int regptr, int regimm) { + emit_ldr(out, regptr, R10); + emit_rors_reg(out, R10, regimm); + emit_str(out, regptr, R10); +} + +static void emit_ror_ptr32_regptr_reg(t_bytes *out, int regptr, int regimm) { + emit_ldr(out, regptr, R10); + emit_ror_reg(out, R10, regimm); + emit_str(out, regptr, R10); +} + +static void emit_cmpimm(t_bytes *out, int rs,int imm) +{ + if (imm == 0) { + output_w32(out, 0x7100001F|rs<<5); // cmp w#, #0 + } + else { + emit_movimm(out, imm, 16); + emit_cmp(out, rs, 16); + } +} + +static void emit_testimm(t_bytes *out, int rs,int imm) +{ + u_int armval, ret; + ret=genimm2(imm,32,&armval); + output_w32(out, 0x72000000|armval<<10|rs<<5|WZR); +} + +static void emit_testimm64(t_bytes * out, int rs,int64_t imm) +{ + u_int armval, ret; + ret=genimm2(imm,64,&armval); + output_w32(out, 0xf2000000|armval<<10|rs<<5|WZR); +} + +static void emit_orimm(t_bytes *out, int rs,int imm,int rt) +{ + u_int armval; + if(imm==0) { + if(rs!=rt) emit_mov(out, rs,rt); + }else if(false && genimm2(imm,32,&armval)) { + output_w32(out, 0x32000000|armval<<10|rs<<5|rt); + }else{ + emit_movimm(out, imm,R9); + output_w32(out, 0x2a000000|R9<<16|rs<<5|rt); + } +} + +static void emit_xorimm(t_bytes *out, int reg, int val, int rt) { + emit_movimm(out, val, R10); + emit_xor(out, reg, R10, rt); +} + +static void emit_andimm(t_bytes *out,int rs,int imm,int rt) +{ + u_int armval; + if(imm==0) { + emit_zeroreg(out, rt); + }else if(false && genimm2((uint64_t)imm,32,&armval)) { + output_w32(out, 0x12000000|armval<<10|rs<<5|rt); + }else{ + emit_movimm(out, imm,R10); + output_w32(out, 0x0a000000|R10<<16|rs<<5|rt); + } +} + +static void emit_andsimm(t_bytes *out,int rs,int imm,int rt) +{ + u_int armval; + if(imm==0) { + emit_zeroreg(out, rt); + emit_cmp(out, rt, rt); + }else if(genimm2((uint64_t)imm,32,&armval)) { + output_w32(out, 0x72000000|armval<<10|rs<<5|rt); + }else{ + emit_movz(out, imm,R10); + output_w32(out, 0x6a000000|R10<<16|rs<<5|rt); + } +} + +static void emit_subimm(t_bytes *out, int reg, int val, int rt) { + emit_movimm(out, val, R10); + emit_sub(out, reg, R10, rt); +} + +static void emit_subsimm(t_bytes *out, int reg, int val, int rt) { + emit_movimm(out, val, R10); + emit_subs(out, reg, R10, rt); +} + +static void emit_and_ptr32_regptr(t_bytes *out, int regptr, int bits) { + emit_ldr(out, regptr, 13); + emit_andimm(out, 13, bits, 13); + emit_str(out, regptr, 13); +} + +static void emit_or_ptr32_regptr(t_bytes *out, int regptr, int bits) { + emit_ldr(out, regptr, 13); + emit_orimm(out, 13, bits, 13); + emit_str(out, regptr, 13); +} + +static void emit_or_ptr8_regptr_reg(t_bytes *out, int regptr, int reg) { + emit_ldrb(out, regptr, 13); + emit_or(out, 13, reg, 13); + emit_strb(out, regptr, 13); +} +static void emit_or_ptr8_regptr_imm(t_bytes *out, int regptr, int imm) { + emit_ldrb(out, regptr, 13); + emit_orimm(out, 13, imm, 13); + emit_strb(out, regptr, 13); +} + +static void emit_or_ptr32_regptr_reg(t_bytes *out, int regptr, int reg) { + emit_ldr(out, regptr, 13); + emit_or(out, 13, reg, 13); + emit_str(out, regptr, 13); +} + +static void emit_and_ptr32_regptr_reg(t_bytes *out, int regptr, int reg) { + emit_ldr(out, regptr, 13); + emit_and(out, 13, reg, 13); + emit_str(out, regptr, 13); +} + +#define Z 0 +#define NE 1 +#define CS 2 +#define CC 3 +#define SS 4 +#define O 6 + +static void emit_setc(t_bytes *out, int reg) { + output_w32(out, 0x1A9F37E0|reg); +} + +static void emit_sets(t_bytes *out, int reg) { + output_w32(out, 0x1A9F57E0|reg); +} + +static void emit_setz(t_bytes *out, int reg) { + output_w32(out, 0x1A9F17E0|reg); +} + +static void emit_setnz(t_bytes *out, int reg) { + output_w32(out, 0x1A9F07E0|reg); +} + +static void emit_setnc(t_bytes *out, int reg) { + output_w32(out, 0x1A9F27E0|reg); +} +static void emit_seto(t_bytes *out, int reg) { + output_w32(out, 0x1A9F77E0|reg); +} + +static void emit_write_ptr32_reg_byte(t_bytes *out, uintptr_t ptr, int reg) { + emit_movimm(out, (uintptr_t)ptr, R8); + + emit_strb(out, R8, reg); +} + +static void emit_lea_reg_reg(t_bytes *out, int reg1, int reg, int scale, int outreg) { + if (scale == 2) emit_lsl(out, reg, 1); // MOV r10, r10, LSL #1 (multiple by 2) + else if (scale == 4) emit_lsl(out, reg, 2); // MOV r10, r10, LSL #2 (multiple by 4) + else if (scale == 8) emit_lsl(out, reg, 3); // MOV r10, r10, LSL #3 (multiple by 8) + + emit_add(out, reg, reg1, R10); + emit_mov(out, R10,outreg ); +} + +static void emit_lea_ptr_reg(t_bytes *out, int ptr, int reg, int scale, int outreg) { + emit_movimm(out, ptr, R10); + + if (scale == 2) emit_lsl(out, reg, 1); // MOV r10, r10, LSL #1 (multiple by 2) + else if (scale == 4) emit_lsl(out, reg, 2); // MOV r10, r10, LSL #2 (multiple by 4) + else if (scale == 8) emit_lsl(out, reg, 3); // MOV r10, r10, LSL #3 (multiple by 8) + + emit_add(out, reg, R10, R10); + emit_mov(out, R10, outreg); +} + +static void emit_lea_ptr_val(t_bytes *out, int ptr, int val, int scale, int outreg) { + emit_movimm(out, ptr, R10); + emit_movimm(out, val, R9); + + if (scale == 2) emit_lsl(out, R9, 2); // MOV r10, r10, LSL #1 (multiple by 2) + else if (scale == 4) emit_lsl(out, R9, 4); // MOV r10, r10, LSL #2 (multiple by 4) + else if (scale == 8) emit_lsl(out, R9, 8); // MOV r10, r10, LSL #3 (multiple by 8) + + emit_add(out, R9, R10, R10); + + emit_mov(out, outreg, R10); +} + +static void clear_carry_flag(t_bytes *out) { + output_w32(out, 0xD53B4213); /* mrs x19, nzcv */ + output_w32(out, 0x9262FA73); // bic x19, x19, #(1<<29) + output_w32(out, 0xD51B4213); // msr nzcv, x19 +} + +static void set_carry_flag(t_bytes *out) { + output_w32(out, 0xD53B4213); /* mrs x19, nzcv */ + output_w32(out, 0xB2630273); // orr x19, x19, #(1<<29) + output_w32(out, 0xD51B4213); // msr nzcv, x19 +} + +static void get_flags(int reg, int v); + +static void get_carry_flag(t_bytes *out, int reg) { + get_flags(reg, VALUE_NO_CACHE); + output_w32(out, 0x121B0000|reg|reg<<5); // and w0, w0, #(1<<5) + output_w32(out, 0x53057C00|reg|reg<<5); // lsr w0, w0, #5 +} + +static void emit_read_ptr32_reg(t_bytes *out, uintptr_t ptr, int rs) { + emit_ptr(out, (uintptr_t)ptr, R10); + emit_ldr(out, R10, rs); +} + +static void emit_read_ptr64_reg(t_bytes *out, uintptr_t ptr, int rs) { + emit_ptr(out, (uintptr_t)ptr, R10); + emit_ldr64(out, R10, rs); +} + +static void emit_read_ptr32_reg_byte(t_bytes *out, uintptr_t ptr, int rs) { + emit_ptr(out, (uintptr_t)ptr, R8); + emit_ldrb(out, R8, rs); +} + + +static void emit_var_copy32(t_bytes *out, uintptr_t from, uintptr_t to) { + emit_ptr(out, (uintptr_t)from, R7); + emit_ptr(out, (uintptr_t)to, R7); +} + +static void emit_var_copy32_reg(t_bytes *out, int from, int to) { + emit_str(out, to, from); +} + +static void emit_set_ptr_imm(t_bytes *out, uintptr_t ptr, int set) { + emit_movimm(out, set, R10); + emit_write_ptr32_reg(out, ptr, R10); +} + +static void emit_inc_ptr_imm(t_bytes *out, uintptr_t ptr, int add) { + emit_read_ptr32_reg(out, ptr, R10); + if (add > 0) { + emit_movimm(out, add, R9); + emit_add(out, R10, R9, R10); + } + else { + emit_movimm(out, add*-1, R9); + emit_sub(out, R10, R9, R10); + } + emit_write_ptr32_reg(out, ptr, R10); +} + +static void emit_sub_ptr_imm(t_bytes *out, uintptr_t ptr, int minus) { + emit_read_ptr32_reg(out, ptr, R10); + emit_movimm(out, minus, R9); + emit_sub(out, R10, R9, R10); + emit_write_ptr32_reg(out, ptr, R10); +} + +static void emit_inc_ptr(t_bytes *out, uintptr_t ptr, uintptr_t ptr2) { + emit_read_ptr32_reg(out, ptr, R2); + emit_read_ptr32_reg(out, ptr2, R3); + emit_add(out, R2, R3, R2); + emit_write_ptr32_reg(out, ptr, R2); +} + +static void emit_inc_ptr_reg(t_bytes *out, uintptr_t ptr, uintptr_t ptr2, int reg) { + emit_read_ptr32_reg(out, ptr, R7); + emit_read_ptr32_reg(out, ptr2, R9); + emit_add(out, reg, R9, R7); +} + +static void emit_inc_reg(t_bytes *out, uintptr_t ptr, int reg) { + emit_read_ptr32_reg(out, ptr, R9); + emit_add(out, reg, R9, reg); +} + +static void emit_brk(t_bytes *out) { + output_w32(out, 0xD4200000); +} + +static void save(t_bytes *out) { +// if debug +#ifdef DEBUG_JIT + output_w32(out, 0xA9BF0BE1); // stp x1, x2, [sp, #-16]! + output_w32(out, 0xA9BF13E3); // stp x3, x4, [sp, #-16]! + output_w32(out, 0xA9BF1BE5); // stp x5, x6, [sp, #-16]! + output_w32(out, 0xA9BF23E7); // stp x7, x8, [sp, #-16]! + output_w32(out, 0xA9BF2BE9); // stp x9, x10, [sp, #-16]! + output_w32(out, 0xA9BF33EB); // stp x11, x12, [sp, #-16]! +#endif + +// end if debug + output_w32(out, 0xA9BF53F3); // stp x19, x20, [sp, #-16]! + output_w32(out, 0xA9BF5BF5); // stp x21, x22, [sp, #-16]! + output_w32(out, 0xF81F0FF7); // str x23, [sp, #-16]! +} + +static void restore(t_bytes *out) { + output_w32(out, 0xF84107F7); // ldr x23, [sp], #16 + output_w32(out, 0xA8C15BF5); // ldp x21, x22, [sp], #16 + output_w32(out, 0xA8C153F3); // ldp x19, x20, [sp], #16 + // if debug + +#ifdef DEBUG_JIT + output_w32(out, 0xA8C133EB); // ldp x11, x12, [sp], #16 + output_w32(out, 0xA8C12BE9); // ldp x9, x10, [sp], #16 + output_w32(out, 0xA8C123E7); // ldp x7, x8, [sp], #16 + output_w32(out, 0xA8C11BE5); // ldp x5, x6, [sp], #16 + output_w32(out, 0xA8C113E3); // ldp x3, x4, [sp], #16 + output_w32(out, 0xA8C10BE1); // ldp x1, x2, [sp], #16 +#endif + // end if debug +} + +static void emit_startfunc(t_bytes *out) { + output_w32(out, 0xA9BF77FE); // stp lr, fp, [SP, #-16]! + save(out); +} + +static void emit_endfunc(t_bytes *out) { + + restore(out); + + output_w32(out, 0xA8C177FE); // ldp lr, fp, [sp], #16 + output_w32(out, 0xD65F03C0); // ret +} + +static void save_call(t_bytes *out) { +// if debug + output_w32(out, 0xA9BF0BE1); // stp x1, x2, [sp, #-16]! + output_w32(out, 0xA9BF13E3); // stp x3, x4, [sp, #-16]! + output_w32(out, 0xA9BF1BE5); // stp x5, x6, [sp, #-16]! + output_w32(out, 0xA9BF23E7); // stp x7, x8, [sp, #-16]! + output_w32(out, 0xA9BF2BE9); // stp x9, x10, [sp, #-16]! + output_w32(out, 0xA9BF33EB); // stp x11, x12, [sp, #-16]! +/* +// end if debug + output_w32(out, 0xA9BF53F3); // stp x19, x20, [sp, #-16]! + output_w32(out, 0xA9BF5BF5); // stp x21, x22, [sp, #-16]! + output_w32(out, 0xF81F0FF7); // str x23, [sp, #-16]!*/ +} + +static void restore_call(t_bytes *out) { +/* output_w32(out, 0xF84107F7); // ldr x23, [sp], #16 + output_w32(out, 0xA8C15BF5); // ldp x21, x22, [sp], #16 + output_w32(out, 0xA8C153F3); // ldp x19, x20, [sp], #16 + // if debug + +*/ + output_w32(out, 0xA8C133EB); // ldp x11, x12, [sp], #16 + output_w32(out, 0xA8C12BE9); // ldp x9, x10, [sp], #16 + output_w32(out, 0xA8C123E7); // ldp x7, x8, [sp], #16 + output_w32(out, 0xA8C11BE5); // ldp x5, x6, [sp], #16 + output_w32(out, 0xA8C113E3); // ldp x3, x4, [sp], #16 + output_w32(out, 0xA8C10BE1); // ldp x1, x2, [sp], #16 + // end if debug +} + +static void callR3(t_bytes *out) { + save_call(out); + output_w32(out, 0xD63F0060); // blr x3 + restore_call(out); +} +static void callR9(t_bytes *out) { + save_call(out); + output_w32(out, 0xD63F0120); // blr x9 + restore_call(out); +} +static void callR7(t_bytes *out) { + save_call(out); + output_w32(out, 0xD63F00E0); // blr x7 + restore_call(out); +} +static void callR6(t_bytes *out) { + save_call(out); + output_w32(out, 0xD63F00C0); // blr x6 + restore_call(out); +} +static void callR8(t_bytes *out) { + save_call(out); + output_w32(out, 0xD63F0100); // blr x8 + restore_call(out); +} + + +//----------------------------------------------------------------------------- +// Shifting macros +//----------------------------------------------------------------------------- +#define SET_NZCV(sign) { \ + JIT_COMMENT("SET_NZCV"); \ + output_w32(g_out, 0xD53B4201); /* mrs x1, nzcv */ \ + flags_ptr(y_reg, VALUE); \ + output_w32(g_out, 0xD358FC21); /* lsr x1, x1, #0x18 */\ + output_w32(g_out, 0x927C0C21); /* and x1, x1, #0xf0 */ \ + output_w32(g_out, 0x92400D29); /* and x9, x9, #0xf */ \ + emit_or64(g_out, R1, y_reg, R1); \ + emit_writeBYTE_ptr32_regptrTO_regFROM(g_out, CACHED_PTR(flags_ptr(R0, ADDRESS)), R1); \ +} + +#define SET_NZC { \ + JIT_COMMENT("SET_NZC"); \ + output_w32(g_out, 0xD53B4201); /* mrs x1, nzcv */ \ + flags_ptr(y_reg, VALUE); \ + output_w32(g_out, 0xD358FC21); /* lsr x1, x1, #0x18 */\ + output_w32(g_out, 0x927A0421); /* and x1, x1, #0xc0 */ \ + if (cf_change) { output_w32(g_out, 0xAA051421); /* orr x1, x1, x5, lsl #5 - x5 == rcf */ } \ + output_w32(g_out, cf_change ? 0x92401129 /* and x9, x9, #0x1f */ : 0x92401529 /* and x9, x9, #0x3f */ ); \ + emit_or64(g_out, R1, y_reg, R1); \ + emit_writeBYTE_ptr32_regptrTO_regFROM(g_out, CACHED_PTR(flags_ptr(R0, ADDRESS)), R1); \ +} + +#define SET_NZC_SHIFTS_ZERO(cf) { \ + JIT_COMMENT("SET_NZC_SHIFTS_ZERO"); \ + flags_ptr(R3, VALUE); \ + emit_and_ptr32_regptr(g_out, CACHED_PTR(flags_ptr(R1, ADDRESS)), 0x1F); \ + if(cf) \ + { \ + emit_lsl(g_out, rcf_reg, 5); \ + emit_orimm(g_out,rcf_reg, (1<<6), rcf_reg); \ + emit_or_ptr32_regptr_reg(g_out, CACHED_PTR(flags_ptr(R1, ADDRESS)), rcf_reg); \ + } \ + else \ + emit_or_ptr32_regptr(g_out, CACHED_PTR(flags_ptr(R1, ADDRESS)), (1<<6)); \ +} + +#define SET_NZ(clear_cv) { \ + output_w32(g_out, 0xD53B4201); /* mrs x1, nzcv */ \ + flags_ptr(y_reg, VALUE); \ + output_w32(g_out, 0xD358FC21); /* lsr x1, x1, #0x18 */ \ + output_w32(g_out, 0x927A0421); /* and x1, x1, #0xc0 */ \ + output_w32(g_out, clear_cv ? 0x92400D29 /* and x9, x9, #0xf */ : 0x92401529 /* and x9, x9, #0x3f */ );\ + emit_or64(g_out, R1, y_reg, R1); \ + emit_writeBYTE_ptr32_regptrTO_regFROM(g_out, CACHED_PTR(flags_ptr(R0, ADDRESS)), R1); \ +} + +#define SET_N { \ + output_w32(g_out, 0xD53B4201); /* mrs x1, nzcv */ \ + flags_ptr(y_reg, VALUE); \ + output_w32(g_out, 0xD358FC21); /* lsr x1, x1, #0x18 */\ + output_w32(g_out, 0x92790021); /* and x1, x1, #0x80 */ \ + output_w32(g_out, 0x92401929); /* and x9, x9, #0x7f */ \ + emit_or64(g_out, R1, y_reg, R1); \ + emit_writeBYTE_ptr32_regptrTO_regFROM(g_out, CACHED_PTR(flags_ptr(R0, ADDRESS)), R1); \ +} + +#define SET_Z { \ + output_w32(g_out, 0xD53B4201); /* mrs x1, nzcv */ \ + flags_ptr(y_reg, VALUE); \ + output_w32(g_out, 0xD358FC21); /* lsr x1, x1, #0x18 */\ + /*if (cf_change) { output_w32(g_out, 0xE1811085);*/ /* ORR R1, R5, LSL #1 *//*}*/ \ + output_w32(g_out, 0x927A0021); /* and x1, x1, #0x40 */ \ + emit_andimm(g_out, R9, 0xbf, R9); /* and sb, sb, #0xbf */ \ + emit_or64(g_out, R1, y_reg, R1); \ + emit_writeBYTE_ptr32_regptrTO_regFROM(g_out, CACHED_PTR(flags_ptr(R0, ADDRESS)), R1); \ +} + +#define SET_Q { \ + flags_ptr(y_reg, VALUE); \ + emit_seto(g_out, y_reg); \ + emit_lsl(g_out, y_reg, 3); \ + emit_or_ptr32_regptr_reg(g_out, CACHED_PTR(flags_ptr(R1, ADDRESS)), y_reg); \ +} + +#define GET_CARRY(invert) { \ + flags_ptr(R2, VALUE); \ + int setb=genlabel(); \ + int done=genlabel(); \ + output_w32(g_out, 0x721B0042); /* ands w2, w2, #0x20 */ \ + emit_branch_label(g_out, setb, 0x1); \ + clear_carry_flag(g_out); \ + emit_branch_label(g_out, done, 0xE); \ + emit_label(g_out, (int)setb); \ + set_carry_flag(g_out); \ + emit_label(g_out, (int)done); \ +} diff --git a/desmume/src/utils/arm_jit/arm_emit.cpp b/desmume/src/utils/arm_jit/arm_emit.cpp new file mode 100755 index 000000000..1fb54a6d8 --- /dev/null +++ b/desmume/src/utils/arm_jit/arm_emit.cpp @@ -0,0 +1,1310 @@ +/* + Copyright (C) 2022 Patrick Herlihy - https://byte4byte.com + Copyright (C) 2022 DeSmuME team + + This file is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This file is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the this software. If not, see . +*/ + +#include +#include +#include +#include + +#define R11 11 +#define R10 10 +#define FP 11 +#define LR 14 +#define PC 15 +#define SP 13 +#define R0 0 +#define R1 1 +#define R2 2 +#define R3 3 +#define R4 4 +#define R5 5 +#define R6 6 +#define R7 7 +#define R8 8 +#define R9 9 + +#define emit_write_ptr(o, p1, p2) emit_write_ptr32(o, p1, p2) +#define emit_ptr(o, ptr, r) emit_movimm(o, ptr, r) +#define emit_add64(o, r1, r2, r3) emit_add(o, r1, r2, r3) +#define emit_mov64(o, r1, r2) emit_mov(o, r1, r2) +#define emit_read_ptr64_reg(o, s, r) emit_read_ptr32_reg(o, s, r) +#define emit_addimm64(o, r1, r2, r3) emit_addimm(o, r1, r2, r3) + +static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm) +{ + return((rn<<16)|(rd<<12)|rm); +} + +static u_int rd_rn_rm_lsl(u_int rd, u_int rn, u_int rm, u_int shift) +{ + return((rn<<16)|(rd<<12)|rm)|((shift*8)<<4); +} + +struct s_bytes; +typedef struct s_bytes t_bytes; + +static void emit_setc(t_bytes *out, int reg); + +static void output_w32(t_bytes *bytes, unsigned int word); + +static int g_allocedSize = 0; + +static u_int genimm(u_int imm,u_int *encoded) +{ + if(imm==0) {*encoded=0;return 1;} + int i=32; + while(i>0) + { + if(imm<256) { + *encoded=((i&30)<<7)|imm; + return 1; + } + imm=(imm>>2)|(imm<<30);i-=2; + } + return 0; +} + +static void output_w32_FROM_PC(t_bytes *bytes, unsigned int word); +static void emit_movimm(t_bytes *out, u_int imm,u_int rt); +static void emit_movimm2pad(t_bytes *out, u_int imm,u_int rt); +static void emit_addnop(t_bytes *out, u_int r); + +static void output_w32(t_bytes *bytes, u32 word); + +static void output_w32_full(t_bytes *bytes, unsigned int word, int type, int label_id, int cond); + +static void emit_movimm(t_bytes *out, u_int imm,u_int rt); +static void emit_mov(t_bytes *out, int rs,int rt); +static void emit_jmpreg(t_bytes *out, u_int r); + +static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift) +{ + return((rn<<16)|(rd<<12)|(((32-shift)&30)<<7)|imm); +} + + +static u_int rd_rn_rm_asr(u_int rd, u_int rn, u_int rm, u_int shift) +{ + return((rn<<16)|(rd<<12)|rm)|((shift*8+(0xC-8))<<4); +} + +static u_int rd_rn_rm_lsr(u_int rd, u_int rn, u_int rm, u_int shift) +{ + return((rn<<16)|(rd<<12)|rm)|((shift*8+(0xA - 8))<<4); +} + +static u_int rd_rn_rm_ror(u_int rd, u_int rn, u_int rm, u_int shift) +{ + return((rn<<16)|(rd<<12)|rm)|((shift*8+(0xE - 8))<<4); +} + + +static void emit_mov_lsl(t_bytes *out, int rs, int rt, int shift) { + output_w32(out, 0xe1a00000|rd_rn_rm_lsl(rt,0, rs, shift)); +} +static void emit_mov_lsr(t_bytes *out, int rs, int rt, int shift) { + output_w32(out, 0xE1A00000|rd_rn_rm_lsr(rt,0, rs, shift)); +} +static void emit_mov_ror(t_bytes *out, int rs, int rt, int shift) { + output_w32(out, 0xE1A00000|rd_rn_rm_ror(rt,0, rs, shift)); +} +static void emit_mov_asr(t_bytes *out, int rs, int rt, int shift) { + output_w32(out, 0xE1A00000|rd_rn_rm_asr(rt,0, rs, shift)); +} +static void emit_mov_asl(t_bytes *out, int rs, int rt, int shift) { + emit_mov_lsl(out, rs, rt, shift); +} + +#define SHIFT_REG R0 + + +static unsigned int shift_reg(int reg, int num) { + return (reg<<12)|reg|(num<<8); +} +static void emit_lsl(t_bytes *out, int reg, int n) { + if (n < 32) { + output_w32(out, 0xE1A00000|rd_rn_rm_lsl(reg,0, reg, n)); + } + else { + emit_movimm(out, n, SHIFT_REG); + output_w32(out, 0xE1A00010|shift_reg(reg, SHIFT_REG)); + } +} +static void emit_lsr(t_bytes *out, int reg, int n) { + if (n < 32) { + output_w32(out, 0xE1A00000|rd_rn_rm_lsr(reg,0, reg, n)); + } + else { + emit_movimm(out, n, SHIFT_REG); + output_w32(out, 0xE1A00030|shift_reg(reg, SHIFT_REG)); + } +} +static void emit_asr(t_bytes *out, int reg, int n) { + if (n < 32) { + output_w32(out, 0xE1A00000|rd_rn_rm_asr(reg,0, reg, n)); + } + else { + emit_movimm(out, n, SHIFT_REG); + output_w32(out, 0xE1A00050|shift_reg(reg, SHIFT_REG)); + } +} +static void emit_ror(t_bytes *out, int reg, int n) { + if (n < 32) { + output_w32(out, 0xE1A00000|rd_rn_rm_ror(reg,0, reg, n)); + } + else { + emit_movimm(out, n, SHIFT_REG); + output_w32(out, 0xE1A00070|shift_reg(reg, SHIFT_REG)); + } +} +static void emit_rrx(t_bytes *out, int reg, int n) { + + emit_movimm(out, n, SHIFT_REG); + output_w32(out, 0xE1A00060|shift_reg(reg, SHIFT_REG)); +} +static void emit_lsl_reg(t_bytes *out, int reg, int nreg) { + output_w32(out, 0xE1A00010|shift_reg(reg, nreg)); +} +static void emit_lsr_reg(t_bytes *out, int reg, int nreg) { + output_w32(out, 0xE1A00030|shift_reg(reg, nreg)); +} +static void emit_asr_reg(t_bytes *out, int reg, int nreg) { + output_w32(out, 0xE1A00050|shift_reg(reg, nreg)); +} +static void emit_ror_reg(t_bytes *out, int reg, int nreg) { + output_w32(out, 0xE1A00070|shift_reg(reg, nreg)); +} +static void emit_rrx_reg(t_bytes *out, int reg, int nreg) { + output_w32(out, 0xE1A00060|shift_reg(reg, nreg)); +} + +static unsigned int set_flags() { + return 0x100000; +} + +static void emit_lsls(t_bytes *out, int reg, int n) { + if (n < 32) { + output_w32(out, 0xE1A00000|rd_rn_rm_lsl(reg,0, reg, n)|set_flags()); + emit_setc(out, 5); + } + else { + emit_movimm(out, n, SHIFT_REG); + output_w32(out, 0xE1A00010|shift_reg(reg, SHIFT_REG)|set_flags()); + emit_setc(out, 5); + } +} +static void emit_lsrs(t_bytes *out, int reg, int n) { + if (n < 32) { + output_w32(out, 0xE1A00000|rd_rn_rm_lsr(reg,0, reg, n)|set_flags()); + emit_setc(out, 5); + } + else { + emit_movimm(out, n, SHIFT_REG); + output_w32(out, 0xE1A00030|shift_reg(reg, SHIFT_REG)|set_flags()); + emit_setc(out, 5); + } +} +static void emit_asrs(t_bytes *out, int reg, int n) { + if (n < 32) { + output_w32(out, 0xE1A00000|rd_rn_rm_asr(reg,0, reg, n)|set_flags()); + emit_setc(out, 5); + } + else { + emit_movimm(out, n, SHIFT_REG); + output_w32(out, 0xE1A00050|shift_reg(reg, SHIFT_REG)|set_flags()); + emit_setc(out, 5); + } +} +static void emit_rors(t_bytes *out, int reg, int n) { + if (n < 32) { + output_w32(out, 0xE1A00000|rd_rn_rm_ror(reg,0, reg, n)|set_flags()); + emit_setc(out, 5); + } + else { + emit_movimm(out, n, SHIFT_REG); + output_w32(out, 0xE1A00070|shift_reg(reg, SHIFT_REG)|set_flags()); + emit_setc(out, 5); + } +} +static void emit_rrxs(t_bytes *out, int reg, int n) { + emit_movimm(out, n, SHIFT_REG); + output_w32(out, 0xE1A00060|shift_reg(reg, SHIFT_REG)|set_flags()); + emit_setc(out, 5); +} +static void emit_lsls_reg(t_bytes *out, int reg, int nreg) { + output_w32(out, 0xE1A00010|shift_reg(reg, nreg)|set_flags()); + emit_setc(out, 5); +} +static void emit_lsrs_reg(t_bytes *out, int reg, int nreg) { + output_w32(out, 0xE1A00030|shift_reg(reg, nreg)|set_flags()); + emit_setc(out, 5); +} +static void emit_asrs_reg(t_bytes *out, int reg, int nreg) { + output_w32(out, 0xE1A00050|shift_reg(reg, nreg)|set_flags()); + emit_setc(out, 5); +} +static void emit_rors_reg(t_bytes *out, int reg, int nreg) { + output_w32(out, 0xE1A00070|shift_reg(reg, nreg)|set_flags()); + emit_setc(out, 5); +} +static void emit_rrxs_reg(t_bytes *out, int reg, int nreg) { + output_w32(out, 0xE1A00060|shift_reg(reg, nreg)|set_flags()); + emit_setc(out, 5); +} + +static void emit_mov(t_bytes *out, int rs,int rt) +{ + output_w32(out, 0xe1a00000|rd_rn_rm(rt,0,rs)); +} + +static void emit_mvns(t_bytes *out, int rs,int rt) +{ + output_w32(out, 0xE1E00000|rd_rn_rm(rt,0,rs)|set_flags()); +} + +static void emit_mov_cond(t_bytes *out, int rs,int rt, int cond) +{ + output_w32(out, 0x01a00000|rd_rn_rm(rt,0,rs)|(cond<<28)); +} + +static void emit_movs(t_bytes *out, int rs,int rt) +{ + output_w32(out, 0xe1b00000|rd_rn_rm(rt,0,rs)); +} + +static void emit_add(t_bytes *out, int rs1,int rs2,int rt) +{ + output_w32(out, 0xe0800000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_adds(t_bytes *out, int rs1,int rs2,int rt) +{ + output_w32(out, 0xe0900000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_adc(t_bytes *out, int rs1,int rs2,int rt) +{ + output_w32(out, 0xe0a00000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_adcs(t_bytes *out, int rs1,int rs2,int rt) +{ + output_w32(out, 0xe0b00000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_sbc(t_bytes *out, int rs1,int rs2,int rt) +{ + output_w32(out, 0xe0c00000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_sbcs(t_bytes *out, int rs1,int rs2,int rt) +{ + output_w32(out, 0xe0d00000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_neg(t_bytes *out, int rs, int rt) +{ + output_w32(out, 0xe2600000|rd_rn_rm(rt,rs,0)); +} + +static void emit_negs(t_bytes *out, int rs, int rt) +{ + output_w32(out, 0xe2700000|rd_rn_rm(rt,rs,0)); +} + +static void emit_sub(t_bytes *out, int rs1,int rs2,int rt) +{ + output_w32(out, 0xe0400000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_subs(t_bytes *out, int rs1,int rs2,int rt) +{ + output_w32(out, 0xe0500000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_zeroreg(t_bytes *out, int rt) +{ + output_w32(out, 0xe3a00000|rd_rn_rm(rt,0,0)); +} + +static void emit_movw(t_bytes *out, u_int imm,u_int rt) +{ + output_w32(out, 0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000)); +} +static void emit_movt(t_bytes *out, u_int imm,u_int rt) +{ + output_w32(out, 0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000)); +} +static void emit_movw_cond(t_bytes *out, u_int imm,u_int rt, int cond) +{ + output_w32(out, (0x03000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000))|(cond<<28)); +} +static void emit_movt_cond(t_bytes *out, u_int imm,u_int rt, int cond) +{ + output_w32(out, (0x03400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000))|(cond<<28)); +} + +static void emit_movimms(t_bytes *out, u_int imm,u_int rt) +{ + u_int armval=0; + genimm(imm,&armval); + output_w32(out, 0xe3a00000|rd_rn_rm(rt,0,0)|armval|set_flags()); +} + +static void emit_movimm(t_bytes *out, u_int imm,u_int rt) +{ + u_int armval; + if(genimm(imm,&armval)) { + output_w32(out, 0xe3a00000|rd_rn_rm(rt,0,0)|armval); + }else if(genimm(~imm,&armval)) { + output_w32(out, 0xe3e00000|rd_rn_rm(rt,0,0)|armval); + }else if(imm<65536) { + #ifdef ARMv5_ONLY + output_w32(out, 0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8)); + output_w32(out, 0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0)); + #else + emit_movw(out, imm,rt); + emit_movt(out, 0, rt); + #endif + }else{ + #ifdef ARMv5_ONLY + emit_loadlp(out, imm,rt); + #else + emit_movw(out, imm&0x0000FFFF,rt); + emit_movt(out, imm&0xFFFF0000,rt); + #endif + } +} + +static void emit_test(t_bytes *out, int rs, int rt) +{ + output_w32(out, 0xe1100000|rd_rn_rm(0,rs,rt)); +} + +static void emit_testimm(t_bytes *out, int rs,int imm); + +static void emit_not(t_bytes *out, int rs,int rt) +{ + output_w32(out, 0xe1e00000|rd_rn_rm(rt,0,rs)); +} + +static void emit_and(t_bytes *out, u_int rs1,u_int rs2,u_int rt) +{ + output_w32(out, 0xe0000000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_or(t_bytes *out, u_int rs1,u_int rs2,u_int rt) +{ + output_w32(out, 0xe1800000|rd_rn_rm(rt,rs1,rs2)); +} +static void emit_or_and_set_flags(t_bytes *out, int rs1,int rs2,int rt) +{ + output_w32(out, 0xe1900000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_xor(t_bytes *out, u_int rs1,u_int rs2,u_int rt) +{ + output_w32(out, 0xe0200000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_ands(t_bytes *out, u_int rs1,u_int rs2,u_int rt) +{ + output_w32(out, 0xe0100000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_ors(t_bytes *out, u_int rs1,u_int rs2,u_int rt) +{ + emit_or_and_set_flags(out, rs1,rs2,rt); +} +static void emit_xors(t_bytes *out, u_int rs1,u_int rs2,u_int rt) +{ + output_w32(out, 0xe0300000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_addimm(t_bytes *out, u_int rs,int imm,u_int rt) +{ + if(imm!=0) { + u_int armval; + if(genimm(imm,&armval)) { + output_w32(out, 0xe2800000|rd_rn_rm(rt,rs,0)|armval); + }else if(genimm(-imm,&armval)) { + output_w32(out, 0xe2400000|rd_rn_rm(rt,rs,0)|armval); + }else if(imm<0) { + output_w32(out, 0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8)); + output_w32(out, 0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0)); + }else{ + output_w32(out, 0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8)); + output_w32(out, 0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0)); + } + } + else if(rs!=rt) emit_mov(out, rs,rt); +} + +static void emit_addsimm(t_bytes *out, u_int rs,int imm,u_int rt) +{ + if(imm!=0) { + u_int armval; + if(genimm(imm,&armval)) { + output_w32(out, 0xe2800000|set_flags()|rd_rn_rm(rt,rs,0)|armval); + }else if(genimm(-imm,&armval)) { + output_w32(out, 0xe2400000|set_flags()|rd_rn_rm(rt,rs,0)|armval); + }else if(imm<0) { + output_w32(out, 0xe2400000|set_flags()|rd_rn_imm_shift(rt,rs,(-imm)>>8,8)); + output_w32(out, 0xe2400000|set_flags()|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0)); + }else{ + output_w32(out, 0xe2800000|set_flags()|rd_rn_imm_shift(rt,rs,imm>>8,8)); + output_w32(out, 0xe2800000|set_flags()|rd_rn_imm_shift(rt,rt,imm&0xff,0)); + } + } + else if(rs!=rt) emit_mov(out, rs,rt); +} + +static void emit_addimm_and_set_flags(t_bytes *out,int imm,int rt) +{ + u_int armval; + if(genimm(imm,&armval)) { + output_w32(out, 0xe2900000|rd_rn_rm(rt,rt,0)|armval); + }else if(genimm(-imm,&armval)) { + output_w32(out, 0xe2500000|rd_rn_rm(rt,rt,0)|armval); + }else if(imm<0) { + output_w32(out, 0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8)); + output_w32(out, 0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0)); + }else{ + output_w32(out, 0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8)); + output_w32(out, 0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0)); + } +} + +static void emit_cmpimm(t_bytes *out, int reg, int val); + +static void emit_cmp(t_bytes *out, int rs,int rt) +{ + output_w32(out, 0xe1500000|rd_rn_rm(0,rs,rt)); +} + +static void emit_smulbb(t_bytes *out, u_int rs1, u_int rs2) { + output_w32(out, 0xE1600080|(rs1<<16)|(rs2<<8)|rs1); +} + +static void emit_smulbt(t_bytes *out, u_int rs1, u_int rs2) { + output_w32(out, 0xE16000C0|(rs1<<16)|(rs2<<8)|rs1); +} + +static void emit_smultb(t_bytes *out, u_int rs1, u_int rs2) { + output_w32(out, 0xE16000A0|(rs1<<16)|(rs2<<8)|rs1); +} + +static void emit_smultt(t_bytes *out, u_int rs1, u_int rs2) { + output_w32(out, 0xE16000E0|(rs1<<16)|(rs2<<8)|rs1); +} +static void emit_smulwt(t_bytes *out, u_int rs1, u_int rs2) { + output_w32(out, 0xE12000E0|(rs1<<16)|(rs2<<8)|rs1); +} +static void emit_smulwb(t_bytes *out, u_int rs1, u_int rs2) { + output_w32(out, 0xE12000A0|(rs1<<16)|(rs2<<8)|rs1); +} + +static void emit_smlawb(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + output_w32(out, 0xE1200080|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); +} +static void emit_smlawt(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + output_w32(out, 0xE12000C0|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); +} + +static void emit_smlabb(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + output_w32(out, 0xE1000080|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); +} +static void emit_smlabt(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + output_w32(out, 0xE10000C0|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); +} +static void emit_smlatb(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + output_w32(out, 0xE10000A0|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); +} +static void emit_smlatt(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + output_w32(out, 0xE10000E0|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); +} + +static void emit_smlalbb(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + output_w32(out, 0xE1400080|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); +} +static void emit_smlalbt(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + output_w32(out, 0xE14000C0|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); +} +static void emit_smlaltb(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + output_w32(out, 0xE14000A0|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); +} +static void emit_smlaltt(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + output_w32(out, 0xE14000E0|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); +} + +static void emit_mul(t_bytes *out, u_int rs1,u_int rs2) +{ + output_w32(out, 0xe0000090|(rs1<<16)|(rs2<<8)|rs1); +} +static void emit_muls(t_bytes *out, u_int rs1,u_int rs2) +{ + output_w32(out, 0xe0000090|(rs1<<16)|(rs2<<8)|rs1|set_flags()); +} + +static void emit_mla(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + output_w32(out, 0xE0200090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); +} +static void emit_mlas(t_bytes *out, u_int hi, u_int lo, u_int rs1, u_int rs2) +{ + output_w32(out, 0xE0200090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); +} + +static void emit_umull(t_bytes *out, u_int lo, u_int hi, u_int rs1, u_int rs2) +{ + output_w32(out, 0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); +} +static void emit_umlal(t_bytes *out, u_int lo, u_int hi, u_int rs1, u_int rs2) +{ + output_w32(out, 0xe0a00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); +} +static void emit_smull(t_bytes *out, u_int lo, u_int hi, u_int rs1, u_int rs2) +{ + output_w32(out, 0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); +} +static void emit_smulltemp(t_bytes *out, u_int hi, u_int lo, u_int rs2) +{ + emit_mov(out, lo, R0); + u_int rs1 = R0; + output_w32(out, 0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); +} +static void emit_smlal(t_bytes *out, u_int lo, u_int hi, u_int rs1, u_int rs2) +{ + output_w32(out, 0xe0e00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); +} + +static void emit_umulls(t_bytes *out, u_int lo, u_int hi, u_int rs1, u_int rs2) +{ + output_w32(out, 0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1|set_flags()); +} +static void emit_umlals(t_bytes *out, u_int lo, u_int hi, u_int rs1, u_int rs2) +{ + output_w32(out, 0xe0a00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1|set_flags()); +} +static void emit_smulls(t_bytes *out, u_int lo, u_int hi, u_int rs1, u_int rs2) +{ + output_w32(out, 0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1|set_flags()); +} +static void emit_smlals(t_bytes *out, u_int lo, u_int hi, u_int rs1, u_int rs2) +{ + output_w32(out, 0xe0e00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1|set_flags()); +} + +static void emit_clz(t_bytes *out, int rs,int rt) +{ + output_w32(out,0xe16f0f10|rd_rn_rm(rt,0,rs)); +} + +static void emit_subcs(t_bytes *out, int rs1,int rs2,int rt) +{ + output_w32(out,0x20400000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_teq(t_bytes *out, int rs, int rt) +{ + output_w32(out,0xe1300000|rd_rn_rm(0,rs,rt)); +} + +typedef unsigned int (*JittedFunc)(); + +#define MOD_JUMP(ptr, cond, lbl_bytes, jmp_bytes) *(ptr) = 0x0a000000|(((lbl_bytes-jmp_bytes-8)>>2)&0xffffff)|(cond<<28); +#include "emit_core.cpp" + +static void myfunc(const char *str) { + puts(str); +} + +static void emit_ldr(t_bytes *out, int rs, int rt) { + output_w32(out, 0xe5900000|rd_rn_rm(rt,rs,0)|0); +} + +static void emit_str(t_bytes *out, int rs, int rt) { + output_w32(out, 0xe5800000|rd_rn_rm(rt,rs,0)|0); +} + +static void emit_str_cond(t_bytes *out, int rs, int rt, int cond) { + output_w32(out, 0x05800000|rd_rn_rm(rt,rs,0)|(cond<<28)); +} + +static void emit_ldrb(t_bytes *out, int rs, int rt) { + output_w32(out, 0xE5D00000|rd_rn_rm(rt,rs,0)|0); // LDRB R0, [R0] <-- armconverter.com +} + +static void emit_ldrsb(t_bytes *out, int rs, int rt) { + output_w32(out, 0xE1D000D0|rd_rn_rm(rt,rs,0)|0); // LDRSB R0, [R0] <-- armconverter.com +} + +static void emit_strb(t_bytes *out, int rs, int rt) { + output_w32(out, 0xE5C00000|rd_rn_rm(rt,rs,0)|0); +} + +static void emit_ldrsw(t_bytes *out, int rs, int rt) { + output_w32(out, 0xE1D000F0|rd_rn_rm(rt,rs,0)|0); // LDRSB R0, [R0] <-- armconverter.com +} + +static void emit_ldrs(t_bytes *out, int rs, int rt) { + emit_ldr(out, rs, rt); +} + +static void emit_ldrw(t_bytes *out, int rs, int rt) { + output_w32(out, 0xE1D000B0|rd_rn_rm(rt,rs,0)|0); // LDRB R0, [R0] <-- armconverter.com +} + +static void emit_strw(t_bytes *out, int rs, int rt) { + output_w32(out, 0xE1C000B0|rd_rn_rm(rt,rs,0)|0); +} + +static void emit_push(t_bytes *out, int rs) { + emit_addimm(out, SP, -4, SP); + emit_str(out, SP, rs); +} + +static void emit_pop(t_bytes *out, int rs) { + emit_ldr(out, SP, rs); + emit_addimm(out, SP, 4, SP); +} + + +static void emit_str_ptr_cond(t_bytes *out, uintptr_t ptr, int reg, int cond) { + emit_movimm(out, (uintptr_t)ptr, R10); + emit_str_cond(out, R10, reg, cond); +} + +static void printR1(unsigned int blah, unsigned int r) { + //LOGE("R1 = %u", r); +} + +static void emit_write_ptr32(t_bytes *out, uintptr_t ptr, uintptr_t val) { + emit_movimm(out, ptr, R2); + emit_movimm(out, val, R4); + emit_str(out, R2, R4); +} + +static void emit_write_ptr32_regptrTO_immFROM(t_bytes *out, int regptr, int val) { /* reg contains ptr */ + emit_movimm(out, (uintptr_t)val, R10); + emit_str(out, regptr, R10); +} +static void emit_write_ptr32_regptrTO_regFROM(t_bytes *out, int regptr, int reg) { /* reg contains ptr */ + emit_str(out, regptr, reg); +} +static void emit_write_ptr32_regptrTO_regptrFROM(t_bytes *out, int regptrto, int regptrfrom) { /* reg contains ptr */ + emit_ldr(out, regptrfrom, regptrfrom); + emit_str(out, regptrto, regptrfrom); +} +static void emit_read_ptr32_regptrFROM_regTO(t_bytes *out, int regptr, int regout) { + emit_ldr(out, regptr, regout); +} + +static void emit_write_ptr32_reg(t_bytes *out, uintptr_t ptr, int reg) { + emit_movimm(out, (uintptr_t)ptr, R8); + emit_str(out, R8, reg); +} + +static void emit_writeBYTE_ptr32_regptrTO_immFROM(t_bytes *out, int regptr, int val) { /* reg contains ptr */ + emit_movimm(out, (uintptr_t)val, R9); + emit_strb(out, regptr, R9); +} +static void emit_writeBYTE_ptr32_regptrTO_regFROM(t_bytes *out, int regptr, int reg) { /* reg contains ptr */ + emit_strb(out, regptr, reg); +} +static void emit_writeBYTE_ptr32_regptrTO_regptrFROM(t_bytes *out, int regptrto, int regptrfrom) { /* reg contains ptr */ + emit_ldrb(out, regptrfrom, regptrfrom); + emit_strb(out, regptrto, regptrfrom); +} +static void emit_readBYTE_ptr32_regptrFROM_regTO(t_bytes *out, int regptr, int regout) { + emit_ldrb(out, regptr, regout); +} + +static void emit_writeWORD_ptr32_regptrTO_immFROM(t_bytes *out, int regptr, int val) { /* reg contains ptr */ + emit_movimm(out, (uintptr_t)val, R9); + emit_strw(out, regptr, R9); +} +static void emit_writeWORD_ptr32_regptrTO_regFROM(t_bytes *out, int regptr, int reg) { /* reg contains ptr */ + emit_strw(out, regptr, reg); +} +static void emit_writeWORD_ptr32_regptrTO_regptrFROM(t_bytes *out, int regptrto, int regptrfrom) { /* reg contains ptr */ + emit_ldrw(out, regptrfrom, regptrfrom); + emit_strw(out, regptrto, regptrfrom); +} +static void emit_readWORD_ptr32_regptrFROM_regTO(t_bytes *out, int regptr, int regout) { + emit_ldrw(out, regptr, regout); +} + +static void emit_add_ptr32_regptrTO_regFROM(t_bytes *out, int regptr, int reg) { /* reg contains ptr */ + emit_ldr(out, regptr, R10); + emit_add(out, R10, reg, R10); + emit_str(out, regptr, R10); +} +static void emit_adds_ptr32_regptrTO_regFROM(t_bytes *out, int regptr, int reg) { /* reg contains ptr */ + emit_ldr(out, regptr, R10); + emit_adds(out, R10, reg, R10); + emit_str(out, regptr, R10); +} +static void emit_add_ptr32_regptrTO_immFROM(t_bytes *out, int regptr, int val) { /* reg contains ptr */ + emit_movimm(out, (uintptr_t)val, R9); + emit_ldr(out, regptr, R10); + emit_add(out, R10, R9, R10); + emit_str(out, regptr, R10); +} + +static void emit_adds_ptr32_regptrTO_immFROM(t_bytes *out, int regptr, int val) { /* reg contains ptr */ + emit_movimm(out, (uintptr_t)val, R9); + emit_ldr(out, regptr, R10); + emit_adds(out, R10, R9, R10); + emit_str(out, regptr, R10); +} + +static void emit_sub_ptr32_regptrTO_regFROM(t_bytes *out, int regptr, int reg) { /* reg contains ptr */ + emit_ldr(out, regptr, R10); + emit_sub(out, R10, reg, R10); + emit_str(out, regptr, R10); +} +static void emit_subs_ptr32_regptrTO_regFROM(t_bytes *out, int regptr, int reg) { /* reg contains ptr */ + emit_ldr(out, regptr, R10); + emit_subs(out, R10, reg, R10); + emit_str(out, regptr, R10); +} +static void emit_sub_ptr32_regptrTO_immFROM(t_bytes *out, int regptr, int val) { /* reg contains ptr */ + emit_movimm(out, (uintptr_t)val, R9); + emit_ldr(out, regptr, R10); + emit_sub(out, R10, R9, R10); + emit_str(out, regptr, R10); +} +static void emit_subs_ptr32_regptrTO_immFROM(t_bytes *out, int regptr, int val) { /* reg contains ptr */ + emit_movimm(out, (uintptr_t)val, R9); + emit_ldr(out, regptr, R10); + emit_subs(out, R10, R9, R10); + emit_str(out, regptr, R10); +} + +static void emit_neg_ptr32_regptr(t_bytes *out, int regptr) { /* reg contains ptr */ + emit_ldr(out, regptr, R10); + emit_neg(out, R10, R10); + emit_str(out, regptr, R10); +} + +static void emit_negs_ptr32_regptr(t_bytes *out, int regptr) { /* reg contains ptr */ + emit_ldr(out, regptr, R10); + emit_negs(out, R10, R10); + emit_str(out, regptr, R10); +} + +static void emit_ror_ptr32_regptr(t_bytes *out, int regptr, int shift) { + emit_ldr(out, regptr, R10); + emit_ror(out, R10, shift); + emit_str(out, regptr, R10); +} + +static void emit_rors_ptr32_regptr(t_bytes *out, int regptr, int shift) { + emit_ldr(out, regptr, R10); + emit_rors(out, R10, shift); + emit_str(out, regptr, R10); +} + +static void emit_rors_ptr32_regptr_reg(t_bytes *out, int regptr, int regimm) { + emit_ldr(out, regptr, R10); + emit_rors_reg(out, R10, regimm); + emit_str(out, regptr, R10); +} + +static void emit_ror_ptr32_regptr_reg(t_bytes *out, int regptr, int regimm) { + emit_ldr(out, regptr, R10); + emit_ror_reg(out, R10, regimm); + emit_str(out, regptr, R10); +} + +static void emit_cmpimm(t_bytes *out, int reg, int val) { + u_int armval; + if(genimm(val,&armval)) { + output_w32(out, 0xe3500000|rd_rn_rm(0,reg,0)|armval); + } + else { + emit_movimm(out, val, R10); + emit_cmp(out, reg, R10); + } +} + +static void emit_testimm(t_bytes *out, int reg, int val) { + u_int armval; + if(genimm(val,&armval)) { + output_w32(out, 0xe3100000|rd_rn_rm(0,reg,0)|armval); + } + else { + emit_movimm(out, val, R10); + emit_test(out, reg, R10); + } +} + + +static void emit_orimm(t_bytes *out, int reg, int val, int rt) { + u_int armval; + if(genimm(val,&armval)) { + output_w32(out, 0xe3800000|rd_rn_rm(rt,reg,0)|armval); + } + else { + emit_movimm(out, val, R9); + emit_or(out, reg, R9, rt); + } +} + +static void emit_xorimm(t_bytes *out, int reg, int val, int rt) { + u_int armval; + if(genimm(val,&armval)) { + output_w32(out, 0xe2200000|rd_rn_rm(rt,reg,0)|armval); + } + else { + emit_movimm(out, val, R10); + emit_xor(out, reg, R10, rt); + } +} + +static void emit_andimm(t_bytes *out, int reg, int val, int rt) { + u_int armval; + if(genimm(val,&armval)) { + output_w32(out, 0xe2000000|rd_rn_rm(rt,reg,0)|armval); + } + else { + emit_movimm(out, val, R10); + emit_and(out, reg, R10, rt); + } +} + +static void emit_andsimm(t_bytes *out, u_int reg, u_int val, u_int rt) +{ + u_int armval; + if(genimm(val,&armval)) { + output_w32(out, 0xe2000000|rd_rn_rm(rt,reg,0)|armval|set_flags()); + } + else { + emit_movimm(out, val, R10); + emit_ands(out, reg, R10, rt); + } +} + +static void emit_subimm(t_bytes *out, int reg, int val, int rt) { + emit_movimm(out, val, R10); + emit_sub(out, reg, R10, rt); +} + +static void emit_subsimm(t_bytes *out, int reg, int val, int rt) { + emit_movimm(out, val, R10); + emit_subs(out, reg, R10, rt); +} + + +static void emit_and_ptr32_regptr(t_bytes *out, int regptr, int bits) { + emit_ldr(out, regptr, 12); + emit_andimm(out, 12, bits, 12); + emit_str(out, regptr, 12); +} + +static void emit_or_ptr32_regptr(t_bytes *out, int regptr, int bits) { + emit_ldr(out, regptr, 12); + emit_orimm(out, 12, bits, 12); + emit_str(out, regptr, 12); +} + +static void emit_or_ptr8_regptr_reg(t_bytes *out, int regptr, int reg) { + emit_ldrb(out, regptr, 12); + emit_or(out, 12, reg, 12); + emit_strb(out, regptr, 12); +} +static void emit_or_ptr8_regptr_imm(t_bytes *out, int regptr, int imm) { + emit_ldrb(out, regptr, 12); + emit_orimm(out, 12, imm, 12); + emit_strb(out, regptr, 12); +} + +static void emit_or_ptr32_regptr_reg(t_bytes *out, int regptr, int reg) { + emit_ldr(out, regptr, 12); + emit_or(out, 12, reg, 12); + emit_str(out, regptr, 12); +} + +static void emit_and_ptr32_regptr_reg(t_bytes *out, int regptr, int reg) { + emit_ldr(out, regptr, 12); + emit_and(out, 12, reg, 12); + emit_str(out, regptr, 12); +} + + +static void emit_movimm_cond(t_bytes *out, int imm, int rt, int cond) { + + emit_movw_cond(out, imm&0x0000FFFF,rt, cond); + emit_movt_cond(out, imm&0xFFFF0000,rt, cond); +} + +#define Z 0 +#define NE 1 +#define CS 2 +#define CC 3 +#define SS 4 +#define O 6 + +static void emit_set_cond(t_bytes *out, int reg, int cond) { + output_w32(out, 0xE3A00000|(reg<<12)); + output_w32(out, 0x03A00001|(reg<<12)|(cond<<28)); +} + +static void emit_setc(t_bytes *out, int reg) { + emit_set_cond(out, reg, CS); +} + +static void emit_sets(t_bytes *out, int reg) { + emit_set_cond(out, reg, SS); +} + +static void emit_setz(t_bytes *out, int reg) { + emit_set_cond(out, reg, Z); +} + +static void emit_setnz(t_bytes *out, int reg) { + emit_set_cond(out, reg, NE); +} + +static void emit_setnc(t_bytes *out, int reg) { + emit_set_cond(out, reg, CC); +} +static void emit_seto(t_bytes *out, int reg) { + emit_set_cond(out, reg, O); +} + +static void emit_write_ptr32_reg_byte(t_bytes *out, uintptr_t ptr, int reg) { + emit_movimm(out, (uintptr_t)ptr, R8); + + + + emit_strb(out, R8, reg); +} + +static void emit_lea_reg_reg(t_bytes *out, int reg1, int reg, int scale, int outreg) { + if (scale == 2) emit_lsl(out, reg, 1); // MOV r10, r10, LSL #1 (multiple by 2) + else if (scale == 4) emit_lsl(out, reg, 2); // MOV r10, r10, LSL #2 (multiple by 4) + else if (scale == 8) emit_lsl(out, reg, 3); // MOV r10, r10, LSL #3 (multiple by 8) + + emit_add(out, reg, reg1, R10); + emit_mov(out, R10,outreg ); +} + +static void emit_lea_ptr_reg(t_bytes *out, int ptr, int reg, int scale, int outreg) { + emit_movimm(out, ptr, R10); + + if (scale == 2) emit_lsl(out, reg, 1); // MOV r10, r10, LSL #1 (multiple by 2) + else if (scale == 4) emit_lsl(out, reg, 2); // MOV r10, r10, LSL #2 (multiple by 4) + else if (scale == 8) emit_lsl(out, reg, 3); // MOV r10, r10, LSL #3 (multiple by 8) + + emit_add(out, reg, R10, R10); + emit_mov(out, R10, outreg); +} + +/* uses R9, R10 */ +static void emit_lea_ptr_val(t_bytes *out, int ptr, int val, int scale, int outreg) { + emit_movimm(out, ptr, R10); + emit_movimm(out, val, R9); + + if (scale == 2) emit_lsl(out, R9, 2); // MOV r10, r10, LSL #1 (multiple by 2) + else if (scale == 4) emit_lsl(out, R9, 4); // MOV r10, r10, LSL #2 (multiple by 4) + else if (scale == 8) emit_lsl(out, R9, 8); // MOV r10, r10, LSL #3 (multiple by 8) + + emit_add(out, R9, R10, R10); + + emit_mov(out, outreg, R10); +} + +/* NOTE - think these 2 functions are inverted */ +static void set_carry_flag(t_bytes *out) { + output_w32(out, 0xE10F1000); // mrs R1, cpsr + output_w32(out, 0xE3C11202); // bic R1, R1, #(1<<29) + output_w32(out, 0xE128F001); // msr cpsr_f, R1 +} + +static void clear_carry_flag(t_bytes *out) { + output_w32(out, 0xE10F1000); // mrs R1, cpsr + output_w32(out, 0xE3811202); // orr R1, R1, #(1<<29) + output_w32(out, 0xE128F001); // msr cpsr_f, R1 +} + +static void emit_read_ptr32_reg(t_bytes *out, uintptr_t ptr, int rs) { + emit_movimm(out, ptr, R10); + + emit_ldr(out, R10, rs); +} + +static void emit_read_ptr32_reg_byte(t_bytes *out, uintptr_t ptr, int rs) { + emit_movimm(out, (uintptr_t)ptr, R8); + emit_ldrb(out, R8, rs); +} + + +static void emit_var_copy32(t_bytes *out, uintptr_t from, uintptr_t to) { + emit_read_ptr32_reg(out, from, R7); + emit_write_ptr32_reg(out, to, R7); +} + +static void emit_var_copy32_reg(t_bytes *out, int from, int to) { + emit_str(out, to, from); +} + +static void emit_set_ptr_imm(t_bytes *out, uintptr_t ptr, int set) { + emit_movimm(out, set, R10); + emit_write_ptr32_reg(out, ptr, R10); +} + +static void emit_inc_ptr_imm(t_bytes *out, uintptr_t ptr, int add) { + emit_read_ptr32_reg(out, ptr, R10); + if (add > 0) { + emit_movimm(out, add, R9); + emit_add(out, R10, R9, R10); + } + else { + emit_movimm(out, add*-1, R9); + emit_sub(out, R10, R9, R10); + } + emit_write_ptr32_reg(out, ptr, R10); +} + +static void emit_sub_ptr_imm(t_bytes *out, uintptr_t ptr, int minus) { + emit_read_ptr32_reg(out, ptr, R10); + emit_movimm(out, minus, R9); + emit_sub(out, R10, R9, R10); + emit_write_ptr32_reg(out, ptr, R10); +} + +static void emit_inc_ptr(t_bytes *out, uintptr_t ptr, uintptr_t ptr2) { + emit_read_ptr32_reg(out, ptr, R2); + emit_read_ptr32_reg(out, ptr2, R3); + emit_add(out, R2, R3, R2); + emit_write_ptr32_reg(out, ptr, R2); +} + +static void emit_inc_ptr_reg(t_bytes *out, uintptr_t ptr, uintptr_t ptr2, int reg) { + emit_read_ptr32_reg(out, ptr, R7); + emit_read_ptr32_reg(out, ptr2, R9); + emit_add(out, reg, R9, R7); +} + +static void emit_inc_reg(t_bytes *out, uintptr_t ptr, int reg) { + emit_read_ptr32_reg(out, ptr, R9); + emit_add(out, reg, R9, reg); +} + +static void emit_mrs_cpsr(t_bytes *out, int reg) { + output_w32(out, 0xE10F0000|rd_rn_rm(0,reg,0)|0); +} + +static void branchn() { + +} + +static void emit_brk(t_bytes *out) { + output_w32(out, 0xE1200070); +} + +unsigned int genlabel(); + +static void emit_startfunc(t_bytes *out) { + + output_w32(out, 0xE92D4FF0); // push {r4, r5, r6, r7, r8, sb, sl, fp, lr} + //output_w32(out, 0xE28DB000); // add fp, sp, #0 + //output_w32(out, 0xE24DDC01); // sub sp, sp, #0x100 + + +/*u_int skip; + skip=genlabel(); + + emit_movimm(out, 0x0, R0); + // emit_label(out, (int)skip); + emit_addimm(out, R0, 0x1, R0); + emit_movimm(out, 0x1, R1); + emit_cmpimm(out, R1, 0x1); + emit_brk(out); + emit_branch_label(out, skip, 0x0); + + emit_movimm(out, 0x2, R2); + emit_movimm(out, 0x3, R3); + emit_movimm(out, 0x4, R4); + emit_movimm(out, 0x5, R5); + + emit_label(out, skip); + + emit_movimm(out, 0x6, R6);*/ + + //thread 12 break + + //emit_brk(out); +} + +static void emit_endfunc(t_bytes *out) { + //output_w32(out, 0xE24BD000); // sub sp, fp, #0 + output_w32(out, 0xE8BD8FF0); // pop {r4, r5, r6, r7, r8, sb, sl, fp, pc} +} + +static void callR3(t_bytes *out) { + output_w32(out, 0xE92D4FFE); // push {r1, r2, r3, r4, r5, r6, r7, r8, sb, sl, fp, lr} + output_w32(out, 0xE12FFF33); // blx r3 + output_w32(out, 0xE8BD4FFE); // pop {r1, r2, r3, r4, r5, r6, r7, r8, sb, sl, fp, lr} +} +static void callR9(t_bytes *out) { + output_w32(out, 0xE92D4FFE); // push {r1, r2, r3, r4, r5, r6, r7, r8, sb, sl, fp, lr} + output_w32(out, 0xE12FFF39); // blx sb + output_w32(out, 0xE8BD4FFE); // pop {r1, r2, r3, r4, r5, r6, r7, r8, sb, sl, fp, lr} +} +static void callR7(t_bytes *out) { + output_w32(out, 0xE92D4FFE); // push {r1, r2, r3, r4, r5, r6, r7, r8, sb, sl, fp, lr} + output_w32(out, 0xE12FFF37); // blx r7 + output_w32(out, 0xE8BD4FFE); // pop {r1, r2, r3, r4, r5, r6, r7, r8, sb, sl, fp, lr} +} +static void callR6(t_bytes *out) { + output_w32(out, 0xE92D4FFE); // push {r1, r2, r3, r4, r5, r6, r7, r8, sb, sl, fp, lr} + output_w32(out, 0xE12FFF36); // blx r6 + output_w32(out, 0xE8BD4FFE); // pop {r1, r2, r3, r4, r5, r6, r7, r8, sb, sl, fp, lr} +} +static void callR8(t_bytes *out) { + output_w32(out, 0xE92D4FFE); // push {r1, r2, r3, r4, r5, r6, r7, r8, sb, sl, fp, lr} + output_w32(out, 0xE12FFF38); // blx r8 + output_w32(out, 0xE8BD4FFE); // pop {r1, r2, r3, r4, r5, r6, r7, r8, sb, sl, fp, lr} +} + +static void save(t_bytes *out) { + output_w32(out, 0xE92D4FFF); // push {r0, r1, r2, r3, r4, r5, r6, r7, r8, sb, sl, fp, lr} +} + +static void restore(t_bytes *out) { + output_w32(out, 0xE8BD4FFF); // pop {r0, r1, r2, r3, r4, r5, r6, r7, r8, sb, sl, fp, lr} +} + + +//----------------------------------------------------------------------------- +// Shifting macros +//----------------------------------------------------------------------------- +#define GET_CARRY(invert) { \ + flags_ptr(R2, VALUE); \ + /*printreg3(1, R2);*/ \ + int setb=genlabel(); \ + int done=genlabel(); \ + output_w32(g_out, 0xE2122020); /* ands r2, r2, #0x20 */ \ + /*emit_andimm(g_out, R2, 0x20, R2); \ + emit_cmpimm(g_out, R2, 0x20);*/ \ + emit_branch_label(g_out, setb, 0x1); \ + set_carry_flag(g_out); \ + emit_branch_label(g_out, done, 0xE); \ + emit_label(g_out, (int)setb); \ + clear_carry_flag(g_out); \ + emit_label(g_out, (int)done); \ + /*c.bt(flags_ptr, 5); \ + if (invert) c.cmc();*/ \ +} + +//----------------------------------------------------------------------------- +// Shifting macros +//----------------------------------------------------------------------------- +#define SET_NZCV(sign) { \ + JIT_COMMENT("SET_NZCV"); \ + output_w32(g_out, 0xE10F1000); /* mrs r1, apsr */ \ + flags_ptr(y_reg, VALUE); \ + output_w32(g_out, 0xE1A01C21); /* lsr r1, r1, #0x18 */\ + output_w32(g_out, 0xE20110F0); /* and r1, r1, #0xf0 */ \ + output_w32(g_out, 0xE209900F); /* and sb, sb, #0xf */ \ + emit_or(g_out, R1, y_reg, R1); \ + emit_writeBYTE_ptr32_regptrTO_regFROM(g_out, CACHED_PTR(flags_ptr(R0, ADDRESS)), R1); \ +} + +#define SET_NZC { \ + JIT_COMMENT("SET_NZC"); \ + output_w32(g_out, 0xE10F1000); /* mrs r1, apsr */ \ + flags_ptr(y_reg, VALUE); \ + output_w32(g_out, 0xE1A01C21); /* lsr r1, r1, #0x18 */\ + output_w32(g_out, 0xE20110C0); /* and r1, r1, #0xc0 */ \ + if (cf_change) { output_w32(g_out, 0xE1811285); /* orr r1, r1, r5, lsl #5 */ } \ + output_w32(g_out, cf_change ? 0xE209901F /* and sb, sb, #0x1f */ : 0xE209903F /* and sb, sb, #0x3f */ ); \ + emit_or(g_out, R1, y_reg, R1); \ + emit_writeBYTE_ptr32_regptrTO_regFROM(g_out, CACHED_PTR(flags_ptr(R0, ADDRESS)), R1); \ +} + +#define SET_NZC_SHIFTS_ZERO(cf) { \ + JIT_COMMENT("SET_NZC_SHIFTS_ZERO"); \ + flags_ptr(R3, VALUE); \ + emit_and_ptr32_regptr(g_out, CACHED_PTR(flags_ptr(R1, ADDRESS)), 0x1F); \ + if(cf) \ + { \ + emit_lsl(g_out, rcf_reg, 5); \ + emit_orimm(g_out,rcf_reg, (1<<6), rcf_reg); \ + emit_or_ptr32_regptr_reg(g_out, CACHED_PTR(flags_ptr(R1, ADDRESS)), rcf_reg); \ + } \ + else \ + emit_or_ptr32_regptr(g_out, CACHED_PTR(flags_ptr(R1, ADDRESS)), (1<<6)); \ + JIT_COMMENT("end SET_NZC_SHIFTS_ZERO"); \ +} + +#define SET_NZ(clear_cv) { \ + output_w32(g_out, 0xE10F1000); /* mrs r1, apsr */ \ + flags_ptr(y_reg, VALUE); \ + output_w32(g_out, 0xE1A01C21); /* lsr r1, r1, #0x18 */ \ + output_w32(g_out, 0xE20110C0); /* and r1, r1, #0xc0 */ \ + /*if (cf_change) { output_w32(g_out, 0xE1811085);*/ /* ORR R1, R5, LSL #1 *//*}*/ \ + output_w32(g_out, clear_cv ? 0xE209900F /* and sb, sb, #0xf */ : 0xE209903F /* and sb, sb, #0x3f */ );\ + emit_or(g_out, R1, y_reg, R1); \ + emit_writeBYTE_ptr32_regptrTO_regFROM(g_out, PTR_STORE_REG/*flags_ptr(R0, ADDRESS)*/, R1); \ +} + +#define SET_N { \ + output_w32(g_out, 0xE10F1000); /* mrs r1, apsr */ \ + flags_ptr(y_reg, VALUE); \ + output_w32(g_out, 0xE1A01C21); /* lsr r1, r1, #0x18 */\ + output_w32(g_out, 0xE2011080); /* and r1, r1, #0x80 */ \ + /*if (cf_change) { output_w32(g_out, 0xE1811085);*/ /* ORR R1, R5, LSL #1 *//*}*/ \ + /*output_w32(g_out, 0xE209907F);*/ /* and R9, #0x7F */ \ + output_w32(g_out, 0xE209907F); /* and sb, sb, #0x7f */ \ + emit_or(g_out, R1, y_reg, R1); \ + emit_writeBYTE_ptr32_regptrTO_regFROM(g_out, PTR_STORE_REG/*flags_ptr(R0, ADDRESS)*/, R1); \ +} + +#define SET_Z { \ + output_w32(g_out, 0xE10F1000); /* mrs r1, apsr */ \ + flags_ptr(y_reg, VALUE); \ + output_w32(g_out, 0xE1A01C21); /* lsr r1, r1, #0x18 */\ + output_w32(g_out, 0xE2011040); /* and r1, r1, #0x40 */ \ + output_w32(g_out, 0xE20990BF); /* and sb, sb, #0xbf */ \ + emit_or(g_out, R1, y_reg, R1); \ + emit_writeBYTE_ptr32_regptrTO_regFROM(g_out, PTR_STORE_REG/*flags_ptr(R0, ADDRESS)*/, R1); \ +} + +#define SET_Q { \ + output_w32(g_out, 0xE10F1000); /* mrs r1, apsr */ \ + flags_ptr(y_reg, VALUE); \ + output_w32(g_out, 0xE1A01C21); /*lsr R1, #24 */\ + output_w32(g_out, 0xE2011010); /* lsr r1, r1, #0x18 */ \ + output_w32(g_out, 0xE20990EF); /* and sb, sb, #0xef */ \ + emit_or(g_out, R1, y_reg, R1); \ + emit_writeBYTE_ptr32_regptrTO_regFROM(g_out, PTR_STORE_REG/*flags_ptr(R0, ADDRESS)*/, R1); \ +} diff --git a/desmume/src/utils/arm_jit/arm_jit_arm.cpp b/desmume/src/utils/arm_jit/arm_jit_arm.cpp new file mode 100755 index 000000000..4d8649a80 --- /dev/null +++ b/desmume/src/utils/arm_jit/arm_jit_arm.cpp @@ -0,0 +1,4370 @@ +/* + Copyright (C) 2022 Patrick Herlihy - https://byte4byte.com + Copyright (C) 2022 DeSmuME team + + This file is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This file is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the this software. If not, see . +*/ + +#include "types.h" + +#ifdef HAVE_JIT + +#define PTR_STORE_REG 11 + +#include +#include + +#include "utils/bits.h" +#include "armcpu.h" +#include "instructions.h" +#include "instruction_attributes.h" +#include "MMU.h" +#include "MMU_timing.h" +#include "arm_jit.h" +#include "bios.h" + +#include +static const char *disassemble(u32 opcode); + +#define ADDRESS 0 +#define VALUE 1 +#define VALUE_NO_CACHE 2 + +#ifdef __aarch64__ +#include "arm64_emit.cpp" +#else +#include "arm_emit.cpp" +#endif +t_bytes *g_out = NULL; + +#ifdef __APPLE__ +#include +#include +static int g_jitWrite = 0; +void jit_exec() { + if (g_jitWrite) { + (void)pthread_jit_write_protect_np(1); + g_jitWrite = 0; + } +} + +void jit_write() { + if (! g_jitWrite) { + (void)pthread_jit_write_protect_np(0); + g_jitWrite = 1; + } +} +#else +#define jit_write(...) +#define jit_exec(...) +#endif + +#define tmp_reg R2 +#define tmp2_reg R3 +#define x_reg R8 +#define y_reg R9 +#define imm_reg R4 +#define rcf_reg R5 +#define rhs_reg R6 +#define lhs_reg R7 + +#define rcf rcf_reg +#define rhs rhs_reg +#define lhs lhs_reg +#define x x_reg +#define y y_reg +#define tmp tmp_reg +#define tmp2 tmp2_reg + +#define LOG_JIT_LEVEL 0 +#define PROFILER_JIT_LEVEL 0 + +#if (PROFILER_JIT_LEVEL > 0) +#include +#endif + +#if (LOG_JIT_LEVEL > 0) +#define LOG_JIT 1 +#define JIT_COMMENT(...) c.comment(__VA_ARGS__) +#define printJIT(buf, val) { \ +} +#else +#define LOG_JIT 0 +#define JIT_COMMENT(...) +#define printJIT(buf, val) +#endif + +#define CACHED_PTR(exp) PTR_STORE_REG + +u32 saveBlockSizeJIT = 0; + +static volatile unsigned int label_gen_num=0; +unsigned int genlabel() { + return ++label_gen_num; +} + +#include + +#define GpVar uintptr_t + +static volatile int PROCNUM; +static volatile int *PROCNUM_ptr = &PROCNUM; +static int bb_opcodesize; +static int bb_adr; +static bool bb_thumb; +static GpVar bb_cpu; +static GpVar bb_cycles; +static GpVar bb_total_cycles; +static u32 bb_constant_cycles; + +#define cpu (&ARMPROC) + +static void save(); +static void restore(); + +static int inline cpu_get_ptr(void *start, int o, int r, int v) { + emit_ptr(g_out, (uintptr_t)((uintptr_t)(&ARMPROC)+o), v == VALUE ? PTR_STORE_REG : r); + if (v) emit_ldr(g_out, v == VALUE ? PTR_STORE_REG : r, r); + return r; +} + +static int inline cpu_get_ptrB(void *start, int o, int r, int v) { + emit_ptr(g_out, (uintptr_t)((uintptr_t)(&ARMPROC)+o), v == VALUE ? PTR_STORE_REG : r); + if (v) emit_ldrb(g_out, v == VALUE ? PTR_STORE_REG : r, r); + return r; +} + + +static int inline cpu_get_ptrW(void *start, int o, int r, int v) { + emit_ptr(g_out, (uintptr_t)((uintptr_t)(&ARMPROC)+o), v == VALUE ? PTR_STORE_REG : r); + if (v) emit_ldrw(g_out, v == VALUE ? PTR_STORE_REG : r, r); + return r; +} + +static int inline cpu_get_ptrS(void *start, int o, int r, int v) { + emit_ptr(g_out, (uintptr_t)((uintptr_t)(&ARMPROC)+o), v == VALUE ? PTR_STORE_REG : r); + if (v) emit_ldrs(g_out, v == VALUE ? PTR_STORE_REG : r, r); + return r; +} + +static int inline cpu_get_ptrSB(void *start, int o, int r, int v) { + emit_ptr(g_out, (uintptr_t)((uintptr_t)(&ARMPROC)+o), v == VALUE ? PTR_STORE_REG : r); + if (v) emit_ldrsb(g_out, v == VALUE ? PTR_STORE_REG : r, r); + return r; +} + + +static int inline cpu_get_ptrSW(void *start, int o, int r, int v) { + emit_ptr(g_out, (uintptr_t)((uintptr_t)(&ARMPROC)+o), v == VALUE ? PTR_STORE_REG : r); + if (v) emit_ldrsw(g_out, v == VALUE ? PTR_STORE_REG : r, r); + return r; +} + + +static int inline get_ptr(void *start, int o, int r, int v) { + emit_read_ptr64_reg(g_out, (uintptr_t)start, r); + emit_addimm64(g_out, r, o, r); + if (v) emit_ldr(g_out, r, r); + return r; +} + +static int inline get_ptrB(void *start, int o, int r, int v) { + emit_read_ptr64_reg(g_out, (uintptr_t)start, r); + emit_addimm64(g_out, r, o, r); + if (v) emit_ldrb(g_out, r, r); + return r; +} + + +static int inline get_ptrW(void *start, int o, int r, int v) { + emit_read_ptr64_reg(g_out, (uintptr_t)start, r); + emit_addimm64(g_out, r, o, r); + if (v) emit_ldrw(g_out, r, r); + return r; +} + +static int inline get_ptrS(void *start, int o, int r, int v) { + emit_read_ptr64_reg(g_out, (uintptr_t)start, r); + emit_addimm64(g_out, r, o, r); + if (v) emit_ldrs(g_out, r, r); + return r; +} + +static int inline get_ptrSB(void *start, int o, int r, int v) { + emit_read_ptr64_reg(g_out, (uintptr_t)start, r); + emit_addimm64(g_out, r, o, r); + if (v) emit_ldrsb(g_out, r, r); + return r; +} + + +static int inline get_ptrSW(void *start, int o, int r, int v) { + emit_read_ptr64_reg(g_out, (uintptr_t)start, r); + emit_addimm64(g_out, r, o, r); + if (v) emit_ldrsw(g_out, r, r); + return r; +} + +#define cpu_dword_ptr(z, o, r, v) cpu_get_ptr(&z, o, r, v) +#define cpu_byte_ptr(z, o, r, v) cpu_get_ptrB(&z, o, r, v) +#define cpu_word_ptr(z, o, r, v) cpu_get_ptrW(&z, o, r, v) +#define cpu_dwords_ptr(z, o, r, v) cpu_get_ptrS(&z, o, r, v) +#define cpu_bytes_ptr(z, o, r, v) cpu_get_ptrSB(&z, o, r, v) +#define cpu_words_ptr(z, o, r, v) cpu_get_ptrSW(&z, o, r, v) + +#define dword_ptr(z, o, r, v) get_ptr(&z, o, r, v) +#define byte_ptr(z, o, r, v) get_ptrB(&z, o, r, v) +#define word_ptr(z, o, r, v) get_ptrW(&z, o, r, v) +#define dwords_ptr(z, o, r, v) get_ptrS(&z, o, r, v) +#define bytes_ptr(z, o, r, v) get_ptrSB(&z, o, r, v) +#define words_ptr(z, o, r, v) get_ptrSW(&z, o, r, v) + +#define bb_next_instruction (bb_adr + bb_opcodesize) +#define bb_r15 (bb_adr + 2 * bb_opcodesize) + +#define cpu_ptr(x, reg, v) cpu_dword_ptr(bb_cpu, offsetof(armcpu_t, x), reg, v) +#define cpu_ptr_byte(x, y, reg, v) cpu_byte_ptr(bb_cpu, offsetof(armcpu_t, x) + y, reg, v) +#define flags_ptr(reg, v) cpu_ptr_byte(CPSR.val, 3, reg, v) +#define reg_ptr(x, reg, v) cpu_dword_ptr(bb_cpu, offsetof(armcpu_t, R) + 4*(x), reg, v) +#define reg_pos_ptr(x, reg, v) cpu_dword_ptr(bb_cpu, offsetof(armcpu_t, R) + 4*REG_POS(i,(x)), reg, v) +#define reg_pos_ptrL(x, reg, v) cpu_word_ptr( bb_cpu, offsetof(armcpu_t, R) + 4*REG_POS(i,(x)), reg, v) +#define reg_pos_ptrH(x, reg, v) cpu_word_ptr( bb_cpu, offsetof(armcpu_t, R) + 4*REG_POS(i,(x)) + 2, reg, v) +#define reg_pos_ptrB(x, reg, v) cpu_byte_ptr( bb_cpu, offsetof(armcpu_t, R) + 4*REG_POS(i,(x)), reg, v) +#define reg_pos_ptrSL(x, reg, v) cpu_words_ptr( bb_cpu, offsetof(armcpu_t, R) + 4*REG_POS(i,(x)), reg, v) +#define reg_pos_ptrSH(x, reg, v) cpu_words_ptr( bb_cpu, offsetof(armcpu_t, R) + 4*REG_POS(i,(x)) + 2, reg, v) +#define reg_pos_ptrSB(x, reg, v) cpu_bytes_ptr( bb_cpu, offsetof(armcpu_t, R) + 4*REG_POS(i,(x)), reg, v) +#define reg_pos_thumb(x, reg, v) cpu_dword_ptr(bb_cpu, offsetof(armcpu_t, R) + 4*((i>>(x))&0x7), reg, v) +#define reg_pos_thumbB(x, reg, v) cpu_byte_ptr(bb_cpu, offsetof(armcpu_t, R) + 4*((i>>(x))&0x7), reg, v) +#define cp15_ptr(x, reg, v) dword_ptr(bb_cp15, offsetof(armcp15_t, x), reg, v) +#define cp15_ptr_off(x, y, reg, v) dword_ptr(bb_cp15, offsetof(armcp15_t, x) + y, reg, v) +#define mmu_ptr(x, reg, v) dword_ptr(bb_mmu, offsetof(MMU_struct, x), reg, v) +#define mmu_ptr_byte(x, reg, v) byte_ptr(bb_mmu, offsetof(MMU_struct, x), reg, v) +#define _REG_NUM(i, n) ((i>>(n))&0x7) + +#define special_page (1==1 /*g_i == 80 || g_i == 81 || g_i == 82*/) +#define print_page (1==1) + +static inline void get_flags(int reg, int v) { + flags_ptr(reg, v); +} + + +#ifdef MAPPED_JIT_FUNCS +CACHE_ALIGN JIT_struct JIT; + +uintptr_t *JIT_struct::JIT_MEM[2][0x4000] = {{0}}; + +static uintptr_t *JIT_MEM[2][32] = { + //arm9 + { + /* 0X*/ DUP2(JIT.ARM9_ITCM), + /* 1X*/ DUP2(JIT.ARM9_ITCM), // mirror + /* 2X*/ DUP2(JIT.MAIN_MEM), + /* 3X*/ DUP2(JIT.SWIRAM), + /* 4X*/ DUP2(NULL), + /* 5X*/ DUP2(NULL), + /* 6X*/ NULL, + JIT.ARM9_LCDC, // Plain ARM9-CPU Access (LCDC mode) (max 656KB) + /* 7X*/ DUP2(NULL), + /* 8X*/ DUP2(NULL), + /* 9X*/ DUP2(NULL), + /* AX*/ DUP2(NULL), + /* BX*/ DUP2(NULL), + /* CX*/ DUP2(NULL), + /* DX*/ DUP2(NULL), + /* EX*/ DUP2(NULL), + /* FX*/ DUP2(JIT.ARM9_BIOS) + }, + //arm7 + { + /* 0X*/ DUP2(JIT.ARM7_BIOS), + /* 1X*/ DUP2(NULL), + /* 2X*/ DUP2(JIT.MAIN_MEM), + /* 3X*/ JIT.SWIRAM, + JIT.ARM7_ERAM, + /* 4X*/ NULL, + JIT.ARM7_WIRAM, + /* 5X*/ DUP2(NULL), + /* 6X*/ JIT.ARM7_WRAM, // VRAM allocated as Work RAM to ARM7 (max. 256K) + NULL, + /* 7X*/ DUP2(NULL), + /* 8X*/ DUP2(NULL), + /* 9X*/ DUP2(NULL), + /* AX*/ DUP2(NULL), + /* BX*/ DUP2(NULL), + /* CX*/ DUP2(NULL), + /* DX*/ DUP2(NULL), + /* EX*/ DUP2(NULL), + /* FX*/ DUP2(NULL) + } +}; + +static u32 JIT_MASK[2][32] = { + //arm9 + { + /* 0X*/ DUP2(0x00007FFF), + /* 1X*/ DUP2(0x00007FFF), + /* 2X*/ DUP2(0x003FFFFF), // FIXME _MMU_MAIN_MEM_MASK + /* 3X*/ DUP2(0x00007FFF), + /* 4X*/ DUP2(0x00000000), + /* 5X*/ DUP2(0x00000000), + /* 6X*/ 0x00000000, + 0x000FFFFF, + /* 7X*/ DUP2(0x00000000), + /* 8X*/ DUP2(0x00000000), + /* 9X*/ DUP2(0x00000000), + /* AX*/ DUP2(0x00000000), + /* BX*/ DUP2(0x00000000), + /* CX*/ DUP2(0x00000000), + /* DX*/ DUP2(0x00000000), + /* EX*/ DUP2(0x00000000), + /* FX*/ DUP2(0x00007FFF) + }, + //arm7 + { + /* 0X*/ DUP2(0x00003FFF), + /* 1X*/ DUP2(0x00000000), + /* 2X*/ DUP2(0x003FFFFF), + /* 3X*/ 0x00007FFF, + 0x0000FFFF, + /* 4X*/ 0x00000000, + 0x0000FFFF, + /* 5X*/ DUP2(0x00000000), + /* 6X*/ 0x0003FFFF, + 0x00000000, + /* 7X*/ DUP2(0x00000000), + /* 8X*/ DUP2(0x00000000), + /* 9X*/ DUP2(0x00000000), + /* AX*/ DUP2(0x00000000), + /* BX*/ DUP2(0x00000000), + /* CX*/ DUP2(0x00000000), + /* DX*/ DUP2(0x00000000), + /* EX*/ DUP2(0x00000000), + /* FX*/ DUP2(0x00000000) + } +}; + +static void init_jit_mem() +{ + static bool inited = false; + if(inited) + return; + inited = true; + for(int proc=0; proc<2; proc++) + for(int i=0; i<0x4000; i++) + JIT.JIT_MEM[proc][i] = JIT_MEM[proc][i>>9] + (((i<<14) & JIT_MASK[proc][i>>9]) >> 1); +} + +#else +DS_ALIGN(4096) uintptr_t compiled_funcs[1<<26] = {0}; +#endif + +static u8 recompile_counts[(1<<26)/16]; + +static void emit_branch(int cond, int to); +static void _armlog(u8 proc, u32 addr, u32 opcode); + +//----------------------------------------------------------------------------- +// Generic instruction wrapper +//----------------------------------------------------------------------------- + +template +static u32 FASTCALL OP_DECODE() +{ + u32 cycles; + u32 adr = cpu->instruct_adr; + if(thumb) + { + cpu->next_instruction = adr + 2; + cpu->R[15] = adr + 4; + u32 opcode = _MMU_read16(adr); +#ifdef DEBUG_JIT + _armlog(PROCNUM, adr, opcode); + callExe2(opcode>>6, opcode, thumb); +#endif + cycles = thumb_instructions_set[PROCNUM][opcode>>6](opcode); + } + else + { + cpu->next_instruction = adr + 4; + cpu->R[15] = adr + 8; + u32 opcode = _MMU_read32(adr); +#ifdef DEBUG_JIT + _armlog(PROCNUM, adr, opcode); +#endif + if(CONDITION(opcode) == 0xE || TEST_COND(CONDITION(opcode), CODE(opcode), cpu->CPSR)) { +#ifdef DEBUG_JIT + callExe2(INSTRUCTION_INDEX(opcode), opcode, thumb); +#endif + cycles = arm_instructions_set[PROCNUM][INSTRUCTION_INDEX(opcode)](opcode); + } + else + cycles = 1; + } + cpu->instruct_adr = cpu->next_instruction; + return cycles; +} + +static const ArmOpCompiled op_decode[2][2] = { OP_DECODE<0,0>, OP_DECODE<0,1>, OP_DECODE<1,0>, OP_DECODE<1,1> }; + +#define changeCPSR { \ + emit_ptr(g_out, (uintptr_t)NDS_Reschedule, R3);\ + callR3(g_out); \ +} + +#if (PROFILER_JIT_LEVEL > 0) +struct PROFILER_COUNTER_INFO +{ + u64 count; + char name[64]; +}; + +struct JIT_PROFILER +{ + JIT_PROFILER() + { + memset(&arm_count[0], 0, sizeof(arm_count)); + memset(&thumb_count[0], 0, sizeof(thumb_count)); + } + + u64 arm_count[4096]; + u64 thumb_count[1024]; +} profiler_counter[2]; + +static GpVar bb_profiler; + +#define profiler_counter_arm(opcode) qword_ptr(bb_profiler, offsetof(JIT_PROFILER, arm_count) + (INSTRUCTION_INDEX(opcode)*sizeof(u64))) +#define profiler_counter_thumb(opcode) qword_ptr(bb_profiler, offsetof(JIT_PROFILER, thumb_count) + ((opcode>>6)*sizeof(u64))) + +#if (PROFILER_JIT_LEVEL > 1) +struct PROFILER_ENTRY +{ + u32 addr; + u32 cycles; +} profiler_entry[2][1<<26]; + +static GpVar bb_profiler_entry; +#endif + +#endif + +#define S_DST_R15 { \ + JIT_COMMENT("S_DST_R15"); \ + cpu_ptr(next_instruction, R3, VALUE);\ + cpu_ptr(SPSR.val, R9, VALUE);\ + emit_mov(g_out, R9, R10); \ + emit_andimm(g_out, R10, 0x1F, R10); \ + emit_ptr(g_out, (uintptr_t)&ARMPROC, 0); \ + emit_mov(g_out, R10, R1); \ + emit_ptr(g_out, (uintptr_t)armcpu_switchMode, R3); \ + callR3(g_out); \ + emit_write_ptr32_regptrTO_regFROM(g_out, cpu_ptr(CPSR.val, R1, ADDRESS), R9); \ + emit_andimm(g_out, R9, (1<<5), R9); \ + emit_lsr(g_out, R9, 5); \ + emit_lea_ptr_reg(g_out, 0xFFFFFFFC,R9, 2, R10); \ + emit_and(g_out, R10, reg_ptr(15, R1, VALUE), R10); \ + emit_write_ptr32_regptrTO_regFROM(g_out, cpu_ptr(next_instruction, R1, ADDRESS), R10); \ +} + +// ============================================================================================= IMM +#define LSL_IMM \ + bool rhs_is_imm = false; \ + static unsigned int rhs_var;\ + u32 imm2 = ((i>>7)&0x1F); \ + reg_pos_ptr(0, rhs_reg, VALUE); \ + if(imm2) emit_lsl(g_out, rhs_reg, imm2); \ + u32 rhs_first = cpu->R[REG_POS(i,0)] << imm2;\ + emit_movimm(g_out, imm2, imm_reg); + +#define S_LSL_IMM \ + bool rhs_is_imm = false; \ + u8 cf_change = 0; \ + static unsigned int rhs_var;\ + u32 imm2 = ((i>>7)&0x1F); \ + reg_pos_ptr(0, rhs_reg, VALUE); \ + if (imm2) \ + { \ + cf_change = 1; \ + emit_lsls(g_out, rhs_reg, imm2); \ + } \ + emit_movimm(g_out, imm2, imm_reg); + +#define LSR_IMM \ + bool rhs_is_imm = false; \ + static unsigned int rhs_var;\ + u32 imm2 = ((i>>7)&0x1F); \ + if(imm2) \ + { \ + reg_pos_ptr(0, rhs_reg, VALUE); \ + emit_lsr(g_out, rhs, imm2); \ + } \ + else \ + emit_movimm(g_out, 0, rhs_reg); \ + u32 rhs_first = imm2 ? cpu->R[REG_POS(i,0)] >> imm2 : 0; \ + emit_movimm(g_out, imm2, imm_reg); + +#define S_LSR_IMM \ + bool rhs_is_imm = false; \ + u8 cf_change = 1; \ + static unsigned int rhs_var; \ + u32 imm2 = ((i>>7)&0x1F); \ + reg_pos_ptr(0, rhs_reg, VALUE); \ + if (!imm2) \ + { \ + emit_testimm(g_out, rhs_reg, (1 << 31)); \ + emit_setnz(g_out, rcf); \ + emit_xor(g_out, rhs_reg, rhs_reg, rhs_reg); \ + } \ + else \ + { \ + emit_lsrs(g_out, rhs_reg, imm2); \ + } \ + emit_movimm(g_out, imm2, imm_reg); + +#define ASR_IMM \ + bool rhs_is_imm = false; \ + static unsigned int rhs_var;\ + u32 imm2 = ((i>>7)&0x1F); \ + reg_pos_ptr(0, rhs_reg, VALUE); \ + if(!imm2) imm2 = 31; \ + emit_asr(g_out, rhs_reg, imm2); \ + u32 rhs_first = (s32)cpu->R[REG_POS(i,0)] >> imm2; \ + emit_movimm(g_out, imm2, imm_reg); + +#define S_ASR_IMM \ + bool rhs_is_imm = false; \ + u8 cf_change = 1; \ + static unsigned int rhs_var; \ + u32 imm2 = ((i>>7)&0x1F); \ + reg_pos_ptr(0, rhs_reg, VALUE); \ + if (!imm2) imm2 = 31; \ + emit_asrs(g_out, rhs_reg, imm2); \ + emit_movimm(g_out, imm2, imm_reg); \ + +#define ROR_IMM \ + bool rhs_is_imm = false; \ + u32 imm2 = ((i>>7)&0x1F); \ + static unsigned int rhs_var; \ + reg_pos_ptr(0, rhs_reg, VALUE); \ + if (!imm2) \ + { \ + GET_CARRY(0) \ + emit_rrx(g_out, rhs_reg, 1); \ + } \ + else \ + emit_ror(g_out, rhs_reg, imm2); \ + u32 rhs_first = imm2?ROR(cpu->R[REG_POS(i,0)], imm2) : ((u32)cpu->CPSR.bits.C<<31)|(cpu->R[REG_POS(i,0)]>>1); \ + emit_movimm(g_out, imm2, imm_reg); + +#define S_ROR_IMM \ + bool rhs_is_imm = false; \ + u8 cf_change = 1; \ + static unsigned int rhs_var; \ + u32 imm2 = ((i>>7)&0x1F); \ + reg_pos_ptr(0, rhs_reg, VALUE); \ + if (!imm2) \ + { \ + GET_CARRY(0) \ + emit_rrxs(g_out, rhs_reg, 1); \ + } \ + else \ + emit_rors(g_out, rhs_reg, imm2); \ + emit_movimm(g_out, imm2, imm_reg); \ + +#define REG_OFF \ + bool rhs_is_imm = false; \ + static unsigned int rhs_var; \ + reg_pos_ptr(0, rhs_reg, VALUE); \ + u32 rhs_first = cpu->R[REG_POS(i,0)]; + +#define IMM_VAL \ + JIT_COMMENT("IMM_VAL"); \ + bool rhs_is_imm = true; \ + u32 rhs_var = ROR((i&0xFF), (i>>7)&0x1E); \ + u32 rhs_first = rhs_var; \ + emit_movimm(g_out, rhs_var, rhs_reg); + +#define S_IMM_VAL \ + bool rhs_is_imm = true; \ + u8 cf_change = 0; \ + static unsigned int rhs_var; \ + rhs_var = ROR((i&0xFF), (i>>7)&0x1E); \ + if ((i>>8)&0xF) \ + { \ + cf_change = 1; \ + emit_movimm(g_out, BIT31(rhs_var), rcf); \ + } \ + u32 rhs_first = rhs_var; \ + emit_movimm(g_out, rhs_var, rhs_reg); + +#define IMM_OFF \ + JIT_COMMENT("IMM_OFF"); \ + bool rhs_is_imm = true; \ + u32 rhs_var = ((i>>4)&0xF0)+(i&0xF); \ + u32 rhs_first = rhs_var; \ + emit_movimm(g_out, rhs_var, rhs_reg); + +#define IMM_OFF_12 \ + JIT_COMMENT("IMM_OFF_12"); \ + bool rhs_is_imm = true; \ + u32 rhs_var = (i & 0xFFF); \ + u32 rhs_first = rhs_var; \ + emit_movimm(g_out, rhs_var, rhs_reg); + +// ============================================================================================= REG +#define LSX_REG(name, x86inst, sign) \ + bool rhs_is_imm = false; \ + static unsigned int rhs_var; \ + reg_pos_ptrB(8, imm_reg, VALUE); \ + reg_pos_ptr(0, rhs_reg, VALUE); \ + x86inst(g_out, rhs_reg, imm_reg); + +#define S_LSX_REG(name, x86inst, sign) \ + bool rhs_is_imm = false; \ + u8 cf_change = 1; \ + GET_CARRY(0); \ + reg_pos_ptr(8, imm_reg, VALUE); \ + reg_pos_ptr(0, rhs_reg, VALUE); \ + x86inst(g_out, rhs_reg, imm_reg); \ + +#define LSL_REG LSX_REG(LSL_REG, emit_lsl_reg, 0) +#define LSR_REG LSX_REG(LSR_REG, emit_lsr_reg, 0) +#define ASR_REG LSX_REG(ASR_REG, emit_asr_reg, 1) +#define S_LSL_REG S_LSX_REG(S_LSL_REG, emit_lsls_reg, 0) +#define S_LSR_REG S_LSX_REG(S_LSR_REG, emit_lsrs_reg, 0) +#define S_ASR_REG S_LSX_REG(S_ASR_REG, emit_asrs_reg, 1) + +#define ROR_REG \ + bool rhs_is_imm = false; \ + static unsigned int rhs_var; \ + reg_pos_ptr(0, rhs_reg, VALUE); \ + reg_pos_ptrB(8, imm_reg, VALUE); \ + emit_ror_reg(g_out, rhs_reg, imm_reg); + +#define S_ROR_REG \ + bool rhs_is_imm = false; \ + bool cf_change = 1; \ + static unsigned int rhs_var; \ + GET_CARRY(0); \ + reg_pos_ptr(8, imm_reg, VALUE); \ + reg_pos_ptr(0, rhs_reg, VALUE); \ + emit_rors_reg(g_out, rhs_reg, imm_reg); \ + +//==================================================================== common funcs +static void emit_MMU_aluMemCycles(int alu_cycles, void *mem_cycles, int population) +{ + if(PROCNUM==ARMCPU_ARM9) + { + if(population < alu_cycles) + { + emit_movimm(g_out, alu_cycles, x_reg); + emit_read_ptr32_reg(g_out, (uintptr_t)mem_cycles, R2); + emit_cmp(g_out, R2, x_reg); + emit_str_ptr_cond(g_out, (uintptr_t)mem_cycles, x_reg, 0xb); + } + } + else + emit_inc_ptr_imm(g_out, (uintptr_t)mem_cycles, alu_cycles); +} + +//----------------------------------------------------------------------------- +// OPs +//----------------------------------------------------------------------------- +#define OP_ARITHMETIC(arg, x86inst, symmetric, flags) \ + arg; \ + if(REG_POS(i,12) == REG_POS(i,16)) \ + { \ + reg_pos_ptr(12, R1, VALUE);\ + x86inst(g_out, R1, rhs, R1); \ + emit_write_ptr32_regptrTO_regFROM(g_out,CACHED_PTR(reg_pos_ptr(12, R2, ADDRESS)), R1); \ + } \ + else if(symmetric && !rhs_is_imm) \ + { \ + x86inst(g_out, rhs_reg, reg_pos_ptr(16, R1, VALUE), rhs_reg); \ + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(12, R1, ADDRESS), rhs_reg); \ + } \ + else \ + { \ + reg_pos_ptr(16, lhs, VALUE); \ + x86inst(g_out, lhs, rhs, lhs); \ + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(12, R1, ADDRESS), lhs_reg); \ + } \ + if(flags) \ + { \ + if(REG_POS(i,12)==15) \ + { \ + S_DST_R15; \ + emit_inc_ptr_imm(g_out, (uintptr_t)&bb_total_cycles, 2); \ + return 1; \ + } \ + SET_NZCV(!symmetric); \ + } \ + else \ + { \ + if(REG_POS(i,12)==15) \ + { \ + reg_ptr(15, tmp_reg, VALUE); \ + emit_write_ptr32_regptrTO_regFROM(g_out, cpu_ptr(next_instruction, R1, ADDRESS), tmp_reg); \ + emit_inc_ptr_imm(g_out, (uintptr_t)&bb_total_cycles, 2); \ + } \ + } \ + return 1; + +#define OP_ARITHMETIC_R(arg, x86inst, flags) \ + arg; \ + emit_mov(g_out, rhs, lhs); \ + x86inst(g_out, lhs, reg_pos_ptr(16, R1, VALUE), lhs); \ + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(12, R1, ADDRESS), lhs); \ + if(flags) \ + { \ + if(REG_POS(i,12)==15) \ + { \ + S_DST_R15; \ + emit_inc_ptr_imm(g_out, (uintptr_t)&bb_total_cycles, 2); \ + return 1; \ + } \ + SET_NZCV(1); \ + } \ + else \ + { \ + if(REG_POS(i,12)==15) \ + { \ + emit_write_ptr32_regptrTO_regFROM(g_out, cpu_ptr(next_instruction, R1, ADDRESS), lhs); \ + emit_inc_ptr_imm(g_out, (uintptr_t)&bb_total_cycles, 2); \ + } \ + } \ + return 1; + +#define OP_ARITHMETIC_S(arg, x86inst, symmetric) \ + arg; \ + if(REG_POS(i,12) == REG_POS(i,16)) \ + { \ + reg_pos_ptr(12, R1, VALUE); \ + x86inst(g_out, R1, rhs, R1); \ + emit_write_ptr32_regptrTO_regFROM(g_out, CACHED_PTR(reg_pos_ptr(12, R2, ADDRESS)), R1); \ + } \ + else if(symmetric && !rhs_is_imm) \ + { \ + x86inst(g_out, rhs_reg, reg_pos_ptr(16, R1, VALUE), rhs_reg); \ + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(12, R1, ADDRESS), rhs_reg); \ + } \ + else \ + { \ + reg_pos_ptr(16, lhs, VALUE); \ + x86inst(g_out, lhs, rhs, lhs); \ + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(12, R1, ADDRESS), lhs); \ + } \ + if(REG_POS(i,12)==15) \ + { \ + S_DST_R15; \ + emit_inc_ptr_imm(g_out, (uintptr_t)&bb_total_cycles, 2); \ + return 1; \ + } \ + SET_NZC; \ + return 1; + +static int OP_AND_LSL_IMM(const u32 i) { OP_ARITHMETIC(LSL_IMM, emit_and, 1, 0); } +static int OP_AND_LSL_REG(const u32 i) { OP_ARITHMETIC(LSL_REG, emit_and, 1, 0); } +static int OP_AND_LSR_IMM(const u32 i) { OP_ARITHMETIC(LSR_IMM, emit_and, 1, 0); } +static int OP_AND_LSR_REG(const u32 i) { OP_ARITHMETIC(LSR_REG, emit_and, 1, 0); } +static int OP_AND_ASR_IMM(const u32 i) { OP_ARITHMETIC(ASR_IMM, emit_and, 1, 0); } +static int OP_AND_ASR_REG(const u32 i) { OP_ARITHMETIC(ASR_REG, emit_and, 1, 0); } +static int OP_AND_ROR_IMM(const u32 i) { OP_ARITHMETIC(ROR_IMM, emit_and, 1, 0); } +static int OP_AND_ROR_REG(const u32 i) { OP_ARITHMETIC(ROR_REG, emit_and, 1, 0); } +static int OP_AND_IMM_VAL(const u32 i) { OP_ARITHMETIC(IMM_VAL, emit_and, 1, 0); } + +static int OP_EOR_LSL_IMM(const u32 i) { OP_ARITHMETIC(LSL_IMM, emit_xor, 1, 0); } +static int OP_EOR_LSL_REG(const u32 i) { OP_ARITHMETIC(LSL_REG, emit_xor, 1, 0); } +static int OP_EOR_LSR_IMM(const u32 i) { OP_ARITHMETIC(LSR_IMM, emit_xor, 1, 0); } +static int OP_EOR_LSR_REG(const u32 i) { OP_ARITHMETIC(LSR_REG, emit_xor, 1, 0); } +static int OP_EOR_ASR_IMM(const u32 i) { OP_ARITHMETIC(ASR_IMM, emit_xor, 1, 0); } +static int OP_EOR_ASR_REG(const u32 i) { OP_ARITHMETIC(ASR_REG, emit_xor, 1, 0); } +static int OP_EOR_ROR_IMM(const u32 i) { OP_ARITHMETIC(ROR_IMM, emit_xor, 1, 0); } +static int OP_EOR_ROR_REG(const u32 i) { OP_ARITHMETIC(ROR_REG, emit_xor, 1, 0); } +static int OP_EOR_IMM_VAL(const u32 i) { OP_ARITHMETIC(IMM_VAL, emit_xor, 1, 0); } + +static int OP_ORR_LSL_IMM(const u32 i) { OP_ARITHMETIC(LSL_IMM, emit_or, 1, 0); } +static int OP_ORR_LSL_REG(const u32 i) { OP_ARITHMETIC(LSL_REG, emit_or, 1, 0); } +static int OP_ORR_LSR_IMM(const u32 i) { OP_ARITHMETIC(LSR_IMM, emit_or, 1, 0); } +static int OP_ORR_LSR_REG(const u32 i) { OP_ARITHMETIC(LSR_REG, emit_or, 1, 0); } +static int OP_ORR_ASR_IMM(const u32 i) { OP_ARITHMETIC(ASR_IMM, emit_or, 1, 0); } +static int OP_ORR_ASR_REG(const u32 i) { OP_ARITHMETIC(ASR_REG, emit_or, 1, 0); } +static int OP_ORR_ROR_IMM(const u32 i) { OP_ARITHMETIC(ROR_IMM, emit_or, 1, 0); } +static int OP_ORR_ROR_REG(const u32 i) { OP_ARITHMETIC(ROR_REG, emit_or, 1, 0); } +static int OP_ORR_IMM_VAL(const u32 i) { OP_ARITHMETIC(IMM_VAL, emit_or, 1, 0); } + +static int OP_ADD_LSL_IMM(const u32 i) { OP_ARITHMETIC(LSL_IMM, emit_add, 1, 0); } +static int OP_ADD_LSL_REG(const u32 i) { OP_ARITHMETIC(LSL_REG, emit_add, 1, 0); } +static int OP_ADD_LSR_IMM(const u32 i) { OP_ARITHMETIC(LSR_IMM, emit_add, 1, 0); } +static int OP_ADD_LSR_REG(const u32 i) { OP_ARITHMETIC(LSR_REG, emit_add, 1, 0); } +static int OP_ADD_ASR_IMM(const u32 i) { OP_ARITHMETIC(ASR_IMM, emit_add, 1, 0); } +static int OP_ADD_ASR_REG(const u32 i) { OP_ARITHMETIC(ASR_REG, emit_add, 1, 0); } +static int OP_ADD_ROR_IMM(const u32 i) { OP_ARITHMETIC(ROR_IMM, emit_add, 1, 0); } +static int OP_ADD_ROR_REG(const u32 i) { OP_ARITHMETIC(ROR_REG, emit_add, 1, 0); } +static int OP_ADD_IMM_VAL(const u32 i) { OP_ARITHMETIC(IMM_VAL, emit_add, 1, 0); } + +static int OP_SUB_LSL_IMM(const u32 i) { OP_ARITHMETIC(LSL_IMM, emit_sub, 0, 0); } +static int OP_SUB_LSL_REG(const u32 i) { OP_ARITHMETIC(LSL_REG, emit_sub, 0, 0); } +static int OP_SUB_LSR_IMM(const u32 i) { OP_ARITHMETIC(LSR_IMM, emit_sub, 0, 0); } +static int OP_SUB_LSR_REG(const u32 i) { OP_ARITHMETIC(LSR_REG, emit_sub, 0, 0); } +static int OP_SUB_ASR_IMM(const u32 i) { OP_ARITHMETIC(ASR_IMM, emit_sub, 0, 0); } +static int OP_SUB_ASR_REG(const u32 i) { OP_ARITHMETIC(ASR_REG, emit_sub, 0, 0); } +static int OP_SUB_ROR_IMM(const u32 i) { OP_ARITHMETIC(ROR_IMM, emit_sub, 0, 0); } +static int OP_SUB_ROR_REG(const u32 i) { OP_ARITHMETIC(ROR_REG, emit_sub, 0, 0); } +static int OP_SUB_IMM_VAL(const u32 i) { OP_ARITHMETIC(IMM_VAL, emit_sub, 0, 0); } + +static int OP_RSB_LSL_IMM(const u32 i) { OP_ARITHMETIC_R(LSL_IMM, emit_sub, 0); } +static int OP_RSB_LSL_REG(const u32 i) { OP_ARITHMETIC_R(LSL_REG, emit_sub, 0); } +static int OP_RSB_LSR_IMM(const u32 i) { OP_ARITHMETIC_R(LSR_IMM, emit_sub, 0); } +static int OP_RSB_LSR_REG(const u32 i) { OP_ARITHMETIC_R(LSR_REG, emit_sub, 0); } +static int OP_RSB_ASR_IMM(const u32 i) { OP_ARITHMETIC_R(ASR_IMM, emit_sub, 0); } +static int OP_RSB_ASR_REG(const u32 i) { OP_ARITHMETIC_R(ASR_REG, emit_sub, 0); } +static int OP_RSB_ROR_IMM(const u32 i) { OP_ARITHMETIC_R(ROR_IMM, emit_sub, 0); } +static int OP_RSB_ROR_REG(const u32 i) { OP_ARITHMETIC_R(ROR_REG, emit_sub, 0); } +static int OP_RSB_IMM_VAL(const u32 i) { OP_ARITHMETIC_R(IMM_VAL, emit_sub, 0); } + +// ================================ S instructions +static int OP_AND_S_LSL_IMM(const u32 i) { OP_ARITHMETIC_S(S_LSL_IMM, emit_ands, 1); } +static int OP_AND_S_LSL_REG(const u32 i) { OP_ARITHMETIC_S(S_LSL_REG, emit_ands, 1); } +static int OP_AND_S_LSR_IMM(const u32 i) { OP_ARITHMETIC_S(S_LSR_IMM, emit_ands, 1); } +static int OP_AND_S_LSR_REG(const u32 i) { OP_ARITHMETIC_S(S_LSR_REG, emit_ands, 1); } +static int OP_AND_S_ASR_IMM(const u32 i) { OP_ARITHMETIC_S(S_ASR_IMM, emit_ands, 1); } +static int OP_AND_S_ASR_REG(const u32 i) { OP_ARITHMETIC_S(S_ASR_REG, emit_ands, 1); } +static int OP_AND_S_ROR_IMM(const u32 i) { OP_ARITHMETIC_S(S_ROR_IMM, emit_ands, 1); } +static int OP_AND_S_ROR_REG(const u32 i) { OP_ARITHMETIC_S(S_ROR_REG, emit_ands, 1); } +static int OP_AND_S_IMM_VAL(const u32 i) { OP_ARITHMETIC_S(S_IMM_VAL, emit_ands, 1); } + +static int OP_EOR_S_LSL_IMM(const u32 i) { OP_ARITHMETIC_S(S_LSL_IMM, emit_xors, 1); } +static int OP_EOR_S_LSL_REG(const u32 i) { OP_ARITHMETIC_S(S_LSL_REG, emit_xors, 1); } +static int OP_EOR_S_LSR_IMM(const u32 i) { OP_ARITHMETIC_S(S_LSR_IMM, emit_xors, 1); } +static int OP_EOR_S_LSR_REG(const u32 i) { OP_ARITHMETIC_S(S_LSR_REG, emit_xors, 1); } +static int OP_EOR_S_ASR_IMM(const u32 i) { OP_ARITHMETIC_S(S_ASR_IMM, emit_xors, 1); } +static int OP_EOR_S_ASR_REG(const u32 i) { OP_ARITHMETIC_S(S_ASR_REG, emit_xors, 1); } +static int OP_EOR_S_ROR_IMM(const u32 i) { OP_ARITHMETIC_S(S_ROR_IMM, emit_xors, 1); } +static int OP_EOR_S_ROR_REG(const u32 i) { OP_ARITHMETIC_S(S_ROR_REG, emit_xors, 1); } +static int OP_EOR_S_IMM_VAL(const u32 i) { OP_ARITHMETIC_S(S_IMM_VAL, emit_xors, 1); } + +static int OP_ORR_S_LSL_IMM(const u32 i) { OP_ARITHMETIC_S(S_LSL_IMM, emit_ors, 1); } +static int OP_ORR_S_LSL_REG(const u32 i) { OP_ARITHMETIC_S(S_LSL_REG, emit_ors, 1); } +static int OP_ORR_S_LSR_IMM(const u32 i) { OP_ARITHMETIC_S(S_LSR_IMM, emit_ors, 1); } +static int OP_ORR_S_LSR_REG(const u32 i) { OP_ARITHMETIC_S(S_LSR_REG, emit_ors, 1); } +static int OP_ORR_S_ASR_IMM(const u32 i) { OP_ARITHMETIC_S(S_ASR_IMM, emit_ors, 1); } +static int OP_ORR_S_ASR_REG(const u32 i) { OP_ARITHMETIC_S(S_ASR_REG, emit_ors, 1); } +static int OP_ORR_S_ROR_IMM(const u32 i) { OP_ARITHMETIC_S(S_ROR_IMM, emit_ors, 1); } +static int OP_ORR_S_ROR_REG(const u32 i) { OP_ARITHMETIC_S(S_ROR_REG, emit_ors, 1); } +static int OP_ORR_S_IMM_VAL(const u32 i) { OP_ARITHMETIC_S(S_IMM_VAL, emit_ors, 1); } + +static int OP_ADD_S_LSL_IMM(const u32 i) { OP_ARITHMETIC(LSL_IMM, emit_adds, 1, 1); } +static int OP_ADD_S_LSL_REG(const u32 i) { OP_ARITHMETIC(LSL_REG, emit_adds, 1, 1); } +static int OP_ADD_S_LSR_IMM(const u32 i) { OP_ARITHMETIC(LSR_IMM, emit_adds, 1, 1); } +static int OP_ADD_S_LSR_REG(const u32 i) { OP_ARITHMETIC(LSR_REG, emit_adds, 1, 1); } +static int OP_ADD_S_ASR_IMM(const u32 i) { OP_ARITHMETIC(ASR_IMM, emit_adds, 1, 1); } +static int OP_ADD_S_ASR_REG(const u32 i) { OP_ARITHMETIC(ASR_REG, emit_adds, 1, 1); } +static int OP_ADD_S_ROR_IMM(const u32 i) { OP_ARITHMETIC(ROR_IMM, emit_adds, 1, 1); } +static int OP_ADD_S_ROR_REG(const u32 i) { OP_ARITHMETIC(ROR_REG, emit_adds, 1, 1); } +static int OP_ADD_S_IMM_VAL(const u32 i) { OP_ARITHMETIC(IMM_VAL, emit_adds, 1, 1); } + +static int OP_SUB_S_LSL_IMM(const u32 i) { OP_ARITHMETIC(LSL_IMM, emit_subs, 0, 1); } +static int OP_SUB_S_LSL_REG(const u32 i) { OP_ARITHMETIC(LSL_REG, emit_subs, 0, 1); } +static int OP_SUB_S_LSR_IMM(const u32 i) { OP_ARITHMETIC(LSR_IMM, emit_subs, 0, 1); } +static int OP_SUB_S_LSR_REG(const u32 i) { OP_ARITHMETIC(LSR_REG, emit_subs, 0, 1); } +static int OP_SUB_S_ASR_IMM(const u32 i) { OP_ARITHMETIC(ASR_IMM, emit_subs, 0, 1); } +static int OP_SUB_S_ASR_REG(const u32 i) { OP_ARITHMETIC(ASR_REG, emit_subs, 0, 1); } +static int OP_SUB_S_ROR_IMM(const u32 i) { OP_ARITHMETIC(ROR_IMM, emit_subs, 0, 1); } +static int OP_SUB_S_ROR_REG(const u32 i) { OP_ARITHMETIC(ROR_REG, emit_subs, 0, 1); } +static int OP_SUB_S_IMM_VAL(const u32 i) { OP_ARITHMETIC(IMM_VAL, emit_subs, 0, 1); } + +static int OP_RSB_S_LSL_IMM(const u32 i) { OP_ARITHMETIC_R(LSL_IMM, emit_subs, 1); } +static int OP_RSB_S_LSL_REG(const u32 i) { OP_ARITHMETIC_R(LSL_REG, emit_subs, 1); } +static int OP_RSB_S_LSR_IMM(const u32 i) { OP_ARITHMETIC_R(LSR_IMM, emit_subs, 1); } +static int OP_RSB_S_LSR_REG(const u32 i) { OP_ARITHMETIC_R(LSR_REG, emit_subs, 1); } +static int OP_RSB_S_ASR_IMM(const u32 i) { OP_ARITHMETIC_R(ASR_IMM, emit_subs, 1); } +static int OP_RSB_S_ASR_REG(const u32 i) { OP_ARITHMETIC_R(ASR_REG, emit_subs, 1); } +static int OP_RSB_S_ROR_IMM(const u32 i) { OP_ARITHMETIC_R(ROR_IMM, emit_subs, 1); } +static int OP_RSB_S_ROR_REG(const u32 i) { OP_ARITHMETIC_R(ROR_REG, emit_subs, 1); } +static int OP_RSB_S_IMM_VAL(const u32 i) { OP_ARITHMETIC_R(IMM_VAL, emit_subs, 1); } + +static int OP_ADC_LSL_IMM(const u32 i) { OP_ARITHMETIC(LSL_IMM; GET_CARRY(0), emit_adcs, 1, 0); } +static int OP_ADC_LSL_REG(const u32 i) { OP_ARITHMETIC(LSL_REG; GET_CARRY(0), emit_adcs, 1, 0); } +static int OP_ADC_LSR_IMM(const u32 i) { OP_ARITHMETIC(LSR_IMM; GET_CARRY(0), emit_adcs, 1, 0); } +static int OP_ADC_LSR_REG(const u32 i) { OP_ARITHMETIC(LSR_REG; GET_CARRY(0), emit_adcs, 1, 0); } +static int OP_ADC_ASR_IMM(const u32 i) { OP_ARITHMETIC(ASR_IMM; GET_CARRY(0), emit_adcs, 1, 0); } +static int OP_ADC_ASR_REG(const u32 i) { OP_ARITHMETIC(ASR_REG; GET_CARRY(0), emit_adcs, 1, 0); } +static int OP_ADC_ROR_IMM(const u32 i) { OP_ARITHMETIC(ROR_IMM; GET_CARRY(0), emit_adcs, 1, 0); } +static int OP_ADC_ROR_REG(const u32 i) { OP_ARITHMETIC(ROR_REG; GET_CARRY(0), emit_adcs, 1, 0); } +static int OP_ADC_IMM_VAL(const u32 i) { OP_ARITHMETIC(IMM_VAL; GET_CARRY(0), emit_adcs, 1, 0); } + +static int OP_ADC_S_LSL_IMM(const u32 i) { OP_ARITHMETIC(LSL_IMM; GET_CARRY(0), emit_adcs, 1, 1); } +static int OP_ADC_S_LSL_REG(const u32 i) { OP_ARITHMETIC(LSL_REG; GET_CARRY(0), emit_adcs, 1, 1); } +static int OP_ADC_S_LSR_IMM(const u32 i) { OP_ARITHMETIC(LSR_IMM; GET_CARRY(0), emit_adcs, 1, 1); } +static int OP_ADC_S_LSR_REG(const u32 i) { OP_ARITHMETIC(LSR_REG; GET_CARRY(0), emit_adcs, 1, 1); } +static int OP_ADC_S_ASR_IMM(const u32 i) { OP_ARITHMETIC(ASR_IMM; GET_CARRY(0), emit_adcs, 1, 1); } +static int OP_ADC_S_ASR_REG(const u32 i) { OP_ARITHMETIC(ASR_REG; GET_CARRY(0), emit_adcs, 1, 1); } +static int OP_ADC_S_ROR_IMM(const u32 i) { OP_ARITHMETIC(ROR_IMM; GET_CARRY(0), emit_adcs, 1, 1); } +static int OP_ADC_S_ROR_REG(const u32 i) { OP_ARITHMETIC(ROR_REG; GET_CARRY(0), emit_adcs, 1, 1); } +static int OP_ADC_S_IMM_VAL(const u32 i) { OP_ARITHMETIC(IMM_VAL; GET_CARRY(0), emit_adcs, 1, 1); } + +static int OP_SBC_LSL_IMM(const u32 i) { OP_ARITHMETIC(LSL_IMM; GET_CARRY(1), emit_sbc, 0, 0); } +static int OP_SBC_LSL_REG(const u32 i) { OP_ARITHMETIC(LSL_REG; GET_CARRY(1), emit_sbc, 0, 0); } +static int OP_SBC_LSR_IMM(const u32 i) { OP_ARITHMETIC(LSR_IMM; GET_CARRY(1), emit_sbc, 0, 0); } +static int OP_SBC_LSR_REG(const u32 i) { OP_ARITHMETIC(LSR_REG; GET_CARRY(1), emit_sbc, 0, 0); } +static int OP_SBC_ASR_IMM(const u32 i) { OP_ARITHMETIC(ASR_IMM; GET_CARRY(1), emit_sbc, 0, 0); } +static int OP_SBC_ASR_REG(const u32 i) { OP_ARITHMETIC(ASR_REG; GET_CARRY(1), emit_sbc, 0, 0); } +static int OP_SBC_ROR_IMM(const u32 i) { OP_ARITHMETIC(ROR_IMM; GET_CARRY(1), emit_sbc, 0, 0); } +static int OP_SBC_ROR_REG(const u32 i) { OP_ARITHMETIC(ROR_REG; GET_CARRY(1), emit_sbc, 0, 0); } +static int OP_SBC_IMM_VAL(const u32 i) { OP_ARITHMETIC(IMM_VAL; GET_CARRY(1), emit_sbc, 0, 0); } + +static int OP_SBC_S_LSL_IMM(const u32 i) { OP_ARITHMETIC(LSL_IMM; GET_CARRY(1), emit_sbcs, 0, 1); } +static int OP_SBC_S_LSL_REG(const u32 i) { OP_ARITHMETIC(LSL_REG; GET_CARRY(1), emit_sbcs, 0, 1); } +static int OP_SBC_S_LSR_IMM(const u32 i) { OP_ARITHMETIC(LSR_IMM; GET_CARRY(1), emit_sbcs, 0, 1); } +static int OP_SBC_S_LSR_REG(const u32 i) { OP_ARITHMETIC(LSR_REG; GET_CARRY(1), emit_sbcs, 0, 1); } +static int OP_SBC_S_ASR_IMM(const u32 i) { OP_ARITHMETIC(ASR_IMM; GET_CARRY(1), emit_sbcs, 0, 1); } +static int OP_SBC_S_ASR_REG(const u32 i) { OP_ARITHMETIC(ASR_REG; GET_CARRY(1), emit_sbcs, 0, 1); } +static int OP_SBC_S_ROR_IMM(const u32 i) { OP_ARITHMETIC(ROR_IMM; GET_CARRY(1), emit_sbcs, 0, 1); } +static int OP_SBC_S_ROR_REG(const u32 i) { OP_ARITHMETIC(ROR_REG; GET_CARRY(1), emit_sbcs, 0, 1); } +static int OP_SBC_S_IMM_VAL(const u32 i) { OP_ARITHMETIC(IMM_VAL; GET_CARRY(1), emit_sbcs, 0, 1); } + +static int OP_RSC_LSL_IMM(const u32 i) { OP_ARITHMETIC_R(LSL_IMM; GET_CARRY(1), emit_sbc, 0); } +static int OP_RSC_LSL_REG(const u32 i) { OP_ARITHMETIC_R(LSL_REG; GET_CARRY(1), emit_sbc, 0); } +static int OP_RSC_LSR_IMM(const u32 i) { OP_ARITHMETIC_R(LSR_IMM; GET_CARRY(1), emit_sbc, 0); } +static int OP_RSC_LSR_REG(const u32 i) { OP_ARITHMETIC_R(LSR_REG; GET_CARRY(1), emit_sbc, 0); } +static int OP_RSC_ASR_IMM(const u32 i) { OP_ARITHMETIC_R(ASR_IMM; GET_CARRY(1), emit_sbc, 0); } +static int OP_RSC_ASR_REG(const u32 i) { OP_ARITHMETIC_R(ASR_REG; GET_CARRY(1), emit_sbc, 0); } +static int OP_RSC_ROR_IMM(const u32 i) { OP_ARITHMETIC_R(ROR_IMM; GET_CARRY(1), emit_sbc, 0); } +static int OP_RSC_ROR_REG(const u32 i) { OP_ARITHMETIC_R(ROR_REG; GET_CARRY(1), emit_sbc, 0); } +static int OP_RSC_IMM_VAL(const u32 i) { OP_ARITHMETIC_R(IMM_VAL; GET_CARRY(1), emit_sbc, 0); } + +static int OP_RSC_S_LSL_IMM(const u32 i) { OP_ARITHMETIC_R(LSL_IMM; GET_CARRY(1), emit_sbcs, 1); } +static int OP_RSC_S_LSL_REG(const u32 i) { OP_ARITHMETIC_R(LSL_REG; GET_CARRY(1), emit_sbcs, 1); } +static int OP_RSC_S_LSR_IMM(const u32 i) { OP_ARITHMETIC_R(LSR_IMM; GET_CARRY(1), emit_sbcs, 1); } +static int OP_RSC_S_LSR_REG(const u32 i) { OP_ARITHMETIC_R(LSR_REG; GET_CARRY(1), emit_sbcs, 1); } +static int OP_RSC_S_ASR_IMM(const u32 i) { OP_ARITHMETIC_R(ASR_IMM; GET_CARRY(1), emit_sbcs, 1); } +static int OP_RSC_S_ASR_REG(const u32 i) { OP_ARITHMETIC_R(ASR_REG; GET_CARRY(1), emit_sbcs, 1); } +static int OP_RSC_S_ROR_IMM(const u32 i) { OP_ARITHMETIC_R(ROR_IMM; GET_CARRY(1), emit_sbcs, 1); } +static int OP_RSC_S_ROR_REG(const u32 i) { OP_ARITHMETIC_R(ROR_REG; GET_CARRY(1), emit_sbcs, 1); } +static int OP_RSC_S_IMM_VAL(const u32 i) { OP_ARITHMETIC_R(IMM_VAL; GET_CARRY(1), emit_sbcs, 1); } + +static int OP_BIC_LSL_IMM(const u32 i) { OP_ARITHMETIC(LSL_IMM; emit_not(g_out, rhs_reg, rhs_reg), emit_and, 1, 0); } +static int OP_BIC_LSL_REG(const u32 i) { OP_ARITHMETIC(LSL_REG; emit_not(g_out, rhs_reg, rhs_reg), emit_and, 1, 0); } +static int OP_BIC_LSR_IMM(const u32 i) { OP_ARITHMETIC(LSR_IMM; emit_not(g_out, rhs_reg, rhs_reg), emit_and, 1, 0); } +static int OP_BIC_LSR_REG(const u32 i) { OP_ARITHMETIC(LSR_REG; emit_not(g_out, rhs_reg, rhs_reg), emit_and, 1, 0); } +static int OP_BIC_ASR_IMM(const u32 i) { OP_ARITHMETIC(ASR_IMM; emit_not(g_out, rhs_reg, rhs_reg), emit_and, 1, 0); } +static int OP_BIC_ASR_REG(const u32 i) { OP_ARITHMETIC(ASR_REG; emit_not(g_out, rhs_reg, rhs_reg), emit_and, 1, 0); } +static int OP_BIC_ROR_IMM(const u32 i) { OP_ARITHMETIC(ROR_IMM; emit_not(g_out, rhs_reg, rhs_reg), emit_and, 1, 0); } +static int OP_BIC_ROR_REG(const u32 i) { OP_ARITHMETIC(ROR_REG; emit_not(g_out, rhs_reg, rhs_reg), emit_and, 1, 0); } +static int OP_BIC_IMM_VAL(const u32 i) { OP_ARITHMETIC(IMM_VAL; rhs_var = ~rhs_var; emit_movimm(g_out, rhs_var, rhs_reg);, emit_and, 1, 0); } + +static int OP_BIC_S_LSL_IMM(const u32 i) { OP_ARITHMETIC_S(S_LSL_IMM; emit_not(g_out, rhs_reg, rhs_reg), emit_ands, 1); } +static int OP_BIC_S_LSL_REG(const u32 i) { OP_ARITHMETIC_S(S_LSL_REG; emit_not(g_out, rhs_reg, rhs_reg), emit_ands, 1); } +static int OP_BIC_S_LSR_IMM(const u32 i) { OP_ARITHMETIC_S(S_LSR_IMM; emit_not(g_out, rhs_reg, rhs_reg), emit_ands, 1); } +static int OP_BIC_S_LSR_REG(const u32 i) { OP_ARITHMETIC_S(S_LSR_REG; emit_not(g_out, rhs_reg, rhs_reg), emit_ands, 1); } +static int OP_BIC_S_ASR_IMM(const u32 i) { OP_ARITHMETIC_S(S_ASR_IMM; emit_not(g_out, rhs_reg, rhs_reg), emit_ands, 1); } +static int OP_BIC_S_ASR_REG(const u32 i) { OP_ARITHMETIC_S(S_ASR_REG; emit_not(g_out, rhs_reg, rhs_reg), emit_ands, 1); } +static int OP_BIC_S_ROR_IMM(const u32 i) { OP_ARITHMETIC_S(S_ROR_IMM; emit_not(g_out, rhs_reg, rhs_reg), emit_ands, 1); } +static int OP_BIC_S_ROR_REG(const u32 i) { OP_ARITHMETIC_S(S_ROR_REG; emit_not(g_out, rhs_reg, rhs_reg), emit_ands, 1); } +static int OP_BIC_S_IMM_VAL(const u32 i) { OP_ARITHMETIC_S(S_IMM_VAL; rhs_var = ~rhs_var; emit_movimm(g_out, rhs_var, rhs_reg);, emit_ands, 1); } + +//----------------------------------------------------------------------------- +// TST +//----------------------------------------------------------------------------- +#define OP_TST_(arg) \ + arg; \ + emit_test(g_out, reg_pos_ptr(16, R1, VALUE), rhs_reg); \ + SET_NZC; \ + return 1; + +static int OP_TST_LSL_IMM(const u32 i) { OP_TST_(S_LSL_IMM); } +static int OP_TST_LSL_REG(const u32 i) { OP_TST_(S_LSL_REG); } +static int OP_TST_LSR_IMM(const u32 i) { OP_TST_(S_LSR_IMM); } +static int OP_TST_LSR_REG(const u32 i) { OP_TST_(S_LSR_REG); } +static int OP_TST_ASR_IMM(const u32 i) { OP_TST_(S_ASR_IMM); } +static int OP_TST_ASR_REG(const u32 i) { OP_TST_(S_ASR_REG); } +static int OP_TST_ROR_IMM(const u32 i) { OP_TST_(S_ROR_IMM); } +static int OP_TST_ROR_REG(const u32 i) { OP_TST_(S_ROR_REG); } +static int OP_TST_IMM_VAL(const u32 i) { OP_TST_(S_IMM_VAL); } + +//----------------------------------------------------------------------------- +// TEQ +//----------------------------------------------------------------------------- +#define OP_TEQ_(arg) \ + arg; \ + emit_teq(g_out, reg_pos_ptr(16, R1, VALUE), rhs_reg); \ + SET_NZC; \ + return 1; + +static int OP_TEQ_LSL_IMM(const u32 i) { OP_TEQ_(S_LSL_IMM); } +static int OP_TEQ_LSL_REG(const u32 i) { OP_TEQ_(S_LSL_REG); } +static int OP_TEQ_LSR_IMM(const u32 i) { OP_TEQ_(S_LSR_IMM); } +static int OP_TEQ_LSR_REG(const u32 i) { OP_TEQ_(S_LSR_REG); } +static int OP_TEQ_ASR_IMM(const u32 i) { OP_TEQ_(S_ASR_IMM); } +static int OP_TEQ_ASR_REG(const u32 i) { OP_TEQ_(S_ASR_REG); } +static int OP_TEQ_ROR_IMM(const u32 i) { OP_TEQ_(S_ROR_IMM); } +static int OP_TEQ_ROR_REG(const u32 i) { OP_TEQ_(S_ROR_REG); } +static int OP_TEQ_IMM_VAL(const u32 i) { OP_TEQ_(S_IMM_VAL); } + +//----------------------------------------------------------------------------- +// CMP +//----------------------------------------------------------------------------- +#define OP_CMP(arg) \ + arg; \ + emit_cmp(g_out, reg_pos_ptr(16, R1, VALUE), rhs); \ + SET_NZCV(1); \ + return 1; + +static int OP_CMP_LSL_IMM(const u32 i) { OP_CMP(LSL_IMM); } +static int OP_CMP_LSL_REG(const u32 i) { OP_CMP(LSL_REG); } +static int OP_CMP_LSR_IMM(const u32 i) { OP_CMP(LSR_IMM); } +static int OP_CMP_LSR_REG(const u32 i) { OP_CMP(LSR_REG); } +static int OP_CMP_ASR_IMM(const u32 i) { OP_CMP(ASR_IMM); } +static int OP_CMP_ASR_REG(const u32 i) { OP_CMP(ASR_REG); } +static int OP_CMP_ROR_IMM(const u32 i) { OP_CMP(ROR_IMM); } +static int OP_CMP_ROR_REG(const u32 i) { OP_CMP(ROR_REG); } +static int OP_CMP_IMM_VAL(const u32 i) { OP_CMP(IMM_VAL); } +#undef OP_CMP + +//----------------------------------------------------------------------------- +// CMN +//----------------------------------------------------------------------------- +#define OP_CMN(arg) \ + arg; \ + u32 rhs_imm = *(u32*)&rhs_var; \ + int sign = rhs_is_imm && (rhs_imm != -rhs_imm); \ + if(sign) \ + emit_cmpimm(g_out, reg_pos_ptr(16, R1, VALUE), -rhs_imm); \ + else \ + { \ + reg_pos_ptr(16, lhs, VALUE);\ + emit_adds(g_out, lhs, rhs, lhs); \ + } \ + SET_NZCV(sign); \ + return 1; + +static int OP_CMN_LSL_IMM(const u32 i) { OP_CMN(LSL_IMM); } +static int OP_CMN_LSL_REG(const u32 i) { OP_CMN(LSL_REG); } +static int OP_CMN_LSR_IMM(const u32 i) { OP_CMN(LSR_IMM); } +static int OP_CMN_LSR_REG(const u32 i) { OP_CMN(LSR_REG); } +static int OP_CMN_ASR_IMM(const u32 i) { OP_CMN(ASR_IMM); } +static int OP_CMN_ASR_REG(const u32 i) { OP_CMN(ASR_REG); } +static int OP_CMN_ROR_IMM(const u32 i) { OP_CMN(ROR_IMM); } +static int OP_CMN_ROR_REG(const u32 i) { OP_CMN(ROR_REG); } +static int OP_CMN_IMM_VAL(const u32 i) { OP_CMN(IMM_VAL); } +#undef OP_CMN + +//----------------------------------------------------------------------------- +// MOV +//----------------------------------------------------------------------------- +#define OP_MOV(arg) \ + arg; \ + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(12, R1, ADDRESS), rhs);\ + if(REG_POS(i,12)==15) \ + { \ + emit_write_ptr32_regptrTO_regFROM(g_out, cpu_ptr(next_instruction, R1, ADDRESS), rhs);\ + return 1; \ + } \ + return 1; + +static int OP_MOV_LSL_IMM(const u32 i) { if (i == 0xE1A00000) { /* nop */ JIT_COMMENT("nop"); return 1; } OP_MOV(LSL_IMM); } +static int OP_MOV_LSL_REG(const u32 i) { OP_MOV(LSL_REG; if (REG_POS(i,0) == 15) emit_addimm(g_out, rhs, 4, rhs);); } +static int OP_MOV_LSR_IMM(const u32 i) { OP_MOV(LSR_IMM); } +static int OP_MOV_LSR_REG(const u32 i) { OP_MOV(LSR_REG; if (REG_POS(i,0) == 15) emit_addimm(g_out, rhs, 4, rhs);); } +static int OP_MOV_ASR_IMM(const u32 i) { OP_MOV(ASR_IMM); } +static int OP_MOV_ASR_REG(const u32 i) { OP_MOV(ASR_REG); } +static int OP_MOV_ROR_IMM(const u32 i) { OP_MOV(ROR_IMM); } +static int OP_MOV_ROR_REG(const u32 i) { OP_MOV(ROR_REG); } +static int OP_MOV_IMM_VAL(const u32 i) { OP_MOV(IMM_VAL); } + +#define OP_MOV_S(arg) \ + arg; \ + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(12, R1, ADDRESS), rhs);\ + if(REG_POS(i,12)==15) \ + { \ + S_DST_R15; \ + emit_inc_ptr_imm(g_out, (uintptr_t)&bb_total_cycles, 2); \ + return 1; \ + } \ + if(!rhs_is_imm) \ + emit_cmpimm(g_out, rhs_reg, 0); \ + else \ + emit_cmpimm(g_out, reg_pos_ptr(12, R1, VALUE), 0); \ + SET_NZC; \ + return 1; + +static int OP_MOV_S_LSL_IMM(const u32 i) { OP_MOV_S(S_LSL_IMM); } +static int OP_MOV_S_LSL_REG(const u32 i) { OP_MOV_S(S_LSL_REG; if (REG_POS(i,0) == 15) emit_addimm(g_out, rhs_reg, 4, rhs_reg);); } +static int OP_MOV_S_LSR_IMM(const u32 i) { OP_MOV_S(S_LSR_IMM); } +static int OP_MOV_S_LSR_REG(const u32 i) { OP_MOV_S(S_LSR_REG; if (REG_POS(i,0) == 15) emit_addimm(g_out, rhs_reg, 4, rhs_reg);); } +static int OP_MOV_S_ASR_IMM(const u32 i) { OP_MOV_S(S_ASR_IMM); } +static int OP_MOV_S_ASR_REG(const u32 i) { OP_MOV_S(S_ASR_REG); } +static int OP_MOV_S_ROR_IMM(const u32 i) { OP_MOV_S(S_ROR_IMM); } +static int OP_MOV_S_ROR_REG(const u32 i) { OP_MOV_S(S_ROR_REG); } +static int OP_MOV_S_IMM_VAL(const u32 i) { OP_MOV_S(S_IMM_VAL); } + +//----------------------------------------------------------------------------- +// MVN +//----------------------------------------------------------------------------- +static int OP_MVN_LSL_IMM(const u32 i) { OP_MOV(LSL_IMM; emit_not(g_out, rhs_reg, rhs_reg)); } +static int OP_MVN_LSL_REG(const u32 i) { OP_MOV(LSL_REG; emit_not(g_out, rhs_reg, rhs_reg)); } +static int OP_MVN_LSR_IMM(const u32 i) { OP_MOV(LSR_IMM; emit_not(g_out, rhs_reg, rhs_reg)); } +static int OP_MVN_LSR_REG(const u32 i) { OP_MOV(LSR_REG; emit_not(g_out, rhs_reg, rhs_reg)); } +static int OP_MVN_ASR_IMM(const u32 i) { OP_MOV(ASR_IMM; emit_not(g_out, rhs_reg, rhs_reg)); } +static int OP_MVN_ASR_REG(const u32 i) { OP_MOV(ASR_REG; emit_not(g_out, rhs_reg, rhs_reg)); } +static int OP_MVN_ROR_IMM(const u32 i) { OP_MOV(ROR_IMM; emit_not(g_out, rhs_reg, rhs_reg)); } +static int OP_MVN_ROR_REG(const u32 i) { OP_MOV(ROR_REG; emit_not(g_out, rhs_reg, rhs_reg)); } +static int OP_MVN_IMM_VAL(const u32 i) { OP_MOV(IMM_VAL; rhs_var = ~rhs_var; emit_movimm(g_out, rhs_var, rhs_reg);); } + +static int OP_MVN_S_LSL_IMM(const u32 i) { OP_MOV_S(S_LSL_IMM; emit_not(g_out, rhs_reg, rhs_reg)); } +static int OP_MVN_S_LSL_REG(const u32 i) { OP_MOV_S(S_LSL_REG; emit_not(g_out, rhs_reg, rhs_reg)); } +static int OP_MVN_S_LSR_IMM(const u32 i) { OP_MOV_S(S_LSR_IMM; emit_not(g_out, rhs_reg, rhs_reg)); } +static int OP_MVN_S_LSR_REG(const u32 i) { OP_MOV_S(S_LSR_REG; emit_not(g_out, rhs_reg, rhs_reg)); } +static int OP_MVN_S_ASR_IMM(const u32 i) { OP_MOV_S(S_ASR_IMM; emit_not(g_out, rhs_reg, rhs_reg)); } +static int OP_MVN_S_ASR_REG(const u32 i) { OP_MOV_S(S_ASR_REG; emit_not(g_out, rhs_reg, rhs_reg)); } +static int OP_MVN_S_ROR_IMM(const u32 i) { OP_MOV_S(S_ROR_IMM; emit_not(g_out, rhs_reg, rhs_reg)); } +static int OP_MVN_S_ROR_REG(const u32 i) { OP_MOV_S(S_ROR_REG; emit_not(g_out, rhs_reg, rhs_reg)); } +static int OP_MVN_S_IMM_VAL(const u32 i) { OP_MOV_S(S_IMM_VAL; rhs_var = ~rhs_var; emit_movimm(g_out, rhs_var, rhs_reg);); } + +//----------------------------------------------------------------------------- +// QADD / QDADD / QSUB / QDSUB +//----------------------------------------------------------------------------- +// TODO +static int OP_QADD(const u32 i) { printf("JIT: unimplemented OP_QADD\n"); return 0; } +static int OP_QSUB(const u32 i) { printf("JIT: unimplemented OP_QSUB\n"); return 0; } +static int OP_QDADD(const u32 i) { printf("JIT: unimplemented OP_QDADD\n"); return 0; } +static int OP_QDSUB(const u32 i) { printf("JIT: unimplemented OP_QDSUB\n"); return 0; } + +//----------------------------------------------------------------------------- +// MUL +//----------------------------------------------------------------------------- +static void MUL_Mxx_END(int reg, bool sign, int cycles) +{ + if(sign) + { + emit_mov(g_out, reg, y); + emit_asr(g_out, reg, 31); + emit_xor(g_out, reg, y, reg); + } + emit_orimm(g_out, reg, 1, reg); + + emit_movimm(g_out, 31, R0); + emit_clz(g_out, reg, tmp); + emit_sub(g_out, R0, tmp, tmp); + emit_lsr(g_out, tmp, 3); + emit_addimm(g_out, tmp, cycles+1, tmp); + emit_write_ptr32_reg(g_out, (uintptr_t)&bb_cycles, tmp); +} + +#define OP_MUL_(op, width, sign, accum, flags) \ + reg_pos_ptr(0, lhs, VALUE); \ + reg_pos_ptr(8, rhs, VALUE); \ + op; \ + if(width && accum) \ + { \ + if(flags) \ + { \ + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(12, R3, ADDRESS), R0); \ + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(16, R3, ADDRESS), R1); \ + } \ + else \ + { \ + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(12, R3, ADDRESS), R0); \ + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(16, R3, ADDRESS), R1); \ + } \ + } \ + else if(width) \ + { \ + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(12, R3, ADDRESS), R0); \ + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(16, R3, ADDRESS), R1); \ + } \ + else \ + { \ + if (accum) { emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(16, R3, ADDRESS), hi); } \ + else { emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(16, R3, ADDRESS), lhs); } \ + } \ + if (flags) { SET_NZ(0); } \ + MUL_Mxx_END(rhs, sign, 1+width+accum); \ + return 1;\ + +#define hi R2 + +static int OP_MUL(const u32 i) { OP_MUL_(emit_mul(g_out, lhs,rhs), 0, 1, 0, 0); } +static int OP_MLA(const u32 i) { OP_MUL_(emit_mla(g_out, hi,reg_pos_ptr(12, R0, VALUE),lhs,rhs), 0, 1, 1, 0); } +static int OP_UMULL(const u32 i) { OP_MUL_(emit_umull(g_out, R0, R1,lhs,rhs), 1, 0, 0, 0); } +static int OP_UMLAL(const u32 i) { OP_MUL_(emit_umlal(g_out, reg_pos_ptr(12, R0, VALUE), reg_pos_ptr(16, R1, VALUE),lhs,rhs), 1, 0, 1, 0); } +static int OP_SMULL(const u32 i) { OP_MUL_(emit_smull(g_out, R0, R1,lhs,rhs), 1, 1, 0, 0); } +static int OP_SMLAL(const u32 i) { OP_MUL_(emit_smlal(g_out, reg_pos_ptr(12, R0, VALUE), reg_pos_ptr(16, R1, VALUE),lhs,rhs), 1, 1, 1, 0); } + +static int OP_MUL_S(const u32 i) { OP_MUL_(emit_muls(g_out, lhs,rhs), 0, 1, 0, 1); } +static int OP_MLA_S(const u32 i) { OP_MUL_(emit_mlas(g_out, hi,reg_pos_ptr(12, R1, VALUE),lhs,rhs), 0, 1, 1, 1); } +static int OP_UMULL_S(const u32 i) { OP_MUL_(emit_umulls(g_out, R0, R1, lhs,rhs), 1, 0, 0, 1); } +static int OP_UMLAL_S(const u32 i) { OP_MUL_(emit_umlals(g_out, reg_pos_ptr(12, R0, VALUE), reg_pos_ptr(16, R1, VALUE),lhs,rhs), 1, 0, 1, 1); } +static int OP_SMULL_S(const u32 i) { OP_MUL_(emit_smulls(g_out, R0, R1,lhs,rhs), 1, 1, 0, 1); } +static int OP_SMLAL_S(const u32 i) { OP_MUL_(emit_smlals(g_out, reg_pos_ptr(12, R0, VALUE), reg_pos_ptr(16, R1, VALUE),lhs,rhs), 1, 1, 1, 1); } + +#undef hi + +#define OP_MULxy_(op, x, y, width, accum, flags) \ + reg_pos_ptrS##x(0, lhs, VALUE); \ + reg_pos_ptrS##y(8, rhs, VALUE); \ + op; \ + if(width && accum) \ + { \ + if(flags) \ + { \ + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(12, R3, ADDRESS), R0); \ + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(16, R3, ADDRESS), R1); \ + } \ + else \ + { \ + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(12, R3, ADDRESS), R0); \ + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(16, R3, ADDRESS), R1); \ + } \ + } \ + else if(width) \ + { \ + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(12, R3, ADDRESS), R0); \ + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(16, R3, ADDRESS), R1); \ + } \ + else \ + { \ + if (accum) { emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(16, R3, ADDRESS), hi); } \ + else { emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(16, R3, ADDRESS), lhs); } \ + } \ + if (flags) { SET_Q; } \ + return 1;\ + +#define hi R2 + + +//----------------------------------------------------------------------------- +// SMUL +//----------------------------------------------------------------------------- +static int OP_SMUL_B_B(const u32 i) { OP_MULxy_(emit_smulbb(g_out, lhs, rhs), L, L, 0, 0, 0); } +static int OP_SMUL_B_T(const u32 i) { OP_MULxy_(emit_smulbt(g_out, lhs, rhs), L, H, 0, 0, 0); } +static int OP_SMUL_T_B(const u32 i) { OP_MULxy_(emit_smultb(g_out, lhs, rhs), H, L, 0, 0, 0); } +static int OP_SMUL_T_T(const u32 i) { OP_MULxy_(emit_smultt(g_out, lhs, rhs), H, H, 0, 0, 0); } + +//----------------------------------------------------------------------------- +// SMLA +//----------------------------------------------------------------------------- +static int OP_SMLA_B_B(const u32 i) { + OP_MULxy_(emit_smlabb(g_out, hi,reg_pos_ptr(12, R0, VALUE),lhs,rhs), L, L, 0, 1, 1); +} +static int OP_SMLA_B_T(const u32 i) { OP_MULxy_(emit_smlabt(g_out, hi,reg_pos_ptr(12, R0, VALUE),lhs,rhs), L, H, 0, 1, 1); } +static int OP_SMLA_T_B(const u32 i) { OP_MULxy_(emit_smlatb(g_out, hi,reg_pos_ptr(12, R0, VALUE),lhs,rhs), H, L, 0, 1, 1); } +static int OP_SMLA_T_T(const u32 i) { OP_MULxy_(emit_smlatt(g_out, hi,reg_pos_ptr(12, R0, VALUE),lhs,rhs), H, H, 0, 1, 1); } + +//----------------------------------------------------------------------------- +// SMLAL +//----------------------------------------------------------------------------- +static int OP_SMLAL_B_B(const u32 i) { OP_MULxy_(emit_smlalbb(g_out, reg_pos_ptr(12, R0, VALUE), reg_pos_ptr(16, R1, VALUE),lhs,rhs), L, L, 1, 1, 1); } +static int OP_SMLAL_B_T(const u32 i) { OP_MULxy_(emit_smlalbt(g_out, reg_pos_ptr(12, R0, VALUE), reg_pos_ptr(16, R1, VALUE),lhs,rhs), L, H, 1, 1, 1); } +static int OP_SMLAL_T_B(const u32 i) { OP_MULxy_(emit_smlaltb(g_out, reg_pos_ptr(12, R0, VALUE), reg_pos_ptr(16, R1, VALUE),lhs,rhs), H, L, 1, 1, 1); } +static int OP_SMLAL_T_T(const u32 i) { OP_MULxy_(emit_smlaltt(g_out, reg_pos_ptr(12, R0, VALUE), reg_pos_ptr(16, R1, VALUE),lhs,rhs), H, H, 1, 1, 1); } + +#undef hi + +//----------------------------------------------------------------------------- +// SMULW / SMLAW +//----------------------------------------------------------------------------- + +#define OP_SMxxW_(op, x, accum, flags) \ + reg_pos_ptrS##x(8, lhs, VALUE);\ + reg_pos_ptr(0, rhs, VALUE); \ + op; \ + if (accum) { emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(16, R3, ADDRESS), hi); } \ + else { emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(16, R3, ADDRESS), lhs); } \ + if (flags) { SET_Q; } \ + return 1; + +#define hi R2 + +static int OP_SMULW_B(const u32 i) { OP_SMxxW_(emit_smulwb(g_out, lhs, rhs), L, 0, 0); } +static int OP_SMULW_T(const u32 i) { OP_SMxxW_(emit_smulwt(g_out, lhs, rhs), H, 0, 0); } +static int OP_SMLAW_B(const u32 i) { OP_SMxxW_(emit_smlawb(g_out, hi,reg_pos_ptr(12, R0, VALUE),lhs,rhs), L, 1, 1); } +static int OP_SMLAW_T(const u32 i) { OP_SMxxW_(emit_smlawt(g_out, hi,reg_pos_ptr(12, R0, VALUE),lhs,rhs), H, 1, 1); } + + +#undef hi + +//----------------------------------------------------------------------------- +// MRS / MSR +//----------------------------------------------------------------------------- +static int OP_MRS_CPSR(const u32 i) +{ + cpu_ptr(CPSR, x_reg, VALUE); + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(12, R1, ADDRESS), x_reg); + + return 1; +} + +static int OP_MRS_SPSR(const u32 i) +{ + cpu_ptr(SPSR, x_reg, VALUE); + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(12, R1, ADDRESS), x_reg); + + return 1; +} + +#define OP_MSR_(reg, args, sw) \ + args; \ + emit_mov(g_out, rhs, operand); \ + switch (((i>>16) & 0xF)) \ + { \ + case 0x1: \ + { \ + unsigned int __skip=genlabel(); \ + cpu_ptr(CPSR, mode, VALUE); \ + emit_andimm(g_out, mode, 0x1F, mode); \ + emit_cmpimm(g_out, mode, USR); \ + emit_branch_label(g_out, __skip, 0x0); \ + if (sw) \ + { \ + emit_mov(g_out, rhs_reg, mode); \ + emit_andimm(g_out, mode, 0x1F, mode); \ + emit_ptr(g_out, (uintptr_t)&ARMPROC, R0); \ + emit_mov(g_out, mode, R1);\ + emit_ptr(g_out, (uintptr_t)armcpu_switchMode, R3); \ + callR3(g_out); \ + } \ + emit_writeBYTE_ptr32_regptrTO_regFROM(g_out, cpu_ptr_byte(reg, 0, R1, ADDRESS), operand); \ + changeCPSR; \ + emit_label(g_out, (int)__skip); \ + } \ + return 1; \ + case 0x2: \ + { \ + unsigned int __skip=genlabel(); \ + cpu_ptr(CPSR, mode, VALUE); \ + emit_andimm(g_out, mode, 0x1F, mode); \ + emit_cmpimm(g_out, mode, USR); \ + emit_branch_label(g_out, __skip, 0x0); \ + emit_lsr(g_out, operand, 8); \ + emit_writeBYTE_ptr32_regptrTO_regFROM(g_out, cpu_ptr_byte(reg, 1, R1, ADDRESS), operand); \ + changeCPSR; \ + emit_label(g_out, (int)__skip); \ + } \ + return 1; \ + case 0x4: \ + { \ + unsigned int __skip=genlabel(); \ + cpu_ptr(CPSR, mode, VALUE); \ + emit_andimm(g_out, mode, 0x1F, mode); \ + emit_cmpimm(g_out, mode, USR); \ + emit_branch_label(g_out, __skip, 0x0); \ + emit_lsr(g_out, operand, 16); \ + emit_writeBYTE_ptr32_regptrTO_regFROM(g_out, cpu_ptr_byte(reg, 2, R1, ADDRESS), operand); \ + changeCPSR; \ + emit_label(g_out, (int)__skip); \ + } \ + return 1; \ + case 0x8: \ + { \ + emit_lsr(g_out, operand, 24); \ + emit_writeBYTE_ptr32_regptrTO_regFROM(g_out, cpu_ptr_byte(reg, 3, R1, ADDRESS), operand); \ + changeCPSR; \ + } \ + return 1; \ + default: \ + break; \ + } \ +\ + static u32 byte_mask = (BIT16(i)?0x000000FF:0x00000000) | \ + (BIT17(i)?0x0000FF00:0x00000000) | \ + (BIT18(i)?0x00FF0000:0x00000000) | \ + (BIT19(i)?0xFF000000:0x00000000); \ + static u32 byte_mask_USR = (BIT19(i)?0xFF000000:0x00000000); \ + cpu_ptr(reg.val, xPSR_mem, ADDRESS); \ + unsigned int __USR=genlabel(); \ + unsigned int __done=genlabel(); \ + cpu_ptr(CPSR.val, mode, VALUE); \ + emit_andimm(g_out, mode, 0x1F, mode); \ + emit_cmpimm(g_out, mode, USR); \ + emit_branch_label(g_out, __USR, 0x0); \ + if (sw && BIT16(i)) \ + { \ + \ + emit_mov(g_out, rhs_reg, mode); \ + emit_andimm(g_out, mode, 0x1F, mode); \ + emit_ptr(g_out, (uintptr_t)&ARMPROC, R0); \ + emit_mov(g_out, mode, R1);\ + emit_ptr(g_out, (uintptr_t)armcpu_switchMode, R3); \ + callR3(g_out); \ + } \ + cpu_ptr(reg.val, xPSR, VALUE); \ + emit_andimm(g_out, operand, byte_mask, operand); \ + emit_andimm(g_out, xPSR, ~byte_mask, xPSR); \ + emit_or(g_out, xPSR, operand, xPSR); \ + emit_write_ptr32_regptrTO_regFROM(g_out, cpu_ptr(reg.val, R1, ADDRESS), xPSR); \ + emit_branch_label(g_out, __done, 0xE); \ + emit_label(g_out, (int)__USR); \ + cpu_ptr(reg.val, xPSR, VALUE); \ + emit_andimm(g_out, operand, byte_mask_USR, operand); \ + emit_andimm(g_out, xPSR, ~byte_mask_USR, xPSR); \ + emit_or(g_out, xPSR, operand, xPSR); \ + emit_write_ptr32_regptrTO_regFROM(g_out, cpu_ptr(reg.val, R1, ADDRESS), xPSR); \ + emit_label(g_out, (int)__done); \ + changeCPSR; \ + return 1; + +#define operand R8 +#define mode R9 +#define xPSR R4 +#define xPSR_mem R3 + +static int OP_MSR_CPSR(const u32 i) { OP_MSR_(CPSR, REG_OFF, 1); } +static int OP_MSR_SPSR(const u32 i) { OP_MSR_(SPSR, REG_OFF, 0); } +static int OP_MSR_CPSR_IMM_VAL(const u32 i) { OP_MSR_(CPSR, IMM_VAL, 1); } +static int OP_MSR_SPSR_IMM_VAL(const u32 i) { OP_MSR_(SPSR, IMM_VAL, 0); } + +#undef operand +#undef mode +#undef xPSR +#undef xPSR_mem + +//----------------------------------------------------------------------------- +// LDR +//----------------------------------------------------------------------------- +typedef u32 (FASTCALL* OpLDR)(u32, u32*); + +// 98% of all memory accesses land in the same region as the first execution of +// that instruction, so keep multiple copies with different fastpaths. +// The copies don't need to differ in any way; the point is merely to cooperate +// with x86 branch prediction. + +enum { + MEMTYPE_GENERIC = 0, // no assumptions + MEMTYPE_MAIN = 1, + MEMTYPE_DTCM = 2, + MEMTYPE_ERAM = 3, + MEMTYPE_SWIRAM = 4, + MEMTYPE_OTHER = 5, // memory that is known to not be MAIN, DTCM, ERAM, or SWIRAM +}; + +static u32 classify_adr(u32 adr, bool store) +{ + if(PROCNUM==ARMCPU_ARM9 && (adr & ~0x3FFF) == MMU.DTCMRegion) + return MEMTYPE_DTCM; + else if((adr & 0x0F000000) == 0x02000000) + return MEMTYPE_MAIN; + else if(PROCNUM==ARMCPU_ARM7 && !store && (adr & 0xFF800000) == 0x03800000) + return MEMTYPE_ERAM; + else if(PROCNUM==ARMCPU_ARM7 && !store && (adr & 0xFF800000) == 0x03000000) + return MEMTYPE_SWIRAM; + else + return MEMTYPE_GENERIC; +} + +template +static u32 FASTCALL OP_LDR(u32 adr, u32 *dstreg) +{ + u32 data = READ32(cpu->mem_if->data, adr); + if(adr&3) + data = ROR(data, 8*(adr&3)); + *dstreg = data; + return MMU_aluMemAccessCycles(3,adr); +} + +template +static u32 FASTCALL OP_LDRH(u32 adr, u32 *dstreg) +{ + *dstreg = READ16(cpu->mem_if->data, adr); + return MMU_aluMemAccessCycles(3,adr); +} + +template +static u32 FASTCALL OP_LDRSH(u32 adr, u32 *dstreg) +{ + *dstreg = (s16)READ16(cpu->mem_if->data, adr); + return MMU_aluMemAccessCycles(3,adr); +} + +template +static u32 FASTCALL OP_LDRB(u32 adr, u32 *dstreg) +{ + *dstreg = READ8(cpu->mem_if->data, adr); + u32 ret = MMU_aluMemAccessCycles(3,adr); + return ret; +} + +template +static u32 FASTCALL OP_LDRSB(u32 adr, u32 *dstreg) +{ + *dstreg = (s8)READ8(cpu->mem_if->data, adr); + return MMU_aluMemAccessCycles(3,adr); +} + +#define T(op) op<0,0>, op<0,1>, op<0,2>, NULL, NULL, op<1,0>, op<1,1>, NULL, op<1,3>, op<1,4> +static const OpLDR LDR_tab[2][5] = { T(OP_LDR) }; +static const OpLDR LDRH_tab[2][5] = { T(OP_LDRH) }; +static const OpLDR LDRSH_tab[2][5] = { T(OP_LDRSH) }; +static const OpLDR LDRB_tab[2][5] = { T(OP_LDRB) }; +static const OpLDR LDRSB_tab[2][5] = { T(OP_LDRSB) }; +#undef T + +static u32 add(u32 lhst, u32 rhst) { return lhst + rhst; } +static u32 sub(u32 lhst, u32 rhst) { return lhst - rhst; } + +#define OP_LDR_(mem_op, arg, sign_op, writeback) \ + reg_pos_ptr(16, adr, VALUE); \ + reg_pos_ptr(12, dst, ADDRESS); \ + arg; \ + if(!rhs_is_imm || *(u32*)&rhs_var) \ + { \ + if(writeback == 0) \ + emit_##sign_op(g_out, adr, rhs_reg, adr); \ + else if(writeback < 0) \ + { \ + emit_##sign_op(g_out, adr, rhs_reg, adr); \ + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(16, R1, ADDRESS), adr); \ + } \ + else if(writeback > 0) \ + { \ + emit_mov(g_out, adr, tmp_reg); \ + emit_##sign_op(g_out, tmp_reg, rhs_reg, tmp_reg); \ + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(16, R1, ADDRESS), tmp_reg); \ + } \ + } \ + u32 adr_first = sign_op(cpu->R[REG_POS(i,16)], rhs_first); \ + emit_mov64(g_out,adr, R0); \ + emit_mov64(g_out, dst, R1); \ + emit_ptr(g_out, (uintptr_t)mem_op##_tab[PROCNUM][classify_adr(adr_first,0)], R3); \ + callR3(g_out); \ + emit_write_ptr32_reg(g_out, (uintptr_t)&bb_cycles, R0); \ + if(REG_POS(i,12)==15) \ + { \ + reg_ptr(15, tmp_reg, VALUE); \ + if (PROCNUM == 0) \ + { \ + emit_mov(g_out, tmp_reg, tmp2_reg); \ + emit_andimm(g_out, tmp2_reg, 1, tmp2_reg); \ + emit_lsl(g_out, tmp2_reg, 5); \ + emit_or_ptr32_regptr_reg(g_out, cpu_ptr(CPSR, R1, ADDRESS), tmp2_reg); \ + emit_andimm(g_out, tmp_reg, 0xFFFFFFFE, tmp_reg); \ + } \ + else \ + { \ + emit_andimm(g_out, tmp_reg, 0xFFFFFFFC, tmp_reg); \ + } \ + emit_write_ptr32_regptrTO_regFROM(g_out, cpu_ptr(next_instruction, R1, ADDRESS), tmp_reg); \ + } \ + return 1; + +#define dst R8 +#define adr R9 + +// LDR +static int OP_LDR_P_IMM_OFF(const u32 i) { OP_LDR_(LDR, IMM_OFF_12, add, 0); } +static int OP_LDR_M_IMM_OFF(const u32 i) { OP_LDR_(LDR, IMM_OFF_12, sub, 0); } +static int OP_LDR_P_LSL_IMM_OFF(const u32 i) { OP_LDR_(LDR, LSL_IMM, add, 0); } +static int OP_LDR_M_LSL_IMM_OFF(const u32 i) { OP_LDR_(LDR, LSL_IMM, sub, 0); } +static int OP_LDR_P_LSR_IMM_OFF(const u32 i) { OP_LDR_(LDR, LSR_IMM, add, 0); } +static int OP_LDR_M_LSR_IMM_OFF(const u32 i) { OP_LDR_(LDR, LSR_IMM, sub, 0); } +static int OP_LDR_P_ASR_IMM_OFF(const u32 i) { OP_LDR_(LDR, ASR_IMM, add, 0); } +static int OP_LDR_M_ASR_IMM_OFF(const u32 i) { OP_LDR_(LDR, ASR_IMM, sub, 0); } +static int OP_LDR_P_ROR_IMM_OFF(const u32 i) { OP_LDR_(LDR, ROR_IMM, add, 0); } +static int OP_LDR_M_ROR_IMM_OFF(const u32 i) { OP_LDR_(LDR, ROR_IMM, sub, 0); } + +static int OP_LDR_P_IMM_OFF_PREIND(const u32 i) { OP_LDR_(LDR, IMM_OFF_12, add, -1); } +static int OP_LDR_M_IMM_OFF_PREIND(const u32 i) { OP_LDR_(LDR, IMM_OFF_12, sub, -1); } +static int OP_LDR_P_LSL_IMM_OFF_PREIND(const u32 i) { OP_LDR_(LDR, LSL_IMM, add, -1); } +static int OP_LDR_M_LSL_IMM_OFF_PREIND(const u32 i) { OP_LDR_(LDR, LSL_IMM, sub, -1); } +static int OP_LDR_P_LSR_IMM_OFF_PREIND(const u32 i) { OP_LDR_(LDR, LSR_IMM, add, -1); } +static int OP_LDR_M_LSR_IMM_OFF_PREIND(const u32 i) { OP_LDR_(LDR, LSR_IMM, sub, -1); } +static int OP_LDR_P_ASR_IMM_OFF_PREIND(const u32 i) { OP_LDR_(LDR, ASR_IMM, add, -1); } +static int OP_LDR_M_ASR_IMM_OFF_PREIND(const u32 i) { OP_LDR_(LDR, ASR_IMM, sub, -1); } +static int OP_LDR_P_ROR_IMM_OFF_PREIND(const u32 i) { OP_LDR_(LDR, ROR_IMM, add, -1); } +static int OP_LDR_M_ROR_IMM_OFF_PREIND(const u32 i) { OP_LDR_(LDR, ROR_IMM, sub, -1); } +static int OP_LDR_P_IMM_OFF_POSTIND(const u32 i) { OP_LDR_(LDR, IMM_OFF_12, add, 1); } +static int OP_LDR_M_IMM_OFF_POSTIND(const u32 i) { OP_LDR_(LDR, IMM_OFF_12, sub, 1); } +static int OP_LDR_P_LSL_IMM_OFF_POSTIND(const u32 i) { OP_LDR_(LDR, LSL_IMM, add, 1); } +static int OP_LDR_M_LSL_IMM_OFF_POSTIND(const u32 i) { OP_LDR_(LDR, LSL_IMM, sub, 1); } +static int OP_LDR_P_LSR_IMM_OFF_POSTIND(const u32 i) { OP_LDR_(LDR, LSR_IMM, add, 1); } +static int OP_LDR_M_LSR_IMM_OFF_POSTIND(const u32 i) { OP_LDR_(LDR, LSR_IMM, sub, 1); } +static int OP_LDR_P_ASR_IMM_OFF_POSTIND(const u32 i) { OP_LDR_(LDR, ASR_IMM, add, 1); } +static int OP_LDR_M_ASR_IMM_OFF_POSTIND(const u32 i) { OP_LDR_(LDR, ASR_IMM, sub, 1); } +static int OP_LDR_P_ROR_IMM_OFF_POSTIND(const u32 i) { OP_LDR_(LDR, ROR_IMM, add, 1); } +static int OP_LDR_M_ROR_IMM_OFF_POSTIND(const u32 i) { OP_LDR_(LDR, ROR_IMM, sub, 1); } + +// LDRH +static int OP_LDRH_P_IMM_OFF(const u32 i) { OP_LDR_(LDRH, IMM_OFF, add, 0); } +static int OP_LDRH_M_IMM_OFF(const u32 i) { OP_LDR_(LDRH, IMM_OFF, sub, 0); } +static int OP_LDRH_P_REG_OFF(const u32 i) { OP_LDR_(LDRH, REG_OFF, add, 0); } +static int OP_LDRH_M_REG_OFF(const u32 i) { OP_LDR_(LDRH, REG_OFF, sub, 0); } + +static int OP_LDRH_PRE_INDE_P_IMM_OFF(const u32 i) { OP_LDR_(LDRH, IMM_OFF, add, -1); } +static int OP_LDRH_PRE_INDE_M_IMM_OFF(const u32 i) { OP_LDR_(LDRH, IMM_OFF, sub, -1); } +static int OP_LDRH_PRE_INDE_P_REG_OFF(const u32 i) { OP_LDR_(LDRH, REG_OFF, add, -1); } +static int OP_LDRH_PRE_INDE_M_REG_OFF(const u32 i) { OP_LDR_(LDRH, REG_OFF, sub, -1); } +static int OP_LDRH_POS_INDE_P_IMM_OFF(const u32 i) { OP_LDR_(LDRH, IMM_OFF, add, 1); } +static int OP_LDRH_POS_INDE_M_IMM_OFF(const u32 i) { OP_LDR_(LDRH, IMM_OFF, sub, 1); } +static int OP_LDRH_POS_INDE_P_REG_OFF(const u32 i) { OP_LDR_(LDRH, REG_OFF, add, 1); } +static int OP_LDRH_POS_INDE_M_REG_OFF(const u32 i) { OP_LDR_(LDRH, REG_OFF, sub, 1); } + +// LDRSH +static int OP_LDRSH_P_IMM_OFF(const u32 i) { OP_LDR_(LDRSH, IMM_OFF, add, 0); } +static int OP_LDRSH_M_IMM_OFF(const u32 i) { OP_LDR_(LDRSH, IMM_OFF, sub, 0); } +static int OP_LDRSH_P_REG_OFF(const u32 i) { OP_LDR_(LDRSH, REG_OFF, add, 0); } +static int OP_LDRSH_M_REG_OFF(const u32 i) { OP_LDR_(LDRSH, REG_OFF, sub, 0); } + +static int OP_LDRSH_PRE_INDE_P_IMM_OFF(const u32 i) { OP_LDR_(LDRSH, IMM_OFF, add, -1); } +static int OP_LDRSH_PRE_INDE_M_IMM_OFF(const u32 i) { OP_LDR_(LDRSH, IMM_OFF, sub, -1); } +static int OP_LDRSH_PRE_INDE_P_REG_OFF(const u32 i) { OP_LDR_(LDRSH, REG_OFF, add, -1); } +static int OP_LDRSH_PRE_INDE_M_REG_OFF(const u32 i) { OP_LDR_(LDRSH, REG_OFF, sub, -1); } +static int OP_LDRSH_POS_INDE_P_IMM_OFF(const u32 i) { OP_LDR_(LDRSH, IMM_OFF, add, 1); } +static int OP_LDRSH_POS_INDE_M_IMM_OFF(const u32 i) { OP_LDR_(LDRSH, IMM_OFF, sub, 1); } +static int OP_LDRSH_POS_INDE_P_REG_OFF(const u32 i) { OP_LDR_(LDRSH, REG_OFF, add, 1); } +static int OP_LDRSH_POS_INDE_M_REG_OFF(const u32 i) { OP_LDR_(LDRSH, REG_OFF, sub, 1); } + +// LDRB +static int OP_LDRB_P_IMM_OFF(const u32 i) { OP_LDR_(LDRB, IMM_OFF_12, add, 0); } +static int OP_LDRB_M_IMM_OFF(const u32 i) { OP_LDR_(LDRB, IMM_OFF_12, sub, 0); } +static int OP_LDRB_P_LSL_IMM_OFF(const u32 i) { OP_LDR_(LDRB, LSL_IMM, add, 0); } +static int OP_LDRB_M_LSL_IMM_OFF(const u32 i) { OP_LDR_(LDRB, LSL_IMM, sub, 0); } +static int OP_LDRB_P_LSR_IMM_OFF(const u32 i) { OP_LDR_(LDRB, LSR_IMM, add, 0); } +static int OP_LDRB_M_LSR_IMM_OFF(const u32 i) { OP_LDR_(LDRB, LSR_IMM, sub, 0); } +static int OP_LDRB_P_ASR_IMM_OFF(const u32 i) { OP_LDR_(LDRB, ASR_IMM, add, 0); } +static int OP_LDRB_M_ASR_IMM_OFF(const u32 i) { OP_LDR_(LDRB, ASR_IMM, sub, 0); } +static int OP_LDRB_P_ROR_IMM_OFF(const u32 i) { OP_LDR_(LDRB, ROR_IMM, add, 0); } +static int OP_LDRB_M_ROR_IMM_OFF(const u32 i) { OP_LDR_(LDRB, ROR_IMM, sub, 0); } + +static int OP_LDRB_P_IMM_OFF_PREIND(const u32 i) { OP_LDR_(LDRB, IMM_OFF_12, add, -1); } +static int OP_LDRB_M_IMM_OFF_PREIND(const u32 i) { OP_LDR_(LDRB, IMM_OFF_12, sub, -1); } +static int OP_LDRB_P_LSL_IMM_OFF_PREIND(const u32 i) { OP_LDR_(LDRB, LSL_IMM, add, -1); } +static int OP_LDRB_M_LSL_IMM_OFF_PREIND(const u32 i) { OP_LDR_(LDRB, LSL_IMM, sub, -1); } +static int OP_LDRB_P_LSR_IMM_OFF_PREIND(const u32 i) { OP_LDR_(LDRB, LSR_IMM, add, -1); } +static int OP_LDRB_M_LSR_IMM_OFF_PREIND(const u32 i) { OP_LDR_(LDRB, LSR_IMM, sub, -1); } +static int OP_LDRB_P_ASR_IMM_OFF_PREIND(const u32 i) { OP_LDR_(LDRB, ASR_IMM, add, -1); } +static int OP_LDRB_M_ASR_IMM_OFF_PREIND(const u32 i) { OP_LDR_(LDRB, ASR_IMM, sub, -1); } +static int OP_LDRB_P_ROR_IMM_OFF_PREIND(const u32 i) { OP_LDR_(LDRB, ROR_IMM, add, -1); } +static int OP_LDRB_M_ROR_IMM_OFF_PREIND(const u32 i) { OP_LDR_(LDRB, ROR_IMM, sub, -1); } +static int OP_LDRB_P_IMM_OFF_POSTIND(const u32 i) { OP_LDR_(LDRB, IMM_OFF_12, add, 1); } +static int OP_LDRB_M_IMM_OFF_POSTIND(const u32 i) { OP_LDR_(LDRB, IMM_OFF_12, sub, 1); } +static int OP_LDRB_P_LSL_IMM_OFF_POSTIND(const u32 i) { OP_LDR_(LDRB, LSL_IMM, add, 1); } +static int OP_LDRB_M_LSL_IMM_OFF_POSTIND(const u32 i) { OP_LDR_(LDRB, LSL_IMM, sub, 1); } +static int OP_LDRB_P_LSR_IMM_OFF_POSTIND(const u32 i) { OP_LDR_(LDRB, LSR_IMM, add, 1); } +static int OP_LDRB_M_LSR_IMM_OFF_POSTIND(const u32 i) { OP_LDR_(LDRB, LSR_IMM, sub, 1); } +static int OP_LDRB_P_ASR_IMM_OFF_POSTIND(const u32 i) { OP_LDR_(LDRB, ASR_IMM, add, 1); } +static int OP_LDRB_M_ASR_IMM_OFF_POSTIND(const u32 i) { OP_LDR_(LDRB, ASR_IMM, sub, 1); } +static int OP_LDRB_P_ROR_IMM_OFF_POSTIND(const u32 i) { OP_LDR_(LDRB, ROR_IMM, add, 1); } +static int OP_LDRB_M_ROR_IMM_OFF_POSTIND(const u32 i) { OP_LDR_(LDRB, ROR_IMM, sub, 1); } + +// LDRSB +static int OP_LDRSB_P_IMM_OFF(const u32 i) { OP_LDR_(LDRSB, IMM_OFF, add, 0); } +static int OP_LDRSB_M_IMM_OFF(const u32 i) { OP_LDR_(LDRSB, IMM_OFF, sub, 0); } +static int OP_LDRSB_P_REG_OFF(const u32 i) { OP_LDR_(LDRSB, REG_OFF, add, 0); } +static int OP_LDRSB_M_REG_OFF(const u32 i) { OP_LDR_(LDRSB, REG_OFF, sub, 0); } + +static int OP_LDRSB_PRE_INDE_P_IMM_OFF(const u32 i) { OP_LDR_(LDRSB, IMM_OFF, add, -1); } +static int OP_LDRSB_PRE_INDE_M_IMM_OFF(const u32 i) { OP_LDR_(LDRSB, IMM_OFF, sub, -1); } +static int OP_LDRSB_PRE_INDE_P_REG_OFF(const u32 i) { OP_LDR_(LDRSB, REG_OFF, add, -1); } +static int OP_LDRSB_PRE_INDE_M_REG_OFF(const u32 i) { OP_LDR_(LDRSB, REG_OFF, sub, -1); } +static int OP_LDRSB_POS_INDE_P_IMM_OFF(const u32 i) { OP_LDR_(LDRSB, IMM_OFF, add, 1); } +static int OP_LDRSB_POS_INDE_M_IMM_OFF(const u32 i) { OP_LDR_(LDRSB, IMM_OFF, sub, 1); } +static int OP_LDRSB_POS_INDE_P_REG_OFF(const u32 i) { OP_LDR_(LDRSB, REG_OFF, add, 1); } +static int OP_LDRSB_POS_INDE_M_REG_OFF(const u32 i) { OP_LDR_(LDRSB, REG_OFF, sub, 1); } + +#undef dst +#undef adr + +//----------------------------------------------------------------------------- +// STR +//----------------------------------------------------------------------------- +template +static u32 FASTCALL OP_STR(u32 adr, u32 data) +{ + WRITE32(cpu->mem_if->data, adr, data); + return MMU_aluMemAccessCycles(2,adr); +} + +template +static u32 FASTCALL OP_STRH(u32 adr, u32 data) +{ + WRITE16(cpu->mem_if->data, adr, data); + return MMU_aluMemAccessCycles(2,adr); +} + +template +static u32 FASTCALL OP_STRB(u32 adr, u32 data) +{ + WRITE8(cpu->mem_if->data, adr, data); + return MMU_aluMemAccessCycles(2,adr); +} + +typedef u32 (FASTCALL* OpSTR)(u32, u32); +#define T(op) op<0,0>, op<0,1>, op<0,2>, op<1,0>, op<1,1>, NULL +static const OpSTR STR_tab[2][3] = { T(OP_STR) }; +static const OpSTR STRH_tab[2][3] = { T(OP_STRH) }; +static const OpSTR STRB_tab[2][3] = { T(OP_STRB) }; +#undef T + +#define OP_STR_(mem_op, arg, sign_op, writeback) \ + reg_pos_ptr(12, data, VALUE); \ + reg_pos_ptr(16, R8, VALUE); \ + arg; \ + emit_mov(g_out, R8, adr); \ + if(!rhs_is_imm || *(u32*)&rhs_var) \ + { \ + if(writeback == 0) \ + emit_##sign_op(g_out, adr, rhs_reg, adr); \ + else if(writeback < 0) \ + { \ + emit_##sign_op(g_out, adr, rhs_reg, adr); \ + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(16, R3, ADDRESS), adr); \ + } \ + else if(writeback > 0) \ + { \ + emit_mov(g_out, adr, tmp_reg); \ + emit_##sign_op(g_out, tmp_reg, rhs_reg, tmp_reg); \ + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(16, R3, ADDRESS), tmp_reg); \ + } \ + } \ + u32 adr_first = sign_op(cpu->R[REG_POS(i,16)], rhs_first); \ + emit_ptr(g_out, (uintptr_t)mem_op##_tab[PROCNUM][classify_adr(adr_first,1)], R3); \ + callR3(g_out); \ + emit_write_ptr32_reg(g_out, (uintptr_t)&bb_cycles, R0); \ + return 1; + +#define data R1 +#define adr R0 + +static int OP_STR_P_IMM_OFF(const u32 i) { OP_STR_(STR, IMM_OFF_12, add, 0); } +static int OP_STR_M_IMM_OFF(const u32 i) { OP_STR_(STR, IMM_OFF_12, sub, 0); } +static int OP_STR_P_LSL_IMM_OFF(const u32 i) { OP_STR_(STR, LSL_IMM, add, 0); } +static int OP_STR_M_LSL_IMM_OFF(const u32 i) { OP_STR_(STR, LSL_IMM, sub, 0); } +static int OP_STR_P_LSR_IMM_OFF(const u32 i) { OP_STR_(STR, LSR_IMM, add, 0); } +static int OP_STR_M_LSR_IMM_OFF(const u32 i) { OP_STR_(STR, LSR_IMM, sub, 0); } +static int OP_STR_P_ASR_IMM_OFF(const u32 i) { OP_STR_(STR, ASR_IMM, add, 0); } +static int OP_STR_M_ASR_IMM_OFF(const u32 i) { OP_STR_(STR, ASR_IMM, sub, 0); } +static int OP_STR_P_ROR_IMM_OFF(const u32 i) { OP_STR_(STR, ROR_IMM, add, 0); } +static int OP_STR_M_ROR_IMM_OFF(const u32 i) { OP_STR_(STR, ROR_IMM, sub, 0); } + +static int OP_STR_P_IMM_OFF_PREIND(const u32 i) { OP_STR_(STR, IMM_OFF_12, add, -1); } +static int OP_STR_M_IMM_OFF_PREIND(const u32 i) { OP_STR_(STR, IMM_OFF_12, sub, -1); } +static int OP_STR_P_LSL_IMM_OFF_PREIND(const u32 i) { OP_STR_(STR, LSL_IMM, add, -1); } +static int OP_STR_M_LSL_IMM_OFF_PREIND(const u32 i) { OP_STR_(STR, LSL_IMM, sub, -1); } +static int OP_STR_P_LSR_IMM_OFF_PREIND(const u32 i) { OP_STR_(STR, LSR_IMM, add, -1); } +static int OP_STR_M_LSR_IMM_OFF_PREIND(const u32 i) { OP_STR_(STR, LSR_IMM, sub, -1); } +static int OP_STR_P_ASR_IMM_OFF_PREIND(const u32 i) { OP_STR_(STR, ASR_IMM, add, -1); } +static int OP_STR_M_ASR_IMM_OFF_PREIND(const u32 i) { OP_STR_(STR, ASR_IMM, sub, -1); } +static int OP_STR_P_ROR_IMM_OFF_PREIND(const u32 i) { OP_STR_(STR, ROR_IMM, add, -1); } +static int OP_STR_M_ROR_IMM_OFF_PREIND(const u32 i) { OP_STR_(STR, ROR_IMM, sub, -1); } +static int OP_STR_P_IMM_OFF_POSTIND(const u32 i) { OP_STR_(STR, IMM_OFF_12, add, 1); } +static int OP_STR_M_IMM_OFF_POSTIND(const u32 i) { OP_STR_(STR, IMM_OFF_12, sub, 1); } +static int OP_STR_P_LSL_IMM_OFF_POSTIND(const u32 i) { OP_STR_(STR, LSL_IMM, add, 1); } +static int OP_STR_M_LSL_IMM_OFF_POSTIND(const u32 i) { OP_STR_(STR, LSL_IMM, sub, 1); } +static int OP_STR_P_LSR_IMM_OFF_POSTIND(const u32 i) { OP_STR_(STR, LSR_IMM, add, 1); } +static int OP_STR_M_LSR_IMM_OFF_POSTIND(const u32 i) { OP_STR_(STR, LSR_IMM, sub, 1); } +static int OP_STR_P_ASR_IMM_OFF_POSTIND(const u32 i) { OP_STR_(STR, ASR_IMM, add, 1); } +static int OP_STR_M_ASR_IMM_OFF_POSTIND(const u32 i) { OP_STR_(STR, ASR_IMM, sub, 1); } +static int OP_STR_P_ROR_IMM_OFF_POSTIND(const u32 i) { OP_STR_(STR, ROR_IMM, add, 1); } +static int OP_STR_M_ROR_IMM_OFF_POSTIND(const u32 i) { OP_STR_(STR, ROR_IMM, sub, 1); } + +static int OP_STRH_P_IMM_OFF(const u32 i) { OP_STR_(STRH, IMM_OFF, add, 0); } +static int OP_STRH_M_IMM_OFF(const u32 i) { OP_STR_(STRH, IMM_OFF, sub, 0); } +static int OP_STRH_P_REG_OFF(const u32 i) { OP_STR_(STRH, REG_OFF, add, 0); } +static int OP_STRH_M_REG_OFF(const u32 i) { OP_STR_(STRH, REG_OFF, sub, 0); } + +static int OP_STRH_PRE_INDE_P_IMM_OFF(const u32 i) { OP_STR_(STRH, IMM_OFF, add, -1); } +static int OP_STRH_PRE_INDE_M_IMM_OFF(const u32 i) { OP_STR_(STRH, IMM_OFF, sub, -1); } +static int OP_STRH_PRE_INDE_P_REG_OFF(const u32 i) { OP_STR_(STRH, REG_OFF, add, -1); } +static int OP_STRH_PRE_INDE_M_REG_OFF(const u32 i) { OP_STR_(STRH, REG_OFF, sub, -1); } +static int OP_STRH_POS_INDE_P_IMM_OFF(const u32 i) { OP_STR_(STRH, IMM_OFF, add, 1); } +static int OP_STRH_POS_INDE_M_IMM_OFF(const u32 i) { OP_STR_(STRH, IMM_OFF, sub, 1); } +static int OP_STRH_POS_INDE_P_REG_OFF(const u32 i) { OP_STR_(STRH, REG_OFF, add, 1); } +static int OP_STRH_POS_INDE_M_REG_OFF(const u32 i) { OP_STR_(STRH, REG_OFF, sub, 1); } + +static int OP_STRB_P_IMM_OFF(const u32 i) { OP_STR_(STRB, IMM_OFF_12, add, 0); } +static int OP_STRB_M_IMM_OFF(const u32 i) { OP_STR_(STRB, IMM_OFF_12, sub, 0); } +static int OP_STRB_P_LSL_IMM_OFF(const u32 i) { OP_STR_(STRB, LSL_IMM, add, 0); } +static int OP_STRB_M_LSL_IMM_OFF(const u32 i) { OP_STR_(STRB, LSL_IMM, sub, 0); } +static int OP_STRB_P_LSR_IMM_OFF(const u32 i) { OP_STR_(STRB, LSR_IMM, add, 0); } +static int OP_STRB_M_LSR_IMM_OFF(const u32 i) { OP_STR_(STRB, LSR_IMM, sub, 0); } +static int OP_STRB_P_ASR_IMM_OFF(const u32 i) { OP_STR_(STRB, ASR_IMM, add, 0); } +static int OP_STRB_M_ASR_IMM_OFF(const u32 i) { OP_STR_(STRB, ASR_IMM, sub, 0); } +static int OP_STRB_P_ROR_IMM_OFF(const u32 i) { OP_STR_(STRB, ROR_IMM, add, 0); } +static int OP_STRB_M_ROR_IMM_OFF(const u32 i) { OP_STR_(STRB, ROR_IMM, sub, 0); } + +static int OP_STRB_P_IMM_OFF_PREIND(const u32 i) { OP_STR_(STRB, IMM_OFF_12, add, -1); } +static int OP_STRB_M_IMM_OFF_PREIND(const u32 i) { OP_STR_(STRB, IMM_OFF_12, sub, -1); } +static int OP_STRB_P_LSL_IMM_OFF_PREIND(const u32 i) { OP_STR_(STRB, LSL_IMM, add, -1); } +static int OP_STRB_M_LSL_IMM_OFF_PREIND(const u32 i) { OP_STR_(STRB, LSL_IMM, sub, -1); } +static int OP_STRB_P_LSR_IMM_OFF_PREIND(const u32 i) { OP_STR_(STRB, LSR_IMM, add, -1); } +static int OP_STRB_M_LSR_IMM_OFF_PREIND(const u32 i) { OP_STR_(STRB, LSR_IMM, sub, -1); } +static int OP_STRB_P_ASR_IMM_OFF_PREIND(const u32 i) { OP_STR_(STRB, ASR_IMM, add, -1); } +static int OP_STRB_M_ASR_IMM_OFF_PREIND(const u32 i) { OP_STR_(STRB, ASR_IMM, sub, -1); } +static int OP_STRB_P_ROR_IMM_OFF_PREIND(const u32 i) { OP_STR_(STRB, ROR_IMM, add, -1); } +static int OP_STRB_M_ROR_IMM_OFF_PREIND(const u32 i) { OP_STR_(STRB, ROR_IMM, sub, -1); } +static int OP_STRB_P_IMM_OFF_POSTIND(const u32 i) { OP_STR_(STRB, IMM_OFF_12, add, 1); } +static int OP_STRB_M_IMM_OFF_POSTIND(const u32 i) { OP_STR_(STRB, IMM_OFF_12, sub, 1); } +static int OP_STRB_P_LSL_IMM_OFF_POSTIND(const u32 i) { OP_STR_(STRB, LSL_IMM, add, 1); } +static int OP_STRB_M_LSL_IMM_OFF_POSTIND(const u32 i) { OP_STR_(STRB, LSL_IMM, sub, 1); } +static int OP_STRB_P_LSR_IMM_OFF_POSTIND(const u32 i) { OP_STR_(STRB, LSR_IMM, add, 1); } +static int OP_STRB_M_LSR_IMM_OFF_POSTIND(const u32 i) { OP_STR_(STRB, LSR_IMM, sub, 1); } +static int OP_STRB_P_ASR_IMM_OFF_POSTIND(const u32 i) { OP_STR_(STRB, ASR_IMM, add, 1); } +static int OP_STRB_M_ASR_IMM_OFF_POSTIND(const u32 i) { OP_STR_(STRB, ASR_IMM, sub, 1); } +static int OP_STRB_P_ROR_IMM_OFF_POSTIND(const u32 i) { OP_STR_(STRB, ROR_IMM, add, 1); } +static int OP_STRB_M_ROR_IMM_OFF_POSTIND(const u32 i) { OP_STR_(STRB, ROR_IMM, sub, 1); } + +#undef data +#undef adr + +//----------------------------------------------------------------------------- +// LDRD / STRD +//----------------------------------------------------------------------------- +typedef u32 FASTCALL (*LDRD_STRD_REG)(u32); +template +static u32 FASTCALL OP_LDRD_REG(u32 adr) +{ + cpu->R[Rnum] = READ32(cpu->mem_if->data, adr); + + // For even-numbered registers, we'll do a double-word load. Otherwise, we'll just do a single-word load. + if ((Rnum & 0x01) == 0) + { + cpu->R[(Rnum & 0xE) + 1] = READ32(cpu->mem_if->data, adr+4); + return (MMU_memAccessCycles(adr) + MMU_memAccessCycles(adr+4)); + } + + return MMU_memAccessCycles(adr); +} +template +static u32 FASTCALL OP_STRD_REG(u32 adr) +{ + WRITE32(cpu->mem_if->data, adr, cpu->R[Rnum]); + + // For even-numbered registers, we'll do a double-word store. Otherwise, we'll just do a single-word store. + if ((Rnum & 0x01) == 0) + { + WRITE32(cpu->mem_if->data, adr+4, cpu->R[(Rnum & 0xE) + 1]); + return (MMU_memAccessCycles(adr) + MMU_memAccessCycles(adr+4)); + } + + return MMU_memAccessCycles(adr); +} +#define T(op, proc) op, op, op, op, op, op, op, op, op, op, op, op, op, op, op, op +static const LDRD_STRD_REG op_ldrd_tab[2][16] = { {T(OP_LDRD_REG, 0)}, {T(OP_LDRD_REG, 1)} }; +static const LDRD_STRD_REG op_strd_tab[2][16] = { {T(OP_STRD_REG, 0)}, {T(OP_STRD_REG, 1)} }; +#undef T + +static int OP_LDRD_STRD_POST_INDEX(const u32 i) +{ + #define addr R2 + #define Rd R4 + #define Rs R5 + + u8 Rd_num = REG_POS(i, 12); + + if (Rd_num == 14) + { + printf("OP_LDRD_STRD_POST_INDEX: use R14!!!!\n"); + return 0; // TODO: exception + } + if (Rd_num & 0x1) + { + printf("OP_LDRD_STRD_POST_INDEX: ERROR!!!!\n"); + return 0; // TODO: exception + } + + reg_pos_ptr(16, Rd, VALUE); + emit_mov(g_out, Rd, addr); + + // I bit - immediate or register + if (BIT22(i)) + { + IMM_OFF; + BIT23(i)?emit_add_ptr32_regptrTO_regFROM(g_out, CACHED_PTR(reg_pos_ptr(16, R1, ADDRESS)), rhs):emit_sub_ptr32_regptrTO_regFROM(g_out, CACHED_PTR(reg_pos_ptr(16, R1, ADDRESS)), rhs); + } + else + { + #define idx R3 + reg_pos_ptr(0, idx, VALUE); + BIT23(i)?emit_add_ptr32_regptrTO_regFROM(g_out, CACHED_PTR(reg_pos_ptr(16, R1, ADDRESS)), idx):emit_sub_ptr32_regptrTO_regFROM(g_out, CACHED_PTR(reg_pos_ptr(16, R1, ADDRESS)), idx); + #undef idx + } + + // Setup args + emit_mov(g_out,addr, R0); + // Add function + emit_ptr(g_out, (uintptr_t)(BIT5(i) ? op_strd_tab[PROCNUM][Rd_num] : op_ldrd_tab[PROCNUM][Rd_num]), R3); + // Call function + callR3(g_out); + emit_write_ptr32_reg(g_out, (uintptr_t)&bb_cycles, R0); + + emit_MMU_aluMemCycles(3, &bb_cycles, 0); + + #undef addr + #undef Rd + #undef Rs + + return 1; +} + +static int OP_LDRD_STRD_OFFSET_PRE_INDEX(const u32 i) +{ + #define addr R2 + #define Rd R4 + #define Rs R5 + + u8 Rd_num = REG_POS(i, 12); + + if (Rd_num == 14) + { + printf("OP_LDRD_STRD_OFFSET_PRE_INDEX: use R14!!!!\n"); + return 0; // TODO: exception + } + if (Rd_num & 0x1) + { + printf("OP_LDRD_STRD_OFFSET_PRE_INDEX: ERROR!!!!\n"); + return 0; // TODO: exception + } + + reg_pos_ptr(16, Rd, VALUE); + emit_mov(g_out, Rd, addr); + + // I bit - immediate or register + if (BIT22(i)) + { + IMM_OFF; + BIT23(i)?emit_add(g_out, addr, rhs, addr):emit_sub(g_out, addr, rhs, addr); + } + else { + reg_pos_ptr(0, R1, VALUE); + BIT23(i)?emit_add(g_out, addr, R1, addr):emit_sub(g_out, addr, R1, addr); + } + + if (BIT5(i)) // Store + { + // Setup args + emit_mov(g_out,addr, R0); + // Add function + emit_ptr(g_out, (uintptr_t)op_strd_tab[PROCNUM][Rd_num], R3); + // Call function + callR3(g_out); + // Return + emit_write_ptr32_reg(g_out, (uintptr_t)&bb_cycles, R0); + + if (BIT21(i)) // W bit - writeback + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(16, R1, ADDRESS), addr); + emit_MMU_aluMemCycles(3, &bb_cycles, 0); + } + else // Load + { + if (BIT21(i)) // W bit - writeback + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(16, R1, ADDRESS), addr); + + // Setup args + emit_mov(g_out,addr, R0); + // Add function + emit_ptr(g_out, (uintptr_t)op_ldrd_tab[PROCNUM][Rd_num], R3); + // Call function + callR3(g_out); + // Return + emit_write_ptr32_reg(g_out, (uintptr_t)&bb_cycles, R0); + + emit_MMU_aluMemCycles(3, &bb_cycles, 0); + } + + #undef addr + #undef Rd + #undef Rs + + return 1; +} + +//----------------------------------------------------------------------------- +// SWP/SWPB +//----------------------------------------------------------------------------- +template +static u32 FASTCALL op_swp(u32 adr, u32 *Rd, u32 Rs) +{ + u32 tmpt = ROR(READ32(cpu->mem_if->data, adr), (adr & 3)<<3); + WRITE32(cpu->mem_if->data, adr, Rs); + *Rd = tmpt; + return (MMU_memAccessCycles(adr) + MMU_memAccessCycles(adr)); +} +template +static u32 FASTCALL op_swpb(u32 adr, u32 *Rd, u32 Rs) +{ + u32 tmpt = READ8(cpu->mem_if->data, adr); + WRITE8(cpu->mem_if->data, adr, Rs); + *Rd = tmpt; + return (MMU_memAccessCycles(adr) + MMU_memAccessCycles(adr)); +} + +typedef u32 FASTCALL (*OP_SWP_SWPB)(u32, u32*, u32); +static const OP_SWP_SWPB op_swp_tab[2][2] = {{ op_swp<0>, op_swp<1> }, { op_swpb<0>, op_swpb<1> }}; + +static int op_swp_(const u32 i, int b) +{ + #define addr R2 + #define Rd R4 + #define Rs R5 + + reg_pos_ptr(16, addr, VALUE); + reg_pos_ptr(12, Rd, ADDRESS); + + if(b) + reg_pos_ptrB(0, Rs, VALUE); + else + reg_pos_ptr(0, Rs, VALUE); + + emit_mov(g_out, addr, R0); + emit_mov64(g_out, Rd, R1); + emit_mov(g_out, Rs, R2); + + // Add function + //emit_movimm(g_out, (uintptr_t)op_swp_tab[b][PROCNUM], R3); + emit_ptr(g_out, (uintptr_t)op_swp_tab[b][PROCNUM], R3); + // Call function + callR3(g_out); + //emit_mov(g_out, PC, LR); + //emit_jmpreg(g_out, R3); + // Return + emit_write_ptr32_reg(g_out, (uintptr_t)&bb_cycles, R0); + + emit_MMU_aluMemCycles(4, &bb_cycles, 0); + + #undef addr + #undef Rd + #undef Rs + + return 1; +} + +static int OP_SWP(const u32 i) { return op_swp_(i, 0); } +static int OP_SWPB(const u32 i) { return op_swp_(i, 1); } + +//----------------------------------------------------------------------------- +// LDMIA / LDMIB / LDMDA / LDMDB / STMIA / STMIB / STMDA / STMDB +//----------------------------------------------------------------------------- +static u32 popregcount(u32 xt) +{ + uint32_t pop = 0; + for(; xt; xt>>=1) + pop += xt&1; + return pop; +} + +static u64 get_reg_list(u32 reg_mask, int dir) +{ + u64 regs = 0; + for(int j=0; j<16; j++) + { + int k = dir<0 ? j : 15-j; + if(BIT_N(reg_mask,k)) + regs = (regs << 4) | k; + } + return regs; +} + +#ifdef ASMJIT_X64 +// generic needs to spill regs and main doesn't; if it's inlined gcc isn't smart enough to keep the spills out of the common case. +#define LDM_INLINE NOINLINE +#else +// spills either way, and we might as well save codesize by not having separate functions +#define LDM_INLINE INLINE +#endif + +template +static LDM_INLINE FASTCALL u32 OP_LDM_STM_generic(u32 adr, u64 regs, int n) +{ + u32 cycles = 0; + adr &= ~3; + do { + if(store) _MMU_write32(adr, cpu->R[regs&0xF]); + else cpu->R[regs&0xF] = _MMU_read32(adr); + cycles += MMU_memAccessCycles(adr); + adr += 4*dir; + regs >>= 4; + } while(--n > 0); + return cycles; +} + +#ifdef ENABLE_ADVANCED_TIMING +#define ADV_CYCLES cycles += MMU_memAccessCycles(adr); +#else +#define ADV_CYCLES +#endif + +template +static LDM_INLINE FASTCALL u32 OP_LDM_STM_other(u32 adr, u64 regs, int n) +{ + u32 cycles = 0; + adr &= ~3; +#ifndef ENABLE_ADVANCED_TIMING + cycles = n * MMU_memAccessCycles(adr); +#endif + do { + if(PROCNUM==ARMCPU_ARM9) + if(store) _MMU_ARM9_write32(adr, cpu->R[regs&0xF]); + else cpu->R[regs&0xF] = _MMU_ARM9_read32(adr); + else + if(store) _MMU_ARM7_write32(adr, cpu->R[regs&0xF]); + else cpu->R[regs&0xF] = _MMU_ARM7_read32(adr); + ADV_CYCLES; + adr += 4*dir; + regs >>= 4; + } while(--n > 0); + return cycles; +} + +template +static FORCEINLINE FASTCALL u32 OP_LDM_STM_main(u32 adr, u64 regs, int n, u8 *ptr, u32 cycles) +{ +#ifdef ENABLE_ADVANCED_TIMING + cycles = 0; +#endif + uintptr_t *func = (uintptr_t *)&JIT_COMPILED_FUNC(adr, PROCNUM); + +#define OP(j) { \ + /* no need to zero functions in DTCM, since we can't execute from it */ \ + if(null_compiled && store) \ + { \ + *func = 0; \ + *(func+1) = 0; \ + } \ + int Rd = ((uintptr_t)regs >> (j*4)) & 0xF; \ + if(store) *(u32*)ptr = cpu->R[Rd]; \ + else cpu->R[Rd] = *(u32*)ptr; \ + ADV_CYCLES; \ + func += 2*dir; \ + adr += 4*dir; \ + ptr += 4*dir; } + + do { + OP(0); + if(n == 1) break; + OP(1); + if(n == 2) break; + OP(2); + if(n == 3) break; + OP(3); + regs >>= 16; + n -= 4; + } while(n > 0); + return cycles; +#undef OP +#undef ADV_CYCLES +} + +template +static u32 FASTCALL OP_LDM_STM(u32 adr, u32 regslo, u32 regshi, int n) +{ + // TODO use classify_adr? + u32 cycles; + u8 *ptr; + u64 regs = (u64)regslo | (u64)((u64)regshi << 32); + + if((adr ^ (adr + (dir>0 ? (n-1)*4 : -15*4))) & ~0x3FFF) // a little conservative, but we don't want to run too many comparisons + { + // the memory region spans a page boundary, so we can't factor the address translation out of the loop + return OP_LDM_STM_generic(adr, regs, n); + } + else if(PROCNUM==ARMCPU_ARM9 && (adr & ~0x3FFF) == MMU.DTCMRegion) + { + // don't special-case DTCM cycles, even though that would be both faster and more accurate, + // because that wouldn't match the non-jitted version with !ACCOUNT_FOR_DATA_TCM_SPEED + ptr = MMU.ARM9_DTCM + (adr & 0x3FFC); + cycles = n * MMU_memAccessCycles(adr); + if(store) + return OP_LDM_STM_main(adr, regs, n, ptr, cycles); + } + else if((adr & 0x0F000000) == 0x02000000) + { + ptr = MMU.MAIN_MEM + (adr & _MMU_MAIN_MEM_MASK32); + cycles = n * ((PROCNUM==ARMCPU_ARM9) ? 4 : 2); + } + else if(PROCNUM==ARMCPU_ARM7 && !store && (adr & 0xFF800000) == 0x03800000) + { + ptr = MMU.ARM7_ERAM + (adr & 0xFFFC); + cycles = n; + } + else if(PROCNUM==ARMCPU_ARM7 && !store && (adr & 0xFF800000) == 0x03000000) + { + ptr = MMU.SWIRAM + (adr & 0x7FFC); + cycles = n; + } + else + return OP_LDM_STM_other(adr, regs, n); + + return OP_LDM_STM_main(adr, regs, n, ptr, cycles); +} + +typedef u32 FASTCALL (*LDMOpFunc)(u32,u32,u32,int); +static const LDMOpFunc op_ldm_stm_tab[2][2][2] = {{ + { OP_LDM_STM<0,0,-1>, OP_LDM_STM<0,0,+1> }, + { OP_LDM_STM<0,1,-1>, OP_LDM_STM<0,1,+1> }, +},{ + { OP_LDM_STM<1,0,-1>, OP_LDM_STM<1,0,+1> }, + { OP_LDM_STM<1,1,-1>, OP_LDM_STM<1,1,+1> }, +}}; + +static void call_ldm_stm(int reg, u32 bitmask, bool store, int dir) +{ + if(bitmask) + { + // Setup args + emit_mov(g_out,reg,R0); + emit_movimm(g_out, (uintptr_t)popregcount(bitmask), R3); + emit_movimm(g_out, (uintptr_t)get_reg_list(bitmask, dir), R1); + emit_movimm(g_out, (uintptr_t)(get_reg_list(bitmask, dir) >> 32), R2); + + // Add function + emit_ptr(g_out, (uintptr_t)op_ldm_stm_tab[PROCNUM][store][dir>0], R7); + + // Call function + callR7(g_out); + + emit_write_ptr32_reg(g_out, (uintptr_t)&bb_cycles, R0); + + } + else + bb_constant_cycles++; +} + +static int op_bx(/*Mem */int srcreg, bool blx, bool test_thumb); +static int op_bx_thumb(/*Mem */ int srcreg, bool blx, bool test_thumb); + +static int op_ldm_stm(u32 i, bool store, int dir, bool before, bool writeback) +{ + #define adr R4 + #define oldmode R5 + + u32 bitmask = i & 0xFFFF; + u32 pop = popregcount(bitmask); + + reg_pos_ptr(16, adr, VALUE); + if(before) + emit_addimm(g_out, adr, 4*dir, adr); + + call_ldm_stm(adr, bitmask, store, dir); + + if(BIT15(i) && !store) + { + op_bx(reg_ptr(15, R6, VALUE), 0, PROCNUM == ARMCPU_ARM9); + } + + if(writeback) + { + + if(store || !(i & (1 << REG_POS(i,16)))) + { + JIT_COMMENT("--- writeback"); + reg_pos_ptr(16, R3, VALUE); + emit_addimm(g_out, R3, 4*dir*pop, R3); + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(16, R1, ADDRESS), R3); + } + else + { + u32 bitlist = (~((2 << REG_POS(i,16))-1)) & 0xFFFF; + if(i & bitlist) + { + JIT_COMMENT("--- writeback"); + emit_addimm(g_out, adr, 4*dir*(pop-before), adr); + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(16, R1, ADDRESS), adr); + } + } + } + + emit_MMU_aluMemCycles(store ? 1 : 2, &bb_cycles, pop); + + #undef adr + #undef oldmode + + return 1; +} + +static int OP_LDMIA(const u32 i) { return op_ldm_stm(i, 0, +1, 0, 0); } +static int OP_LDMIB(const u32 i) { return op_ldm_stm(i, 0, +1, 1, 0); } +static int OP_LDMDA(const u32 i) { return op_ldm_stm(i, 0, -1, 0, 0); } +static int OP_LDMDB(const u32 i) { return op_ldm_stm(i, 0, -1, 1, 0); } +static int OP_LDMIA_W(const u32 i) { return op_ldm_stm(i, 0, +1, 0, 1); } +static int OP_LDMIB_W(const u32 i) { return op_ldm_stm(i, 0, +1, 1, 1); } +static int OP_LDMDA_W(const u32 i) { return op_ldm_stm(i, 0, -1, 0, 1); } +static int OP_LDMDB_W(const u32 i) { return op_ldm_stm(i, 0, -1, 1, 1); } + +static int OP_STMIA(const u32 i) { return op_ldm_stm(i, 1, +1, 0, 0); } +static int OP_STMIB(const u32 i) { return op_ldm_stm(i, 1, +1, 1, 0); } +static int OP_STMDA(const u32 i) { return op_ldm_stm(i, 1, -1, 0, 0); } +static int OP_STMDB(const u32 i) { return op_ldm_stm(i, 1, -1, 1, 0); } +static int OP_STMIA_W(const u32 i) { return op_ldm_stm(i, 1, +1, 0, 1); } +static int OP_STMIB_W(const u32 i) { return op_ldm_stm(i, 1, +1, 1, 1); } +static int OP_STMDA_W(const u32 i) { return op_ldm_stm(i, 1, -1, 0, 1); } +static int OP_STMDB_W(const u32 i) { return op_ldm_stm(i, 1, -1, 1, 1); } + +static int op_ldm_stm2(u32 i, bool store, int dir, bool before, bool writeback) +{ + u32 bitmask = i & 0xFFFF; + u32 pop = popregcount(bitmask); + bool bit15 = BIT15(i); + + u32 adr_first = cpu->R[REG_POS(i, 16)]; + + #define adr R4 + #define oldmode R5 + + reg_pos_ptr(16, adr, VALUE); + + if(before) + emit_addimm(g_out, adr, 4*dir, adr); + + if (!bit15 || store) + { + emit_ptr(g_out, (uintptr_t)&ARMPROC, R0); + emit_movimm(g_out, SYS, R1); + emit_ptr(g_out, (uintptr_t)armcpu_switchMode, R3); + callR3(g_out); + emit_mov(g_out, R0, oldmode); + } + + call_ldm_stm(adr, bitmask, store, dir); + + if(!bit15 || store) + { + emit_ptr(g_out, (uintptr_t)&ARMPROC, R0); + emit_mov(g_out, oldmode, R1); + emit_ptr(g_out, (uintptr_t)armcpu_switchMode, R3); + callR3(g_out); + } + else + { + S_DST_R15; + } + + // FIXME + if(writeback) + { + if(store || !(i & (1 << REG_POS(i,16)))) { + emit_add_ptr32_regptrTO_immFROM(g_out, reg_pos_ptr(16, R1, ADDRESS), 4*dir*pop); + } + else + { + u32 bitlist = (~((2 << REG_POS(i,16))-1)) & 0xFFFF; + if(i & bitlist) + { + emit_addimm(g_out, adr, 4*dir*(pop-before), adr); + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(16, R1, ADDRESS), adr); + } + } + } + + emit_MMU_aluMemCycles(store ? 1 : 2, &bb_cycles, pop); + + #undef adr + #undef oldmode + + return 1; +} + +static int OP_LDMIA2(const u32 i) { return op_ldm_stm2(i, 0, +1, 0, 0); } +static int OP_LDMIB2(const u32 i) { return op_ldm_stm2(i, 0, +1, 1, 0); } +static int OP_LDMDA2(const u32 i) { return op_ldm_stm2(i, 0, -1, 0, 0); } +static int OP_LDMDB2(const u32 i) { return op_ldm_stm2(i, 0, -1, 1, 0); } +static int OP_LDMIA2_W(const u32 i) { return op_ldm_stm2(i, 0, +1, 0, 1); } +static int OP_LDMIB2_W(const u32 i) { return op_ldm_stm2(i, 0, +1, 1, 1); } +static int OP_LDMDA2_W(const u32 i) { return op_ldm_stm2(i, 0, -1, 0, 1); } +static int OP_LDMDB2_W(const u32 i) { return op_ldm_stm2(i, 0, -1, 1, 1); } + +static int OP_STMIA2(const u32 i) { return op_ldm_stm2(i, 1, +1, 0, 0); } +static int OP_STMIB2(const u32 i) { return op_ldm_stm2(i, 1, +1, 1, 0); } +static int OP_STMDA2(const u32 i) { return op_ldm_stm2(i, 1, -1, 0, 0); } +static int OP_STMDB2(const u32 i) { return op_ldm_stm2(i, 1, -1, 1, 0); } +static int OP_STMIA2_W(const u32 i) { return op_ldm_stm2(i, 1, +1, 0, 1); } +static int OP_STMIB2_W(const u32 i) { return op_ldm_stm2(i, 1, +1, 1, 1); } +static int OP_STMDA2_W(const u32 i) { return op_ldm_stm2(i, 1, -1, 0, 1); } +static int OP_STMDB2_W(const u32 i) { return op_ldm_stm2(i, 1, -1, 1, 1); } + +//----------------------------------------------------------------------------- +// Branch +//----------------------------------------------------------------------------- +#define SIGNEXTEND_11(i) (((s32)i<<21)>>21) +#define SIGNEXTEND_24(i) (((s32)i<<8)>>8) + +static int op_b(u32 i, bool bl) +{ + u32 dst = bb_r15 + (SIGNEXTEND_24(i) << 2); + if(CONDITION(i)==0xF) + { + if(bl) + dst += 2; + emit_or_ptr8_regptr_imm(g_out, cpu_ptr_byte(CPSR, 0, R1, ADDRESS), 1<<5); + } + if(bl || CONDITION(i)==0xF) + emit_write_ptr32_regptrTO_immFROM(g_out, reg_ptr(14, R1, ADDRESS), bb_next_instruction); + + emit_write_ptr32_regptrTO_immFROM(g_out, cpu_ptr(instruct_adr, R1, ADDRESS), dst); + + return 1; +} + +static int OP_B(const u32 i) { return op_b(i, 0); } +static int OP_BL(const u32 i) { return op_b(i, 1); } + +static int op_bx(/*Mem */int srcreg, bool blx, bool test_thumb) +{ + #define thumb tmp2_reg + #define dst tmp_reg + #define mask R8 + + emit_mov(g_out, srcreg, dst); + + if(test_thumb) + { + emit_mov(g_out, dst, thumb); + + emit_andimm(g_out, thumb, 1, thumb); + emit_mov(g_out, thumb,R9); + emit_lea_ptr_reg(g_out, 0xFFFFFFFC, R9, 2, mask); + emit_lsl(g_out, thumb, 5); + + emit_or_ptr8_regptr_reg(g_out, cpu_ptr_byte(CPSR, 0, R1, ADDRESS), thumb); + + emit_and(g_out, dst, mask, dst); + } + else + emit_andimm(g_out, dst, 0xFFFFFFFC, dst); + + if(blx) + emit_write_ptr32_regptrTO_immFROM(g_out, reg_ptr(14, R1, ADDRESS), bb_next_instruction); + emit_write_ptr32_regptrTO_regFROM(g_out, cpu_ptr(instruct_adr, R1, ADDRESS), dst); + + #undef thumb + #undef dst + #undef mask + + return 1; +} + +static int OP_BX(const u32 i) { return op_bx(reg_pos_ptr(0, R1, VALUE), 0, 1); } +static int OP_BLX_REG(const u32 i) { return op_bx(reg_pos_ptr(0, R1, VALUE), 1, 1); } + +//----------------------------------------------------------------------------- +// CLZ +//----------------------------------------------------------------------------- +static int OP_CLZ(const u32 i) +{ + reg_pos_ptr(0, tmp2_reg, VALUE); + + emit_clz(g_out, tmp2_reg, tmp_reg); + + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(12, R1, ADDRESS), tmp_reg); + + return 1; +} + +//----------------------------------------------------------------------------- +// MCR / MRC +//----------------------------------------------------------------------------- + +// precalculate region masks/sets from cp15 register ----- JIT +static void maskPrecalc(u32 _num) +{ +#define precalc(num) { \ + u32 mask = 0, set = 0xFFFFFFFF ; /* (x & 0) == 0xFF..FF is allways false (disabled) */ \ + if (BIT_N(cp15.protectBaseSize[num],0)) /* if region is enabled */ \ + { /* reason for this define: naming includes var */ \ + mask = CP15_MASKFROMREG(cp15.protectBaseSize[num]) ; \ + set = CP15_SETFROMREG(cp15.protectBaseSize[num]) ; \ + if (CP15_SIZEIDENTIFIER(cp15.protectBaseSize[num])==0x1F) \ + { /* for the 4GB region, u32 suffers wraparound */ \ + mask = 0 ; set = 0 ; /* (x & 0) == 0 is allways true (enabled) */ \ + } \ + } \ + armcp15_setSingleRegionAccess(&cp15, num, mask, set) ; \ +} + switch(_num) + { + case 0: precalc(0); break; + case 1: precalc(1); break; + case 2: precalc(2); break; + case 3: precalc(3); break; + case 4: precalc(4); break; + case 5: precalc(5); break; + case 6: precalc(6); break; + case 7: precalc(7); break; + + case 0xFF: + precalc(0); + precalc(1); + precalc(2); + precalc(3); + precalc(4); + precalc(5); + precalc(6); + precalc(7); + break; + } +#undef precalc +} + +#define _maskPrecalc(num) \ +{ \ + emit_movimm(g_out, num, R0);\ + emit_ptr(g_out, (uintptr_t)maskPrecalc, R3); \ + callR3(g_out); \ +} + +static int OP_MCR(const u32 i) +{ + #define data tmp2_reg + + if (PROCNUM == ARMCPU_ARM7) return 0; + + u32 cpnum = REG_POS(i, 8); + if(cpnum != 15) + { + // TODO - exception? + printf("JIT: MCR P%i, 0, R%i, C%i, C%i, %i, %i (don't allocated coprocessor)\n", + cpnum, REG_POS(i, 12), REG_POS(i, 16), REG_POS(i, 0), (i>>21)&0x7, (i>>5)&0x7); + return 2; + } + if (REG_POS(i, 12) == 15) + { + printf("JIT: MCR Rd=R15\n"); + return 2; + } + + u8 CRn = REG_POS(i, 16); // Cn + u8 CRm = REG_POS(i, 0); // Cm + u8 opcode1 = ((i>>21)&0x7); // opcode1 + u8 opcode2 = ((i>>5)&0x7); // opcode2 + + static uintptr_t bb_cp15; + reg_pos_ptr(12, data, VALUE); + emit_write_ptr(g_out, (uintptr_t)&bb_cp15, (uintptr_t)&cp15); + + bool bUnknown = false; + switch(CRn) + { + case 1: + if((opcode1==0) && (opcode2==0) && (CRm==0)) + { + static uintptr_t bb_mmu; + emit_write_ptr(g_out, (uintptr_t)&bb_mmu, (uintptr_t)&MMU); + + + mmu_ptr_byte(ARM9_RW_MODE, R2, ADDRESS); + + emit_testimm(g_out, data, (1<<7)); + emit_setnz(g_out, R1); + emit_strb(g_out, R2, R1); + + #define vec R6 + + emit_movimm(g_out, 0xFFFF0000, tmp); + emit_movimm(g_out, 0, vec); + emit_xor(g_out, vec, vec, vec); + emit_testimm(g_out, data, (1 << 13)); + + emit_mov_cond(g_out, tmp, vec, 0x1); + emit_write_ptr32_regptrTO_regFROM(g_out, cpu_ptr(intVector, R1, ADDRESS), vec); + + cpu_ptr_byte(LDTBit, 0, R2, ADDRESS); + + emit_testimm(g_out, data, (1 << 15)); + emit_setz(g_out, R1); + emit_strb(g_out, R2, R1); + + emit_andimm(g_out, data, 0x000FF085, data); + emit_orimm(g_out, data, 0x00000078, data); + + emit_write_ptr32_regptrTO_regFROM(g_out, cp15_ptr(ctrl, R1, ADDRESS), data); + + #undef vec + + break; + } + bUnknown = true; + break; + case 2: + if((opcode1==0) && (CRm==0)) + { + switch(opcode2) + { + case 0: + emit_write_ptr32_regptrTO_regFROM(g_out, cp15_ptr(DCConfig, R1, ADDRESS), data); + break; + case 1: + emit_write_ptr32_regptrTO_regFROM(g_out, cp15_ptr(ICConfig, R1, ADDRESS), data); + break; + default: + bUnknown = true; + break; + } + break; + } + bUnknown = true; + break; + case 3: + if((opcode1==0) && (opcode2==0) && (CRm==0)) + { + emit_write_ptr32_regptrTO_regFROM(g_out, cp15_ptr(writeBuffCtrl, R1, ADDRESS), data); + break; + } + bUnknown = true; + break; + case 5: + if((opcode1==0) && (CRm==0)) + { + switch(opcode2) + { + case 2: + emit_write_ptr32_regptrTO_regFROM(g_out, cp15_ptr(DaccessPerm, R1, ADDRESS), data); + _maskPrecalc(0xFF); + break; + case 3: + emit_write_ptr32_regptrTO_regFROM(g_out, cp15_ptr(IaccessPerm, R1, ADDRESS), data); + _maskPrecalc(0xFF); + break; + default: + bUnknown = true; + break; + } + } + bUnknown = true; + break; + case 6: + if((opcode1==0) && (opcode2==0)) + { + if (CRm < 8) + { + emit_write_ptr32_regptrTO_regFROM(g_out, cp15_ptr_off(protectBaseSize, (CRm * sizeof(u32)), R1, ADDRESS), data); + _maskPrecalc(CRm); + break; + } + } + bUnknown = true; + break; + case 7: + if((CRm==0)&&(opcode1==0)&&((opcode2==4))) + { + emit_write_ptr32_regptrTO_immFROM(g_out, cpu_ptr(freeze, R1, ADDRESS), CPU_FREEZE_IRQ_IE_IF); + break; + } + bUnknown = true; + break; + case 9: + if(opcode1==0) + { + switch(CRm) + { + case 0: + switch(opcode2) + { + case 0: + emit_write_ptr32_regptrTO_regFROM(g_out, cp15_ptr(DcacheLock, R1, ADDRESS), data); + break; + case 1: + emit_write_ptr32_regptrTO_regFROM(g_out, cp15_ptr(IcacheLock, R1, ADDRESS), data); + break; + default: + bUnknown = true; + break; + } + case 1: + switch(opcode2) + { + case 0: + { + emit_andimm(g_out, data, 0x0FFFF000, data); + static uintptr_t bb_mmu; + emit_write_ptr(g_out, (uintptr_t)&bb_mmu, (uintptr_t)&MMU); + emit_write_ptr32_regptrTO_regFROM(g_out, mmu_ptr(DTCMRegion, R1, ADDRESS), data); + emit_write_ptr32_regptrTO_regFROM(g_out, cp15_ptr(DTCMRegion, R1, ADDRESS), data); + } + break; + case 1: + { + static uintptr_t bb_mmu; + emit_write_ptr(g_out, (uintptr_t)&bb_mmu, (uintptr_t)&MMU); + emit_write_ptr32_regptrTO_immFROM(g_out, mmu_ptr(ITCMRegion, R1, ADDRESS), 0); + emit_write_ptr32_regptrTO_regFROM(g_out, cp15_ptr(ITCMRegion, R1, ADDRESS), data); + } + break; + default: + bUnknown = true; + break; + } + } + break; + } + bUnknown = true; + break; + case 13: + bUnknown = true; + break; + case 15: + bUnknown = true; + break; + default: + bUnknown = true; + break; + } + + if (bUnknown) + { + //printf("Unknown MCR command: MRC P15, 0, R%i, C%i, C%i, %i, %i\n", REG_POS(i, 12), CRn, CRm, opcode1, opcode2); + return 1; + } + + #undef data + + return 1; +} +static int OP_MRC(const u32 i) +{ + if (PROCNUM == ARMCPU_ARM7) return 0; + + u32 cpnum = REG_POS(i, 8); + if(cpnum != 15) + { + printf("MRC P%i, 0, R%i, C%i, C%i, %i, %i (don't allocated coprocessor)\n", cpnum, REG_POS(i, 12), REG_POS(i, 16), REG_POS(i, 0), (i>>21)&0x7, (i>>5)&0x7); + return 2; + } + + u8 CRn = REG_POS(i, 16); // Cn + u8 CRm = REG_POS(i, 0); // Cm + u8 opcode1 = ((i>>21)&0x7); // opcode1 + u8 opcode2 = ((i>>5)&0x7); // opcode2 + + #define data tmp2_reg + + static uintptr_t bb_cp15; + emit_write_ptr(g_out, (uintptr_t)&bb_cp15, (uintptr_t)&cp15); + + bool bUnknown = false; + switch(CRn) + { + case 0: + if((opcode1 == 0)&&(CRm==0)) + { + switch(opcode2) + { + case 1: + cp15_ptr(cacheType, data, VALUE); + break; + case 2: + cp15_ptr(TCMSize, data, VALUE); + break; + default: // FIXME + cp15_ptr(IDCode, data, VALUE); + break; + } + break; + } + bUnknown = true; + break; + + case 1: + if((opcode1==0) && (opcode2==0) && (CRm==0)) + { + cp15_ptr(ctrl, data, VALUE); + break; + } + bUnknown = true; + break; + + case 2: + if((opcode1==0) && (CRm==0)) + { + switch(opcode2) + { + case 0: + cp15_ptr(DCConfig, data, VALUE); + break; + case 1: + cp15_ptr(ICConfig, data, VALUE); + break; + default: + bUnknown = true; + break; + } + break; + } + bUnknown = true; + break; + + case 3: + if((opcode1==0) && (opcode2==0) && (CRm==0)) + { + cp15_ptr(writeBuffCtrl, data, VALUE); + break; + } + bUnknown = true; + break; + + case 5: + if((opcode1==0) && (CRm==0)) + { + switch(opcode2) + { + case 2: + cp15_ptr(DaccessPerm, data, VALUE); + break; + case 3: + cp15_ptr(IaccessPerm, data, VALUE); + break; + default: + bUnknown = true; + break; + } + break; + } + bUnknown = true; + break; + + case 6: + if((opcode1==0) && (opcode2==0)) + { + if (CRm < 8) + { + cp15_ptr_off(protectBaseSize, CRm * sizeof(u32), data, VALUE); + break; + } + } + bUnknown = true; + break; + + case 7: + bUnknown = true; + break; + + case 9: + if(opcode1 == 0) + { + switch(CRm) + { + case 0: + switch(opcode2) + { + case 0: + cp15_ptr(DcacheLock, data, VALUE); + break; + case 1: + cp15_ptr(IcacheLock, data, VALUE); + break; + default: + bUnknown = true; + break; + } + + case 1: + switch(opcode2) + { + case 0: + cp15_ptr(DTCMRegion, data, VALUE); + break; + case 1: + cp15_ptr(ITCMRegion, data, VALUE); + break; + default: + bUnknown = true; + break; + } + } + // + break; + } + bUnknown = true; + break; + + case 13: + bUnknown = true; + break; + + case 15: + bUnknown = true; + break; + + default: + bUnknown = true; + break; + } + + if (bUnknown) + { + return 1; + } + + if (REG_POS(i, 12) == 15) + { + emit_andimm(g_out, data, 0xF0000000, data); + emit_and_ptr32_regptr(g_out, cpu_ptr(CPSR, R1, ADDRESS), 0x0FFFFFFF); + emit_or_ptr32_regptr_reg(g_out, cpu_ptr(CPSR, R1, ADDRESS), data); + } + else { + //if (REG_POS(i, 12) == 1) emit_brk(g_out); + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_ptr(12, R1, ADDRESS), data); + } + + #undef data + + return 1; +} + +u32 op_swi(u8 swinum) +{ + if(cpu->swi_tab) + { + // TODO: + return 0; + + emit_ptr(g_out, (uintptr_t)ARM_swi_tab[PROCNUM][swinum], R3); + callR3(g_out); + emit_write_ptr32_reg(g_out, (uintptr_t)&bb_cycles, R0); + emit_inc_ptr_imm(g_out, (uintptr_t)&bb_cycles, 3); + return 1; + } + + cpu_ptr(CPSR.val, R4, VALUE); + + emit_ptr(g_out, (uintptr_t)&ARMPROC, R0); + emit_movimm(g_out, SVC, R1); + emit_ptr(g_out, (uintptr_t)armcpu_switchMode, R3); + callR3(g_out); + + emit_mov(g_out, R4, tmp2_reg); + + emit_write_ptr32_regptrTO_immFROM(g_out,reg_ptr(14, R1, ADDRESS), bb_next_instruction); + + emit_write_ptr32_regptrTO_regFROM(g_out, cpu_ptr(SPSR.val, R1, ADDRESS), tmp2_reg); + + cpu_ptr(CPSR.val, tmp2_reg, VALUE); + emit_andimm(g_out, tmp2_reg, ~(1 << 5), tmp2_reg); + emit_orimm(g_out, tmp2_reg, (1 << 7), tmp2_reg); + emit_write_ptr32_regptrTO_regFROM(g_out, cpu_ptr(CPSR.val, R1, ADDRESS), tmp2_reg); + + emit_movimm(g_out, cpu->intVector+0x08, R0); + emit_write_ptr32_regptrTO_regFROM(g_out, cpu_ptr(next_instruction, R1, ADDRESS), R0); + + return 1; +} + +static int OP_SWI(const u32 i) { return op_swi((i >> 16) & 0x1F); } + +//----------------------------------------------------------------------------- +// BKPT +//----------------------------------------------------------------------------- +static int OP_BKPT(const u32 i) { printf("JIT: unimplemented OP_BKPT\n"); return 0; } + +//----------------------------------------------------------------------------- +// THUMB +//----------------------------------------------------------------------------- +#define OP_SHIFTS_IMM(x86inst) \ + u8 cf_change = 1; \ + const u32 rhs2 = ((i>>6) & 0x1F); \ + if (_REG_NUM(i, 0) == _REG_NUM(i, 3)) \ + { \ + reg_pos_thumb(0, R1, VALUE);\ + x86inst(g_out, R1, rhs2);\ + emit_write_ptr32_regptrTO_regFROM(g_out, CACHED_PTR(reg_pos_thumb(0, R2, ADDRESS)), R1); \ + } \ + else \ + { \ + reg_pos_thumb(3, R1, VALUE); \ + x86inst(g_out, R1, rhs2);\ + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_thumb(0, R2, ADDRESS), R1); \ + } \ + SET_NZC; \ + return 1; + +#define OP_SHIFTS_REG(x86inst, bit) \ + u8 cf_change = 1; \ + GET_CARRY(0); \ + reg_pos_thumb(3, imm_reg, VALUE); \ + x86inst(g_out, reg_pos_thumb(0, R1, VALUE), imm_reg); \ + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_thumb(0, R0, ADDRESS), R1); \ + SET_NZC; \ + return 1; + +#define OP_LOGIC(x86inst, _conv) \ + emit_mov(g_out, reg_pos_thumb(3, R1, VALUE), rhs); \ + if (_conv==1) emit_not(g_out, rhs, rhs);\ + reg_pos_thumb(0, R1, VALUE);\ + x86inst(g_out, R1, rhs, R1);\ + emit_write_ptr32_regptrTO_regFROM(g_out, CACHED_PTR(reg_pos_thumb(0, R2, ADDRESS)), R1); \ + SET_NZ(0);\ + return 1; + +//----------------------------------------------------------------------------- +// LSL / LSR / ASR / ROR +//----------------------------------------------------------------------------- +static int OP_LSL_0(const u32 i) +{ + if (_REG_NUM(i, 0) == _REG_NUM(i, 3)) + emit_cmpimm(g_out, reg_pos_thumb(0, R1, VALUE), 0); + else + { + //emit_read_ptr32_regptrFROM_regTO(g_out, reg_pos_thumb(3, R1, VALUE), rhs); + emit_mov(g_out, reg_pos_thumb(3, R1, VALUE), rhs); + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_thumb(0, R1, ADDRESS), rhs); + + emit_cmpimm(g_out, rhs, 0); + } + SET_NZ(0); + + return 1; +} + + +static int OP_LSL(const u32 i) { OP_SHIFTS_IMM(emit_lsls); } +static int OP_LSL_REG(const u32 i) { OP_SHIFTS_REG(emit_lsls_reg, 0); } + +static int OP_LSR_0(const u32 i) +{ + emit_testimm(g_out, reg_pos_thumb(3, R1, VALUE), (1 << 31)); + emit_setnz(g_out, rcf); + SET_NZC_SHIFTS_ZERO(1); + emit_write_ptr32_regptrTO_immFROM(g_out, reg_pos_thumb(0, R1, ADDRESS), 0); + + return 1; +} + +static int OP_LSR(const u32 i) { OP_SHIFTS_IMM(emit_lsrs); } +static int OP_LSR_REG(const u32 i) { OP_SHIFTS_REG(emit_lsrs_reg, 31); } + +static int OP_ASR_0(const u32 i) +{ + u8 cf_change = 1; + if (_REG_NUM(i, 0) == _REG_NUM(i, 3)) { + emit_asrs(g_out, reg_pos_thumb(0, R1, VALUE), 31); + emit_write_ptr32_regptrTO_regFROM(g_out, CACHED_PTR(reg_pos_thumb(0, R2, ADDRESS)), R1); // TAGCHECK + } + else + { + reg_pos_thumb(3, rhs, VALUE); + emit_asrs(g_out, rhs, 31); + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_thumb(0, R1, ADDRESS), rhs); + } + SET_NZC; + + return 1; +} + +static int OP_ASR(const u32 i) { OP_SHIFTS_IMM(emit_asrs); } +static int OP_ASR_REG(const u32 i) +{ + u8 cf_change = 1; + GET_CARRY(0); + reg_pos_thumb(3, imm_reg, VALUE); + emit_asrs_reg(g_out, reg_pos_thumb(0, R1, VALUE), imm_reg); + emit_write_ptr32_regptrTO_regFROM(g_out, CACHED_PTR(reg_pos_thumb(0, R0, ADDRESS)), R1); + SET_NZC; + return 1; +} + +//----------------------------------------------------------------------------- +// ROR +//----------------------------------------------------------------------------- +static int OP_ROR_REG(const u32 i) +{ + u8 cf_change = 1; + GET_CARRY(0); + reg_pos_thumb(3, imm_reg, VALUE); + emit_rors_reg(g_out, reg_pos_thumb(0, R1, VALUE), imm_reg); + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_thumb(0, R0, ADDRESS), R1); + SET_NZC; + + return 1; +} + +//----------------------------------------------------------------------------- +// AND / ORR / EOR / BIC +//----------------------------------------------------------------------------- +static int OP_AND(const u32 i) { OP_LOGIC(emit_ands, 0); } +static int OP_ORR(const u32 i) { OP_LOGIC(emit_ors, 0); } +static int OP_EOR(const u32 i) { OP_LOGIC(emit_xors, 0); } +static int OP_BIC(const u32 i) { OP_LOGIC(emit_ands, 1); } + +//----------------------------------------------------------------------------- +// NEG +//----------------------------------------------------------------------------- +static int OP_NEG(const u32 i) +{ + if (_REG_NUM(i, 0) == _REG_NUM(i, 3)) + emit_negs_ptr32_regptr(g_out, reg_pos_thumb(0, R1, ADDRESS)); + else + { + reg_pos_thumb(3, tmp, VALUE); + emit_negs(g_out, tmp, tmp); + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_thumb(0, R1, ADDRESS), tmp); + } + SET_NZCV(1); + + return 1; +} + +//----------------------------------------------------------------------------- +// ADD +//----------------------------------------------------------------------------- +static int OP_ADD_IMM3(const u32 i) +{ + u32 imm3 = (i >> 6) & 0x07; + + if (imm3 == 0) // mov 2 + { + reg_pos_thumb(3, tmp, VALUE); + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_thumb(0, R1, ADDRESS), tmp); + emit_cmpimm(g_out, tmp,0); + SET_NZ(1); + + return 1; + } + if (_REG_NUM(i, 0) == _REG_NUM(i, 3)) + { + emit_adds_ptr32_regptrTO_immFROM(g_out, reg_pos_thumb(0, R1, ADDRESS), imm3); + } + else + { + reg_pos_thumb(3, tmp, VALUE); + emit_addsimm(g_out, tmp, imm3, tmp); + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_thumb(0, R1, ADDRESS), tmp); + } + SET_NZCV(0); + + return 1; +} +static int OP_ADD_IMM8(const u32 i) +{ + emit_adds_ptr32_regptrTO_immFROM(g_out, reg_pos_thumb(8, R1, ADDRESS), (i & 0xFF)); + SET_NZCV(0); + + return 1; +} +static int OP_ADD_REG(const u32 i) +{ + if (_REG_NUM(i, 0) == _REG_NUM(i, 3)) + { + reg_pos_thumb(6, tmp, VALUE); + emit_adds_ptr32_regptrTO_regFROM(g_out, reg_pos_thumb(0, R1, ADDRESS), tmp); + } + else + if (_REG_NUM(i, 0) == _REG_NUM(i, 6)) + { + reg_pos_thumb(3, tmp, VALUE); + emit_adds_ptr32_regptrTO_regFROM(g_out, reg_pos_thumb(0, R1, ADDRESS), tmp); + } + else + { + reg_pos_thumb(3, tmp, VALUE); + reg_pos_thumb(6, tmp2, VALUE); + emit_adds(g_out, tmp, tmp2, tmp); + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_thumb(0, R1, ADDRESS), tmp); + } + + SET_NZCV(0); + + return 1; +} +static int OP_ADD_SPE(const u32 i) +{ + u32 Rd = _REG_NUM(i, 0) | ((i>>4)&8); + reg_ptr(Rd, tmp, VALUE); + reg_pos_ptr(3, tmp2, VALUE); + emit_add(g_out, tmp, tmp2, tmp); + emit_write_ptr32_regptrTO_regFROM(g_out, reg_ptr(Rd, R1, ADDRESS), tmp); + + if(Rd==15) + emit_write_ptr32_regptrTO_regFROM(g_out, cpu_ptr(next_instruction, R1, ADDRESS), tmp); + + return 1; +} + +static int OP_ADD_2PC(const u32 i) +{ + u32 imm = ((i&0xFF)<<2); + emit_write_ptr32_regptrTO_immFROM(g_out, reg_pos_thumb(8, R1, ADDRESS), (bb_r15 & 0xFFFFFFFC) + imm); + + return 1; +} + +static int OP_ADD_2SP(const u32 i) +{ + u32 imm = ((i&0xFF)<<2); + reg_ptr(13, tmp, VALUE); + if (imm) emit_addimm(g_out, tmp, imm, tmp); + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_thumb(8, R1, ADDRESS), tmp); + + return 1; +} + +//----------------------------------------------------------------------------- +// SUB +//----------------------------------------------------------------------------- +static int OP_SUB_IMM3(const u32 i) +{ + u32 imm3 = (i >> 6) & 0x07; + + if (_REG_NUM(i, 0) == _REG_NUM(i, 3)) + { + emit_subs_ptr32_regptrTO_immFROM(g_out, reg_pos_thumb(0, R1, ADDRESS), imm3); + } + else + { + reg_pos_thumb(3, tmp, VALUE); + emit_subsimm(g_out, tmp, imm3, tmp); + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_thumb(0, R1, ADDRESS), tmp); + } + + SET_NZCV(1); + + return 1; +} +static int OP_SUB_IMM8(const u32 i) +{ + emit_subs_ptr32_regptrTO_immFROM(g_out, reg_pos_thumb(8, R1, ADDRESS), (i & 0xFF)); + SET_NZCV(1); + + return 1; +} +static int OP_SUB_REG(const u32 i) +{ + if (_REG_NUM(i, 0) == _REG_NUM(i, 3)) + { + reg_pos_thumb(6, tmp, VALUE); + reg_pos_thumb(0, tmp2, VALUE); + emit_subs(g_out, tmp2, tmp, tmp2); + emit_write_ptr32_regptrTO_regFROM(g_out, CACHED_PTR(reg_pos_thumb(0, R1, ADDRESS)), tmp2); + } + else + { + reg_pos_thumb(3, tmp, VALUE); + reg_pos_thumb(6, tmp2, VALUE); + emit_subs(g_out, tmp, tmp2, tmp); + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_thumb(0, R1, ADDRESS), tmp); + } + + SET_NZCV(1); + + return 1; +} + +//----------------------------------------------------------------------------- +// ADC +//----------------------------------------------------------------------------- +static int OP_ADC_REG(const u32 i) +{ + reg_pos_thumb(3, tmp, VALUE); + GET_CARRY(0); + reg_pos_thumb(0, tmp2, VALUE); + emit_adcs(g_out, tmp2, tmp, tmp2); + emit_write_ptr32_regptrTO_regFROM(g_out, CACHED_PTR(reg_pos_thumb(0, R1, ADDRESS)), tmp2); + SET_NZCV(0); + + return 1; +} + +//----------------------------------------------------------------------------- +// SBC +//----------------------------------------------------------------------------- +static int OP_SBC_REG(const u32 i) +{ + reg_pos_thumb(3, tmp, VALUE); + GET_CARRY(1); + reg_pos_thumb(0, tmp2, VALUE); + emit_sbcs(g_out, tmp2, tmp, tmp2); + emit_write_ptr32_regptrTO_regFROM(g_out, CACHED_PTR(reg_pos_thumb(0, R1, ADDRESS)), tmp2); + SET_NZCV(1); + + return 1; +} + +//----------------------------------------------------------------------------- +// MOV / MVN +//----------------------------------------------------------------------------- +static int OP_MOV_IMM8(const u32 i) +{ + emit_movimms(g_out, i & 0xFF, tmp); + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_thumb(8, R1, ADDRESS), tmp); + SET_NZ(0); + + return 1; +} + +static int OP_MOV_SPE(const u32 i) +{ + u32 Rd = _REG_NUM(i, 0) | ((i>>4)&8); + reg_pos_ptr(3, tmp, VALUE); + emit_write_ptr32_regptrTO_regFROM(g_out, reg_ptr(Rd, R1, ADDRESS), tmp); + if(Rd == 15) + { + emit_write_ptr32_regptrTO_regFROM(g_out, cpu_ptr(next_instruction, R1, ADDRESS), tmp); + bb_constant_cycles += 2; + } + + return 1; +} + +static int OP_MVN(const u32 i) +{ + reg_pos_thumb(3, tmp, VALUE); + emit_mvns(g_out, tmp, tmp2); + emit_write_ptr32_regptrTO_regFROM(g_out, reg_pos_thumb(0, R1, ADDRESS), tmp2); + SET_NZ(0); + + return 1; +} + +//----------------------------------------------------------------------------- +// MUL +//----------------------------------------------------------------------------- +static int OP_MUL_REG(const u32 i) +{ + reg_pos_thumb(3, tmp, VALUE); + reg_pos_thumb(0, lhs, VALUE); + + emit_muls(g_out, lhs, tmp); + emit_write_ptr32_regptrTO_regFROM(g_out, CACHED_PTR(reg_pos_thumb(0, R1, ADDRESS)), lhs); + SET_NZ(0); + if (PROCNUM == ARMCPU_ARM7) + emit_write_ptr32(g_out, (uintptr_t)&bb_cycles, (uintptr_t)4); + else + MUL_Mxx_END(lhs, 0, 1); + + return 1; +} + +//----------------------------------------------------------------------------- +// CMP / CMN +//----------------------------------------------------------------------------- +static int OP_CMP_IMM8(const u32 i) +{ + reg_pos_thumb(8, tmp, VALUE); + emit_cmpimm(g_out, tmp, (i & 0xFF)); + SET_NZCV(1); + + return 1; +} + +static int OP_CMP(const u32 i) +{ + reg_pos_thumb(3, tmp2, VALUE); + reg_pos_thumb(0, tmp, VALUE); + emit_cmp(g_out, tmp, tmp2); + SET_NZCV(1); + + return 1; +} + +static int OP_CMP_SPE(const u32 i) +{ + u32 Rn = (i&7) | ((i>>4)&8); + reg_pos_ptr(3, tmp, VALUE); + reg_ptr(Rn, tmp2, VALUE); + emit_cmp(g_out, tmp, tmp2); + SET_NZCV(1); + + return 1; +} + +static int OP_CMN(const u32 i) +{ + reg_pos_thumb(0, tmp, VALUE); + reg_pos_thumb(3, tmp2, VALUE); + emit_adds(g_out, tmp, tmp2, tmp); // MAY NEED TO SET FLAGS HERE + SET_NZCV(0); + + return 1; +} + +//----------------------------------------------------------------------------- +// TST +//----------------------------------------------------------------------------- +static int OP_TST(const u32 i) +{ + reg_pos_thumb(3, tmp, VALUE); + reg_pos_thumb(0, tmp2, VALUE); + emit_test(g_out, tmp, tmp2); + SET_NZ(0); + + return 1; +} + +//----------------------------------------------------------------------------- +// STR / LDR / STRB / LDRB +//----------------------------------------------------------------------------- +#define STR_THUMB(mem_op, offset) \ + u32 adr_first = cpu->R[_REG_NUM(i, 3)];\ + reg_pos_thumb(3, R0, VALUE);\ + if ((offset) != -1) \ + { \ + if ((offset) != 0) \ + { \ + emit_addimm(g_out, R0, (uintptr_t)(offset), R0);\ + adr_first += (u32)(offset); \ + } \ + } \ + else \ + { \ + reg_pos_thumb(6, R1, VALUE); \ + emit_add(g_out, R0, R1, R0);\ + adr_first += cpu->R[_REG_NUM(i, 6)]; \ + } \ + reg_pos_thumb(0, R1, VALUE); \ + emit_ptr(g_out, (uintptr_t)mem_op##_tab[PROCNUM][classify_adr(adr_first,1)], R3); \ + callR3(g_out); \ + emit_write_ptr32_reg(g_out, (uintptr_t)&bb_cycles, R0);\ + return 1; + +#define LDR_THUMB(mem_op, offset) \ + u32 adr_first = cpu->R[_REG_NUM(i, 3)]; \ + reg_pos_thumb(3, R0, VALUE);\ + if ((offset) != -1) \ + { \ + if ((offset) != 0) \ + { \ + emit_addimm(g_out, R0, (uintptr_t)(offset), R0);\ + adr_first += (u32)(offset); \ + } \ + } \ + else \ + { \ + reg_pos_thumb(6, R1, VALUE); \ + emit_add(g_out, R0, R1, R0);\ + adr_first += cpu->R[_REG_NUM(i, 6)]; \ + } \ + reg_pos_thumb(0, R1, ADDRESS); \ + emit_ptr(g_out, (uintptr_t)mem_op##_tab[PROCNUM][classify_adr(adr_first,0)], R3); \ + callR3(g_out); \ + emit_write_ptr32_reg(g_out, (uintptr_t)&bb_cycles, R0);\ + return 1; + +static int OP_STRB_IMM_OFF(const u32 i) { STR_THUMB(STRB, ((i>>6)&0x1F)); } +static int OP_LDRB_IMM_OFF(const u32 i) { LDR_THUMB(LDRB, ((i>>6)&0x1F)); } +static int OP_STRB_REG_OFF(const u32 i) { STR_THUMB(STRB, -1); } +static int OP_LDRB_REG_OFF(const u32 i) { LDR_THUMB(LDRB, -1); } +static int OP_LDRSB_REG_OFF(const u32 i) { LDR_THUMB(LDRSB, -1); } + +static int OP_STRH_IMM_OFF(const u32 i) { STR_THUMB(STRH, ((i>>5)&0x3E)); } +static int OP_LDRH_IMM_OFF(const u32 i) { LDR_THUMB(LDRH, ((i>>5)&0x3E)); } +static int OP_STRH_REG_OFF(const u32 i) { STR_THUMB(STRH, -1); } +static int OP_LDRH_REG_OFF(const u32 i) { LDR_THUMB(LDRH, -1); } +static int OP_LDRSH_REG_OFF(const u32 i) { LDR_THUMB(LDRSH, -1); } + +static int OP_STR_IMM_OFF(const u32 i) { STR_THUMB(STR, ((i>>4)&0x7C)); } +static int OP_LDR_IMM_OFF(const u32 i) { LDR_THUMB(LDR, ((i>>4)&0x7C)); } // FIXME: tempValue = (tempValue>>adr) | (tempValue<<(32-adr)); +static int OP_STR_REG_OFF(const u32 i) { STR_THUMB(STR, -1); } +static int OP_LDR_REG_OFF(const u32 i) { LDR_THUMB(LDR, -1); } + +static int OP_STR_SPREL(const u32 i) +{ + #define addr R0 + #define data R1 + + u32 imm = ((i&0xFF)<<2); + u32 adr_first = cpu->R[13] + imm; + + reg_ptr(13, addr, VALUE); + if (imm) emit_addimm(g_out, addr, imm, addr); + reg_pos_thumb(8, data, VALUE); + + // Add function + emit_ptr(g_out, (uintptr_t)STR_tab[PROCNUM][classify_adr(adr_first,1)], R3); + + // Call function + callR3(g_out); + emit_write_ptr32_reg(g_out, (uintptr_t)&bb_cycles, R0); + + #undef addr + #undef data + + return 1; +} + +static int OP_LDR_SPREL(const u32 i) +{ + #define addr R0 + #define data R1 + + u32 imm = ((i&0xFF)<<2); + u32 adr_first = cpu->R[13] + imm; + + reg_ptr(13, addr, VALUE); + if (imm) emit_addimm(g_out, addr, imm, addr); + reg_pos_thumb(8, data, ADDRESS); + + // Add function + emit_ptr(g_out, (uintptr_t)LDR_tab[PROCNUM][classify_adr(adr_first,0)], R3); + + // Call function + callR3(g_out); + + emit_write_ptr32_reg(g_out, (uintptr_t)&bb_cycles, R0); + + #undef addr + #undef data + + return 1; +} + +static int OP_LDR_PCREL(const u32 i) +{ + #define addr R0 + #define data R1 + + u32 imm = ((i&0xFF)<<2); + u32 adr_first = (bb_r15 & 0xFFFFFFFC) + imm; + + emit_movimm(g_out, adr_first, addr); + reg_pos_thumb(8, data, ADDRESS); + + // Add function + emit_ptr(g_out, (uintptr_t)LDR_tab[PROCNUM][classify_adr(adr_first,0)], R3); + + // Call function + callR3(g_out); + + emit_write_ptr32_reg(g_out, (uintptr_t)&bb_cycles, R0); + + #undef addr + #undef data + + + return 1; +} + +//----------------------------------------------------------------------------- +// STMIA / LDMIA +//----------------------------------------------------------------------------- +static int op_ldm_stm_thumb(u32 i, bool store) +{ + u32 bitmask = i & 0xFF; + u32 pop = popregcount(bitmask); + + #define adr R4 + + reg_pos_thumb(8, adr, VALUE); + + call_ldm_stm(adr, bitmask, store, 1); + + // ARM_REF: THUMB: Causes base register write-back, and is not optional + // ARM_REF: If the base register is specified in , the final value of is the loaded value + // (not the written-back value). + if (store) emit_add_ptr32_regptrTO_immFROM(g_out, reg_pos_thumb(8, R1, ADDRESS), 4*pop); //c.add(reg_pos_thumb(8), 4*pop); + + else + { + if (!BIT_N(i, _REG_NUM(i, 8))) + emit_add_ptr32_regptrTO_immFROM(g_out, reg_pos_thumb(8, R1, ADDRESS), 4*pop); //c.add(reg_pos_thumb(8), 4*pop); + } + + emit_MMU_aluMemCycles(store ? 2 : 3, &bb_cycles, pop); + + #undef adr + + return 1; +} + +static int OP_LDMIA_THUMB(const u32 i) { return op_ldm_stm_thumb(i, 0); } +static int OP_STMIA_THUMB(const u32 i) { return op_ldm_stm_thumb(i, 1); } + +//----------------------------------------------------------------------------- +// Adjust SP +//----------------------------------------------------------------------------- +static int OP_ADJUST_P_SP(const u32 i) { emit_add_ptr32_regptrTO_immFROM(g_out, reg_ptr(13, R1, ADDRESS), ((i&0x7F)<<2)); /*c.add(reg_ptr(13), ((i&0x7F)<<2));*/ return 1; } +static int OP_ADJUST_M_SP(const u32 i) { emit_sub_ptr32_regptrTO_immFROM(g_out, reg_ptr(13, R1, ADDRESS), ((i&0x7F)<<2)); /*c.sub(reg_ptr(13), ((i&0x7F)<<2));*/ return 1; } + +//----------------------------------------------------------------------------- +// PUSH / POP +//----------------------------------------------------------------------------- +static int op_push_pop(u32 i, bool store, bool pc_lr) +{ + #define adr R4 + + u32 bitmask = (i & 0xFF); + bitmask |= pc_lr << (store ? 14 : 15); + u32 pop = popregcount(bitmask); + int dir = store ? -1 : 1; + + reg_ptr(13, adr, VALUE); + + if(store) emit_subimm(g_out, adr, 4, adr); + + call_ldm_stm(adr, bitmask, store, dir); + + if(pc_lr && !store) + op_bx_thumb(reg_ptr(15, R1, VALUE), 0, PROCNUM == ARMCPU_ARM9); + emit_add_ptr32_regptrTO_immFROM(g_out, reg_ptr(13, R1, ADDRESS), 4*dir*pop); + + emit_MMU_aluMemCycles(store ? (pc_lr?4:3) : (pc_lr?5:2), &bb_cycles, pop); + + #undef adr + + return 1; +} + +static int OP_PUSH(const u32 i) { return op_push_pop(i, 1, 0); } +static int OP_PUSH_LR(const u32 i) { return op_push_pop(i, 1, 1); } +static int OP_POP(const u32 i) { return op_push_pop(i, 0, 0); } +static int OP_POP_PC(const u32 i) { return op_push_pop(i, 0, 1); } + +//----------------------------------------------------------------------------- +// Branch +//----------------------------------------------------------------------------- +static int OP_B_COND(const u32 i) +{ + unsigned int skip=genlabel(); + u32 dst = bb_r15 + ((u32)((s8)(i&0xFF))<<1); + + emit_write_ptr32_regptrTO_immFROM(g_out, cpu_ptr(instruct_adr, PTR_STORE_REG, ADDRESS), bb_next_instruction); + + emit_mov64(g_out, PTR_STORE_REG, 12); + emit_branch((i>>8)&0xF, skip); + emit_write_ptr32_regptrTO_immFROM(g_out,12/*cpu_ptr(instruct_adr, R1, ADDRESS)*/, dst); + + emit_inc_ptr_imm(g_out, (uintptr_t)&bb_total_cycles, 2); + emit_label(g_out, (int)skip); + + return 1; +} + +static int OP_B_UNCOND(const u32 i) +{ + u32 dst = bb_r15 + (SIGNEXTEND_11(i)<<1); + emit_write_ptr32_regptrTO_immFROM(g_out,cpu_ptr(instruct_adr, R1, ADDRESS), dst); + + return 1; +} + +static int OP_BLX(const u32 i) +{ + #define dst R4 + + reg_ptr(14, dst, VALUE); + emit_addimm(g_out, dst, (i&0x7FF) << 1, dst); + emit_andimm(g_out, dst, 0xFFFFFFFC, dst); + emit_write_ptr32_regptrTO_regFROM(g_out, cpu_ptr(instruct_adr, R1, ADDRESS), dst); + emit_write_ptr32_regptrTO_immFROM(g_out,reg_ptr(14, R1, ADDRESS), bb_next_instruction | 1); + cpu_ptr_byte(CPSR, 0, tmp, VALUE); + emit_andimm(g_out, tmp, ~(1<<5), tmp); + emit_writeBYTE_ptr32_regptrTO_regFROM(g_out, cpu_ptr_byte(CPSR, 0, R1, ADDRESS), tmp); + + #undef dst + + return 1; +} + +static int OP_BL_10(const u32 i) +{ + u32 dst = bb_r15 + (SIGNEXTEND_11(i)<<12); + emit_write_ptr32_regptrTO_immFROM(g_out,reg_ptr(14, R1, ADDRESS), dst); + return 1; +} + +static int OP_BL_11(const u32 i) +{ + #define dst R4 + + reg_ptr(14, dst, VALUE); + emit_addimm(g_out, dst, (i&0x7FF) << 1, dst); + emit_write_ptr32_regptrTO_regFROM(g_out, cpu_ptr(instruct_adr, R1, ADDRESS), dst); + emit_write_ptr32_regptrTO_immFROM(g_out, reg_ptr(14, R1, ADDRESS), bb_next_instruction | 1); + + #undef dst + + return 1; +} + +static int op_bx_thumb(/*Mem*/int srcreg, bool blx, bool test_thumb) +{ + #define thumb tmp2_reg + #define dst tmp_reg + #define mask R8 + + emit_mov(g_out, srcreg, dst); + emit_mov(g_out, dst, thumb); + emit_andimm(g_out, thumb, 1, thumb); + + if (blx) emit_write_ptr32_regptrTO_immFROM(g_out, reg_ptr(14, R1, ADDRESS), bb_next_instruction | 1); + if(test_thumb) + { + emit_mov(g_out, thumb, R9); + emit_lea_ptr_reg(g_out, 0xFFFFFFFC, R9, 2, mask); + emit_and(g_out, dst, mask, dst); + } + else + { + emit_andimm(g_out, dst, 0xFFFFFFFE, dst); + } + + emit_write_ptr32_regptrTO_regFROM(g_out, cpu_ptr(instruct_adr, R1, ADDRESS), dst); + + cpu_ptr_byte(CPSR, 0, tmp, VALUE); + emit_andimm(g_out, tmp, ~(1<< 5), tmp); + emit_lsl(g_out, thumb, 5); + emit_or(g_out, tmp, thumb, tmp); + emit_writeBYTE_ptr32_regptrTO_regFROM(g_out, cpu_ptr_byte(CPSR, 0, R1, ADDRESS), tmp); + + #undef dst + #undef thumb + #undef mask + + return 1; +} + +static int op_bx_thumbR15() +{ + const u32 r15 = (bb_r15 & 0xFFFFFFFC); + emit_write_ptr32_regptrTO_immFROM(g_out, cpu_ptr(instruct_adr, R1, ADDRESS), r15); + emit_write_ptr32_regptrTO_immFROM(g_out, reg_ptr(15, R1, ADDRESS), r15); + cpu_ptr(CPSR, R7, VALUE); + emit_movimm(g_out, (u32)~(1<< 5), R8); + emit_and(g_out, R7,R8,R7); + emit_write_ptr32_regptrTO_regFROM(g_out, cpu_ptr(CPSR, R1, ADDRESS), R7); + + return 1; +} + +static int OP_BX_THUMB(const u32 i) { if (REG_POS(i, 3) == 15) return op_bx_thumbR15(); return op_bx_thumb(reg_pos_ptr(3, R1, VALUE), 0, 0); } +static int OP_BLX_THUMB(const u32 i) { return op_bx_thumb(reg_pos_ptr(3, R1, VALUE), 1, 1); } + +static int OP_SWI_THUMB(const u32 i) { return op_swi(i & 0x1F); } +//----------------------------------------------------------------------------- +// Unimplemented; fall back to the C versions +//----------------------------------------------------------------------------- + +#define OP_UND NULL +#define OP_LDREX NULL +#define OP_STREX NULL +#define OP_LDC_P_IMM_OFF NULL +#define OP_LDC_M_IMM_OFF NULL +#define OP_LDC_P_PREIND NULL +#define OP_LDC_M_PREIND NULL +#define OP_LDC_P_POSTIND NULL +#define OP_LDC_M_POSTIND NULL +#define OP_LDC_OPTION NULL +#define OP_STC_P_IMM_OFF NULL +#define OP_STC_M_IMM_OFF NULL +#define OP_STC_P_PREIND NULL +#define OP_STC_M_PREIND NULL +#define OP_STC_P_POSTIND NULL +#define OP_STC_M_POSTIND NULL +#define OP_STC_OPTION NULL +#define OP_CDP NULL + +#define OP_UND_THUMB NULL +#define OP_BKPT_THUMB NULL + +//----------------------------------------------------------------------------- +// Dispatch table +//----------------------------------------------------------------------------- + +typedef int (*ArmOpCompiler)(u32); +static const ArmOpCompiler arm_instruction_compilers[4096] = { +#define TABDECL(x) x +#include "instruction_tabdef.inc" +#undef TABDECL +}; + +static const ArmOpCompiler thumb_instruction_compilers[1024] = { +#define TABDECL(x) x +#include "thumb_tabdef.inc" +#undef TABDECL +}; + +//#if pth // pth + +//----------------------------------------------------------------------------- +// Compiler +//----------------------------------------------------------------------------- + +static u32 instr_attributes(u32 opcode) +{ + return bb_thumb ? thumb_attributes[opcode>>6] + : instruction_attributes[INSTRUCTION_INDEX(opcode)]; +} + +static bool instr_is_branch(u32 opcode) +{ + u32 xt = instr_attributes(opcode); + + if(bb_thumb) + { + // merge OP_BL_10+OP_BL_11 + if (xt & MERGE_NEXT) return false; + return (xt & BRANCH_ALWAYS) + || ((xt & BRANCH_POS0) && ((opcode&7) | ((opcode>>4)&8)) == 15) + || (xt & BRANCH_SWI) + || (xt & JIT_BYPASS); + } + else + return (xt & BRANCH_ALWAYS) + || ((xt & BRANCH_POS12) && REG_POS(opcode,12) == 15) + || ((xt & BRANCH_LDM) && BIT15(opcode)) + || (xt & BRANCH_SWI) + || (xt & JIT_BYPASS); +} + +static bool instr_uses_r15(u32 opcode) +{ + u32 xt = instr_attributes(opcode); + if(bb_thumb) + return ((xt & SRCREG_POS0) && ((opcode&7) | ((opcode>>4)&8)) == 15) + || ((xt & SRCREG_POS3) && REG_POS(opcode,3) == 15) + || (xt & JIT_BYPASS); + else + return ((xt & SRCREG_POS0) && REG_POS(opcode,0) == 15) + || ((xt & SRCREG_POS8) && REG_POS(opcode,8) == 15) + || ((xt & SRCREG_POS12) && REG_POS(opcode,12) == 15) + || ((xt & SRCREG_POS16) && REG_POS(opcode,16) == 15) + || ((xt & SRCREG_STM) && BIT15(opcode)) + || (xt & JIT_BYPASS); +} + +static bool instr_is_conditional(u32 opcode) +{ + if(bb_thumb) return false; + + return !(CONDITION(opcode) == 0xE + || (CONDITION(opcode) == 0xF && CODE(opcode) == 5)); +} + +static int instr_cycles(u32 opcode) +{ + u32 xt = instr_attributes(opcode); + u32 ct = (xt & INSTR_CYCLES_MASK); + if(ct == INSTR_CYCLES_VARIABLE) + { + if ((xt & BRANCH_SWI) && !cpu->swi_tab) + return 3; + + return 0; + } + if(instr_is_branch(opcode) && !(instr_attributes(opcode) & (BRANCH_ALWAYS|BRANCH_LDM))) + ct += 2; + return ct; +} + +static bool instr_does_prefetch(u32 opcode) +{ + u32 xt = instr_attributes(opcode); + if(bb_thumb) + return thumb_instruction_compilers[opcode>>6] + && (xt & BRANCH_ALWAYS); + else + return instr_is_branch(opcode) && arm_instruction_compilers[INSTRUCTION_INDEX(opcode)] + && ((xt & BRANCH_ALWAYS) || (xt & BRANCH_LDM)); +} + +static const char *blah = ""; +static const char *disassemble(u32 opcode) +{ + return blah; + static char str[600]; + if(bb_thumb) { + int i = opcode>>6; + if (i >= 0 && i <= 4095) { strcpy(str, thumb_instruction_names[opcode>>6]); return str; } + else { strcpy(str, "bb Invalid"); return str; } + } + + int idx = INSTRUCTION_INDEX(opcode); + if (idx >= 0 && idx <= 4095) strcpy(str, arm_instruction_names[INSTRUCTION_INDEX(opcode)]); + else { strcpy(str, "Invalid"); return str; } + static const char *conds[16] = {"EQ","NE","CS","CC","MI","PL","VS","VC","HI","LS","GE","LT","GT","LE","AL","NV"}; + if(instr_is_conditional(opcode)) + { + strcat(str, "."); + if (CONDITION(opcode) >= 0 && CONDITION(opcode) <= 15) strcat(str, conds[CONDITION(opcode)]); + } + return str; +} + +static void sync_r15(u32 opcode, bool is_last, bool force) +{ + if(instr_does_prefetch(opcode)) + { + if(force) + { + emit_write_ptr32_regptrTO_immFROM(g_out, cpu_ptr(instruct_adr, R1, ADDRESS), bb_next_instruction); + } + } + else + { + if(force || (instr_attributes(opcode) & JIT_BYPASS) || (instr_attributes(opcode) & BRANCH_SWI) || (is_last && !instr_is_branch(opcode))) + { + emit_write_ptr32_regptrTO_immFROM(g_out, cpu_ptr(next_instruction, R1, ADDRESS), bb_next_instruction); + } + if(instr_uses_r15(opcode)) + { + emit_write_ptr32_regptrTO_immFROM(g_out, reg_ptr(15, R1, ADDRESS), bb_r15); + } + if(instr_attributes(opcode) & JIT_BYPASS) + { + emit_write_ptr32_regptrTO_immFROM(g_out, cpu_ptr(instruct_adr, R1, ADDRESS), bb_adr); + } + } +} + +static void emit_branch(int cond, int /*Label*/ to) +{ + static const u8 cond_bit[] = {0x40, 0x40, 0x20, 0x20, 0x80, 0x80, 0x10, 0x10}; + if(cond < 8) + { + emit_testimm(g_out, flags_ptr(R1, VALUE), cond_bit[cond]); + (cond & 1)?emit_branch_label(g_out, to, 0x1):emit_branch_label(g_out, to, 0x0); + } + else + { + emit_movimm(g_out, 0, x); + flags_ptr(x, VALUE); + + emit_andimm(g_out, x, 0xF0, x); + + emit_ptr(g_out, (uintptr_t)(arm_cond_table + cond), tmp); + emit_add64(g_out, tmp, x, tmp); + + emit_movimm(g_out, 0, x); + + emit_ldrb(g_out, tmp, x); + + emit_testimm(g_out, x, 1); + emit_branch_label(g_out, to, 0x0); + + } +} + +static void emit_armop_call(u32 opcode) +{ + ArmOpCompiler fc = bb_thumb? thumb_instruction_compilers[opcode>>6]: + arm_instruction_compilers[INSTRUCTION_INDEX(opcode)]; + + if (fc && fc(opcode)) { + return; + } + + OpFunc f = bb_thumb ? thumb_instructions_set[PROCNUM][opcode>>6] + : arm_instructions_set[PROCNUM][INSTRUCTION_INDEX(opcode)]; + + // Setup args + emit_movimm(g_out, opcode, R0); + // Add function + emit_ptr(g_out, (uintptr_t)f, R3); + + // Call function + callR3(g_out); + + emit_write_ptr32_reg(g_out, (uintptr_t)&bb_cycles, R0); +} + +static void _armlog(u8 proc, u32 addr, u32 opcode) +{ +#if 0 +#if 0 + fprintf(stderr, "\t\t;R0:%08X R1:%08X R2:%08X R3:%08X R4:%08X R5:%08X R6:%08X R7:%08X R8:%08X R9:%08X\n\t\t;R10:%08X R11:%08X R12:%08X R13:%08X R14:%08X R15:%08X| next %08X, N:%i Z:%i C:%i V:%i\n", + cpu->R[0], cpu->R[1], cpu->R[2], cpu->R[3], cpu->R[4], cpu->R[5], cpu->R[6], cpu->R[7], + cpu->R[8], cpu->R[9], cpu->R[10], cpu->R[11], cpu->R[12], cpu->R[13], cpu->R[14], cpu->R[15], + cpu->next_instruction, cpu->CPSR.bits.N, cpu->CPSR.bits.Z, cpu->CPSR.bits.C, cpu->CPSR.bits.V); +#endif + #define INDEX22(i) ((((i)>>16)&0xFF0)|(((i)>>4)&0xF)) + char dasmbuf[4096]; + if(cpu->CPSR.bits.T) + des_thumb_instructions_set[((opcode)>>6)&1023](addr, opcode, dasmbuf); + else + des_arm_instructions_set[INDEX22(opcode)](addr, opcode, dasmbuf); + #undef INDEX22 + fprintf(stderr, "%s%c %08X\t%08X \t%s\n", cpu->CPSR.bits.T?"THUMB":"ARM", proc?'7':'9', addr, opcode, dasmbuf); +#else + return; +#endif +} + +ArmOpCompiled gf = NULL; + +template +static u32 compile_basicblock() +{ +#if LOG_JIT + bool has_variable_cycles = FALSE; +#endif + + u32 interpreted_cycles = 0; + u32 start_adr = cpu->instruct_adr; + u32 opcode = 0; + + bb_thumb = cpu->CPSR.bits.T; + bb_opcodesize = bb_thumb ? 2 : 4; + + if (!JIT_MAPPED(start_adr & 0x0FFFFFFF, PROCNUM)) + { + printf("JIT: use unmapped memory address %08X\n", start_adr); + return 1; + } + + jit_write(); + + if (g_out) { + releaseBytes(g_out); + } + g_out = allocBytes(); + + emit_startfunc(g_out); + + JIT_COMMENT("CPU ptr"); + + emit_write_ptr(g_out, (uintptr_t)&bb_cpu, (uintptr_t)&ARMPROC); + + JIT_COMMENT("reset bb_total_cycles"); + + emit_write_ptr32(g_out, (uintptr_t)&bb_total_cycles, (uintptr_t)0); + +#if (PROFILER_JIT_LEVEL > 0) + JIT_COMMENT("Profiler ptr"); + emit_write_ptr(g_out, (uintptr_t)&bb_profiler, (uintptr_t)&profiler_counter[PROCNUM]); +#endif + + bb_constant_cycles = 0; + for(u32 i=0, bEndBlock = 0; bEndBlock == 0; i++) + { + bb_adr = start_adr + (i * bb_opcodesize); + if(bb_thumb) + opcode = _MMU_read16(bb_adr); + else + opcode = _MMU_read32(bb_adr); +#if LOG_JIT + char dasmbuf[1024] = {0}; + if(bb_thumb) + des_thumb_instructions_set[opcode>>6](bb_adr, opcode, dasmbuf); + else + des_arm_instructions_set[INSTRUCTION_INDEX(opcode)](bb_adr, opcode, dasmbuf); + fprintf(stderr, "%08X\t%s\t\t; %s \n", bb_adr, dasmbuf, disassemble(opcode)); +#endif + + u32 cycles = instr_cycles(opcode); + + bEndBlock = instr_is_branch(opcode) || (i >= (CommonSettings.jit_max_block_size - 1)); + +#if LOG_JIT + if (instr_is_conditional(opcode) && (cycles > 1) || (cycles == 0)) + has_variable_cycles = TRUE; +#endif + bb_constant_cycles += instr_is_conditional(opcode) ? 1 : cycles; + +#if (PROFILER_JIT_LEVEL > 0) + JIT_COMMENT("*** profiler - counter"); +#endif + if (instr_is_conditional(opcode)) + { + // 25% of conditional instructions are immediately followed by + // another with the same condition, but merging them into a + // single branch has negligible effect on speed. + if(bEndBlock) sync_r15(opcode, 1, 1); + int skip; + skip=genlabel(); + + emit_branch(CONDITION(opcode), skip); + + if(!bEndBlock) { + sync_r15(opcode, 0, 0); + } + + emit_armop_call(opcode); + + if(cycles == 0) + { + JIT_COMMENT("variable cycles"); + emit_inc_ptr(g_out, (uintptr_t)&bb_total_cycles, (uintptr_t)&bb_cycles); + emit_inc_ptr_imm(g_out, (uintptr_t)&bb_total_cycles, -1); + } + else + if (cycles > 1) + { + JIT_COMMENT("cycles (%d)", cycles); + emit_inc_ptr_imm(g_out, (uintptr_t)&bb_total_cycles, -1); + } + emit_label(g_out, (int)skip); + } + else + { + sync_r15(opcode, bEndBlock, 0); + emit_armop_call(opcode); + + if(cycles == 0) + { + JIT_COMMENT("variable cycles"); + emit_inc_ptr(g_out, (uintptr_t)&bb_total_cycles, (uintptr_t)&bb_cycles); + } + } + + interpreted_cycles += op_decode[PROCNUM][bb_thumb](); + } + + if(!instr_does_prefetch(opcode)) + { + emit_var_copy32_reg(g_out, cpu_ptr(next_instruction, R1, VALUE), cpu_ptr(instruct_adr, R2, ADDRESS)); + } + + JIT_COMMENT("total cycles (block)"); + + if (bb_constant_cycles > 0) + emit_inc_ptr_imm(g_out, (uintptr_t)&bb_total_cycles, bb_constant_cycles); + +#if LOG_JIT + fprintf(stderr, "cycles %d%s\n", bb_constant_cycles, has_variable_cycles ? " + variable" : ""); +#endif + + emit_read_ptr32_reg(g_out, (uintptr_t)&bb_total_cycles, R0); + + emit_endfunc(g_out); + + ArmOpCompiled f; + + + f = createFunc(g_out); + g_out = NULL; + + jit_exec(); + + if(! f) + { + f = op_decode[PROCNUM][bb_thumb]; + } + +#if LOG_JIT + uintptr_t baddr = (uintptr_t)f; + fprintf(stderr, "Block address %08lX\n\n", baddr); + fflush(stderr); +#endif + + JIT_COMPILED_FUNC(start_adr, PROCNUM) = (uintptr_t)f; + + return interpreted_cycles; +} + +template u32 arm_jit_compile() +{ + *PROCNUM_ptr = PROCNUM; + // prevent endless recompilation of self-modifying code, which would be a memleak since we only free code all at once. + // also allows us to clear compiled_funcs[] while leaving it sparsely allocated, if the OS does memory overcommit. + u32 adr = cpu->instruct_adr; + u32 mask_adr = (adr & 0x07FFFFFE) >> 4; + if(((recompile_counts[mask_adr >> 1] >> 4*(mask_adr & 1)) & 0xF) > 8) + { + ArmOpCompiled f = op_decode[PROCNUM][cpu->CPSR.bits.T]; + JIT_COMPILED_FUNC(adr, PROCNUM) = (uintptr_t)f; + return f(); + } + recompile_counts[mask_adr >> 1] += 1 << 4*(mask_adr & 1); + + return compile_basicblock(); +} + +template u32 arm_jit_compile<0>(); +template u32 arm_jit_compile<1>(); + +void arm_jit_reset(bool enable, bool suppress_msg) +{ +#if LOG_JIT + freopen("desmume_jit.log", "w", stderr); +#endif + + if (!suppress_msg) + printf("CPU mode: %s\n", enable?"JIT":"Interpreter"); + saveBlockSizeJIT = CommonSettings.jit_max_block_size; + + if (enable) + { + printf("JIT: max block size %d instruction(s)\n", CommonSettings.jit_max_block_size); + +#ifdef MAPPED_JIT_FUNCS + + //these pointers are allocated by asmjit and need freeing + #define JITFREE(x) for(int iii=0;iiifree((void*)x[iii]); memset(x,0,sizeof(x)); +/* JITFREE(JIT.MAIN_MEM); + JITFREE(JIT.SWIRAM); + JITFREE(JIT.ARM9_ITCM); + JITFREE(JIT.ARM9_LCDC); + JITFREE(JIT.ARM9_BIOS); + JITFREE(JIT.ARM7_BIOS); + JITFREE(JIT.ARM7_ERAM); + JITFREE(JIT.ARM7_WIRAM); + JITFREE(JIT.ARM7_WRAM);*/ + #undef JITFREE + + memset(recompile_counts, 0, sizeof(recompile_counts)); + init_jit_mem(); +#else + for(int i=0; i 0) + reconstruct(&profiler_counter[0]); + reconstruct(&profiler_counter[1]); +#if (PROFILER_JIT_LEVEL > 1) + for (u8 t = 0; t < 2; t++) + { + for (u32 i = 0; i < (1<<26); i++) + memset(&profiler_entry[t][i], 0, sizeof(PROFILER_ENTRY)); + } +#endif +#endif +} + +#if (PROFILER_JIT_LEVEL > 0) +static int pcmp(PROFILER_COUNTER_INFO *info1, PROFILER_COUNTER_INFO *info2) +{ + return (uintptr_t)(info2->count - info1->count); +} + +#if (PROFILER_JIT_LEVEL > 1) +static int pcmp_entry(PROFILER_ENTRY *info1, PROFILER_ENTRY *info2) +{ + return (uintptr_t)(info1->cycles - info2->cycles); +} +#endif +#endif + +void arm_jit_close() +{ +#if (PROFILER_JIT_LEVEL > 0) + printf("Generating profile report..."); + + for (u8 proc = 0; proc < 2; proc++) + { + extern GameInfo gameInfo; + u16 last[2] = {0}; + PROFILER_COUNTER_INFO *arm_info = NULL; + PROFILER_COUNTER_INFO *thumb_info = NULL; + + arm_info = new PROFILER_COUNTER_INFO[4096]; + thumb_info = new PROFILER_COUNTER_INFO[1024]; + memset(arm_info, 0, sizeof(PROFILER_COUNTER_INFO) * 4096); + memset(thumb_info, 0, sizeof(PROFILER_COUNTER_INFO) * 1024); + + // ARM + last[0] = 0; + for (u16 i=0; i < 4096; i++) + { + u16 t = 0; + if (profiler_counter[proc].arm_count[i] == 0) continue; + + for (t = 0; t < last[0]; t++) + { + if (strcmp(arm_instruction_names[i], arm_info[t].name) == 0) + { + arm_info[t].count += profiler_counter[proc].arm_count[i]; + break; + } + } + if (t == last[0]) + { + strcpy(arm_info[last[0]++].name, arm_instruction_names[i]); + arm_info[t].count = profiler_counter[proc].arm_count[i]; + } + } + + // THUMB + last[1] = 0; + for (u16 i=0; i < 1024; i++) + { + u16 t = 0; + if (profiler_counter[proc].thumb_count[i] == 0) continue; + + for (t = 0; t < last[1]; t++) + { + if (strcmp(thumb_instruction_names[i], thumb_info[t].name) == 0) + { + thumb_info[t].count += profiler_counter[proc].thumb_count[i]; + break; + } + } + if (t == last[1]) + { + strcpy(thumb_info[last[1]++].name, thumb_instruction_names[i]); + thumb_info[t].count = profiler_counter[proc].thumb_count[i]; + } + } + + std::qsort(arm_info, last[0], sizeof(PROFILER_COUNTER_INFO), (int (*)(const void *, const void *))pcmp); + std::qsort(thumb_info, last[1], sizeof(PROFILER_COUNTER_INFO), (int (*)(const void *, const void *))pcmp); + + char buf[MAX_PATH] = {0}; + sprintf(buf, "desmume_jit%c_counter.profiler", proc==0?'9':'7'); + FILE *fp = fopen(buf, "w"); + if (fp) + { + if (!gameInfo.isHomebrew()) + { + fprintf(fp, "Name: %s\n", gameInfo.ROMname); + fprintf(fp, "Serial: %s\n", gameInfo.ROMserial); + } + else + fprintf(fp, "Homebrew\n"); + fprintf(fp, "CPU: ARM%c\n\n", proc==0?'9':'7'); + + if (last[0]) + { + fprintf(fp, "========================================== ARM ==========================================\n"); + for (int i=0; i < last[0]; i++) + fprintf(fp, "%30s: %20ld\n", arm_info[i].name, arm_info[i].count); + fprintf(fp, "\n"); + } + + if (last[1]) + { + fprintf(fp, "========================================== THUMB ==========================================\n"); + for (int i=0; i < last[1]; i++) + fprintf(fp, "%30s: %20ld\n", thumb_info[i].name, thumb_info[i].count); + fprintf(fp, "\n"); + } + + fclose(fp); + } + + delete [] arm_info; arm_info = NULL; + delete [] thumb_info; thumb_info = NULL; + +#if (PROFILER_JIT_LEVEL > 1) + sprintf(buf, "desmume_jit%c_entry.profiler", proc==0?'9':'7'); + fp = fopen(buf, "w"); + if (fp) + { + u32 count = 0; + PROFILER_ENTRY *tmp = NULL; + + fprintf(fp, "Entrypoints (cycles):\n"); + tmp = new PROFILER_ENTRY[1<<26]; + memset(tmp, 0, sizeof(PROFILER_ENTRY) * (1<<26)); + for (u32 i = 0; i < (1<<26); i++) + { + if (profiler_entry[proc][i].cycles == 0) continue; + memcpy(&tmp[count++], &profiler_entry[proc][i], sizeof(PROFILER_ENTRY)); + } + std::qsort(tmp, count, sizeof(PROFILER_ENTRY), (int (*)(const void *, const void *))pcmp_entry); + if (!gameInfo.isHomebrew()) + { + fprintf(fp, "Name: %s\n", gameInfo.ROMname); + fprintf(fp, "Serial: %s\n", gameInfo.ROMserial); + } + else + fprintf(fp, "Homebrew\n"); + fprintf(fp, "CPU: ARM%c\n\n", proc==0?'9':'7'); + + while ((count--) > 0) + fprintf(fp, "%08X: %20ld\n", tmp[count].addr, tmp[count].cycles); + + delete [] tmp; tmp = NULL; + + fclose(fp); + } +#endif + } + printf(" done.\n"); +#endif +} +#endif // HAVE_JIT diff --git a/desmume/src/utils/arm_jit/emit_core.cpp b/desmume/src/utils/arm_jit/emit_core.cpp new file mode 100755 index 000000000..2a39fdee1 --- /dev/null +++ b/desmume/src/utils/arm_jit/emit_core.cpp @@ -0,0 +1,208 @@ +/* + Copyright (C) 2022 Patrick Herlihy - https://byte4byte.com + Copyright (C) 2022 DeSmuME team + + This file is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This file is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the this software. If not, see . +*/ + +#define CHUNK_SIZE 6048*4 + +struct s_bytes { + struct s_bytes *next; + u8 data[CHUNK_SIZE]; +}; + +typedef struct jumpmap { + int num_bytes; // bytes in function where jump is + int cond; + u32 *instruct_ptr; +} t_jumpmap; + +typedef struct labelmap { + int num_bytes; // bytes in function where label is + std::vector jumps; +} t_labelmap; + +static std::map g_LABELS; +static int g_NUMJUMPS = 0; +static int g_TOTALBYTES = 0; +u32 *g_PTR = NULL; +t_bytes *g_currBlock = NULL; + +std::map allFuncs; + +static void* alloc_executable_memory(size_t size) { +#ifdef __APPLE__ + void* ptr = mmap (NULL, size, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_JIT, + -1, 0); +#else + void* ptr = mmap(0, size, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); +#endif + if (ptr == (void*)-1) { + return NULL; + } + if (! ptr) { + return NULL; + } + + allFuncs.insert(std::pair((JittedFunc)ptr, size)); + + return ptr; +} + +static char *g_curr_jit_block = NULL; +static char *g_curr_jit_ptr = NULL; +static int g_curr_jit_size = 0; + +static void *armjitarm_alloc_func(size_t size) { + if (! g_curr_jit_block || (uintptr_t)((uintptr_t)g_curr_jit_ptr + (uintptr_t)size) > (uintptr_t)((uintptr_t)g_curr_jit_block + (uintptr_t)g_curr_jit_size)) { + if (g_curr_jit_block) { + printf("RESETTING JIT\n"); + arm_jit_reset(1); + //return NULL; + } + + int memsize = 4 * 10000000; + g_curr_jit_block = (char *)alloc_executable_memory(memsize); + if (! g_curr_jit_block) return NULL; + g_curr_jit_ptr = g_curr_jit_block; + g_curr_jit_size = memsize; + } + + char *ptr = g_curr_jit_ptr; + g_curr_jit_ptr += size; + return ptr; +} + +static void freeFuncs() { + std::map::iterator it; + for (it=allFuncs.begin(); it!=allFuncs.end(); ++it) { + // TODO: Calling munmap() causes a crash when resetting the emulator. Need to research why this happens. + //munmap((void *)it->first, (uintptr_t)it->second); + } + allFuncs.clear(); +} + +static void allocNewBlock() { + if (! g_currBlock) { + fprintf(stderr, "allocNewBlock failed\n"); + exit(0); + } + g_currBlock->next = (t_bytes *)malloc(sizeof(t_bytes)); + if (! g_currBlock->next) { + fprintf(stderr, "allocNewBlock failed\n"); + exit(0); + } + g_currBlock = g_currBlock->next; + g_PTR = (u32*)&g_currBlock->data[0]; + g_currBlock->next = NULL; +} + +static void output_w32(t_bytes *bytes, u32 word) { + *g_PTR++ = word; + g_TOTALBYTES+=4; + if ((uintptr_t)g_PTR >= (uintptr_t)(&g_currBlock->data[0] + ((uintptr_t)CHUNK_SIZE))) { + allocNewBlock(); + } +} + +static void emit_label(t_bytes *bytes, u_int id) { + g_LABELS[id].num_bytes = g_TOTALBYTES + 4; +} + +static void emit_branch_label(t_bytes *out, u_int id, int cond) { + u32 *instruct_adr = g_PTR; + + output_w32(out, 0x00000000); + + int num_bytes = g_TOTALBYTES; + + t_jumpmap jm = { + num_bytes, + cond, + instruct_adr + }; + + g_LABELS[id].jumps.push_back(jm); +} + +static t_bytes *g_last_bytes = NULL; + +t_bytes *allocBytes() { + t_bytes *ret; + if (g_last_bytes) ret = g_last_bytes; + else ret = (t_bytes *)malloc(sizeof(t_bytes)); + if (! ret) { + fprintf(stderr, "allocBytes failed\n"); + exit(0); + } + + ret->next = NULL; + + g_currBlock = ret; + g_PTR = (u32*)&g_currBlock->data[0]; + g_TOTALBYTES = 0; + + g_LABELS.clear(); + + return ret; +} + +void releaseBytes(t_bytes *bytes) { + g_last_bytes = bytes; + bytes = bytes->next; + while (bytes) { + t_bytes *next = bytes->next; + free(bytes); + bytes = next; + } +} + +JittedFunc createFunc(t_bytes *bytes) { + + for (auto const& lbl : g_LABELS) { + int nj = lbl.second.jumps.size(); + for (int i = 0; i < nj; i++) { + MOD_JUMP(lbl.second.jumps[i].instruct_ptr, lbl.second.jumps[i].cond, lbl.second.num_bytes, lbl.second.jumps[i].num_bytes) + } + } + + char *fn = (char *)armjitarm_alloc_func(g_TOTALBYTES); + char *fn_ptr = fn; + if (! fn) return NULL; + + int tot_bytes_acum = 0; + + t_bytes *curr_bytes = bytes; + while (curr_bytes) { + if (curr_bytes->next) { + memcpy(fn_ptr, curr_bytes->data, CHUNK_SIZE); + fn_ptr += CHUNK_SIZE; + tot_bytes_acum += CHUNK_SIZE; + } + else { + memcpy(fn_ptr, curr_bytes->data, g_TOTALBYTES-tot_bytes_acum); + break; + } + curr_bytes = curr_bytes->next; + } + + __builtin___clear_cache(fn, fn+g_TOTALBYTES); + + return (JittedFunc)fn; +}