diff --git a/desmume/src/utils/arm_arm/arm_gen.cpp b/desmume/src/utils/arm_arm/arm_gen.cpp new file mode 100644 index 000000000..3399c9063 --- /dev/null +++ b/desmume/src/utils/arm_arm/arm_gen.cpp @@ -0,0 +1,276 @@ +#include +#include +#include +#include +#include +#include +#include "arm_gen.h" + +#ifdef _3DS +# include +# include "3ds/memory.h" +#elif defined(VITA) +# include +# define RW_INIT sceKernelOpenVMDomain +# define RW_END sceKernelCloseVMDomain +#else +# include +#endif + +// __clear_cache(start, end) +#ifdef __BLACKBERRY_QNX__ +#undef __clear_cache +#define __clear_cache(start,end) msync(start, (size_t)((void*)end - (void*)start), MS_SYNC | MS_CACHE_ONLY | MS_INVALIDATE_ICACHE); +#elif defined(__MACH__) +#include +#define __clear_cache mach_clear_cache +static void __clear_cache(void *start, void *end) { + size_t len = (char *)end - (char *)start; + sys_dcache_flush(start, len); + sys_icache_invalidate(start, len); +} +#elif defined(_3DS) +#undef __clear_cache +#define __clear_cache(start,end)FlushInvalidateCache(); +#elif defined(VITA) +#undef __clear_cache +#define __clear_cache(start,end)sceKernelSyncVMDomain(block, start, (char *)end - (char *)start) +#endif + +namespace arm_gen +{ + +#ifdef _3DS +uint32_t* _instructions = 0; +#endif + +code_pool::code_pool(uint32_t icount) : + instruction_count(icount), + instructions(0), + next_instruction(0), + flush_start(0) +{ + + printf("\n\ncode_pool icount: %i\n\n", icount); + literal_count = 0; + memset(labels, 0, sizeof(labels)); + memset(branches, 0, sizeof(branches)); + +#if defined(_3DS) + if(!_instructions) + { + _instructions = (uint32_t*)memalign(4096, instruction_count * 4); + if (!_instructions) + { + fprintf(stderr, "memalign failed\n"); + abort(); + } + ReprotectMemory((unsigned int*)_instructions, (instruction_count * 4) / 4096, 7); + } + instructions = _instructions; +#elif defined(VITA) + block = sceKernelAllocMemBlockForVM("desmume_rwx_block", instruction_count * 4); + if (block < 0) + { + 
fprintf(stderr, "sceKernelAllocMemBlockForVM failed\n"); + abort(); + } + + if (sceKernelGetMemBlockBase(block, (void **)&instructions) < 0) + { + fprintf(stderr, "sceKernelGetMemBlockBase failed\n"); + abort(); + } +#elif defined(USE_POSIX_MEMALIGN) + if (posix_memalign((void**)&instructions, 4096, instruction_count * 4)) + { + fprintf(stderr, "posix_memalign failed\n"); + abort(); + } + + if (mprotect(instructions, instruction_count * 4, PROT_READ | PROT_WRITE | PROT_EXEC)) + { + fprintf(stderr, "mprotect failed\n"); + abort(); + } +#else + instructions = (uint32_t*)memalign(4096, instruction_count * 4); + if (!instructions) + { + fprintf(stderr, "memalign failed\n"); + abort(); + } + + if (mprotect(instructions, instruction_count * 4, PROT_READ | PROT_WRITE | PROT_EXEC)) + { + fprintf(stderr, "mprotect failed\n"); + abort(); + } +#endif +} + +code_pool::~code_pool() +{ +#ifdef _3DS + //ReprotectMemory((unsigned int*)instructions, (instruction_count * 4) / 4096, 3); +#elif defined(VITA) + sceKernelFreeMemBlock(block); +#else + mprotect(instructions, instruction_count * 4, PROT_READ | PROT_WRITE); + free(instructions); +#endif +} + +void* code_pool::fn_pointer() +{ + void* result = &instructions[flush_start]; + + __clear_cache(&instructions[flush_start], &instructions[next_instruction]); + flush_start = next_instruction; + + return result; +} + +void code_pool::set_label(const char* name) +{ + for (int i = 0; i < TARGET_COUNT; i ++) + { + if (labels[i].name == name) + { + fprintf(stderr, "Duplicate label\n"); + abort(); + } + } + + for (int i = 0; i < TARGET_COUNT; i ++) + { + if (labels[i].name == 0) + { + labels[i].name = name; + labels[i].position = next_instruction; + return; + } + } + + fprintf(stderr, "Label overflow\n"); + abort(); +} + +void code_pool::resolve_label(const char* name) +{ +#ifdef VITA + RW_INIT(); +#endif + for (int i = 0; i < TARGET_COUNT; i ++) + { + if (labels[i].name != name) + { + continue; + } + + for (int j = 0; j < TARGET_COUNT; j 
++) + { + if (branches[j].name != name) + { + continue; + } + + const uint32_t source = branches[j].position; + const uint32_t target = labels[i].position; + instructions[source] |= ((target - source) - 2) & 0xFFFFFF; + + branches[j].name = 0; + } + + labels[i].name = 0; + break; + } +#ifdef VITA + RW_END(); +#endif +} + +// Code Gen: Generic +void code_pool::insert_instruction(uint32_t op, AG_COND cond) +{ + assert(cond < CONDINVALID); + insert_raw_instruction((op & 0x0FFFFFFF) | (cond << 28)); +} + +void code_pool::insert_raw_instruction(uint32_t op) +{ + if (next_instruction >= instruction_count) + { + fprintf(stderr, "code_pool overflow\n"); + abort(); + } +#ifdef VITA + RW_INIT(); +#endif + instructions[next_instruction ++] = op; +#ifdef VITA + RW_END(); +#endif +} + +void code_pool::alu_op(AG_ALU_OP op, reg_t rd, reg_t rn, + const alu2& arg, AG_COND cond) +{ + assert(op < OPINVALID); + insert_instruction( (op << 20) | (rn << 16) | (rd << 12) | arg.encoding, cond ); +} + +void code_pool::mem_op(AG_MEM_OP op, reg_t rd, reg_t rn, const mem2& arg, + AG_MEM_FLAGS flags, AG_COND cond) +{ + uint32_t instruction = 0x04000000; + instruction |= (op & 1) ? 1 << 20 : 0; + instruction |= (op & 2) ? 
1 << 22 : 0; + + instruction |= arg.encoding; + instruction |= rd << 12; + instruction |= rn << 16; + + instruction |= flags ^ 0x1800000; + + insert_instruction( instruction, cond ); +} + +void code_pool::b(const char* target, AG_COND cond) +{ + assert(target); + + for (int i = 0; i < TARGET_COUNT; i ++) + { + if (branches[i].name == 0) + { + branches[i].name = target; + branches[i].position = next_instruction; + insert_instruction( 0x0A000000, cond ); + return; + } + } + + assert(false); +} + +void code_pool::load_constant(reg_t target_reg, uint32_t constant, AG_COND cond) +{ + // TODO: Support another method for ARM procs that don't have movw|movt + + uint32_t instructions[2] = { 0x03000000, 0x03400000 }; + + for (int i = 0; i < 2; i ++, constant >>= 16) + { + // If the upper 16-bits are zero the movt op is not needed + if (i == 1 && constant == 0) + break; + + instructions[i] |= target_reg << 12; + instructions[i] |= constant & 0xFFF; + instructions[i] |= (constant & 0xF000) << 4; + insert_instruction( instructions[i], cond ); + } +} + +} // namespace arm_gen diff --git a/desmume/src/utils/arm_arm/arm_gen.h b/desmume/src/utils/arm_arm/arm_gen.h new file mode 100644 index 000000000..05fb99518 --- /dev/null +++ b/desmume/src/utils/arm_arm/arm_gen.h @@ -0,0 +1,226 @@ +#ifndef ARM_GEN_H_LR +#define ARM_GEN_H_LR + +#include +#include + +#if defined(VITA) +# include +#endif + +namespace arm_gen +{ + +template +struct Constraint +{ + public: + Constraint(uint32_t val) : value(val) { assert(val < MAX); } + operator uint32_t() const { return value; } + + private: + const uint32_t value; +}; + +struct reg_t : public Constraint<16> +{ + public: + reg_t(uint32_t num) : Constraint<16>(num) { } +}; + +// Do NOT reorder these enums +enum AG_COND +{ + EQ, NE, CS, CC, MI, PL, VS, VC, + HI, LS, GE, LT, GT, LE, AL, EGG, + CONDINVALID +}; + +enum AG_ALU_OP +{ + AND, ANDS, EOR, EORS, SUB, SUBS, RSB, RSBS, + ADD, ADDS, ADC, ADCS, SBC, SBCS, RSC, RSCS, + XX1, TST , XX2, TEQ , XX3, CMP 
, XX4, CMN , + ORR, ORRS, MOV, MOVS, BIC, BICS, MVN, MVNS, + OPINVALID +}; + +enum AG_MEM_OP +{ + STR, LDR, STRB, LDRB, MEMINVALID +}; + +enum AG_MEM_FLAGS +{ + POST_INDEX = 1 << 24, + NEGATE_OFFSET = 1 << 23, + WRITE_BACK = 1 << 21, + MEM_NONE = 0 +}; + +enum AG_ALU_SHIFT +{ + LSL, LSR, ASR, ROR, SHIFTINVALID +}; + +struct alu2 +{ + private: + alu2(uint32_t val) : encoding(val) { } + + public: + static alu2 reg_shift_reg(reg_t rm, AG_ALU_SHIFT type, reg_t rs) { return alu2(rm | (type << 5) | 0x10 | (rs << 8)); } + static alu2 reg_shift_imm(reg_t rm, AG_ALU_SHIFT type, uint32_t imm) { return alu2(rm | (type << 5) | (imm << 7)); } + static alu2 imm_ror(uint32_t val, uint32_t ror) { return alu2((1 << 25) | ((ror / 2) << 8) | val); } + static alu2 imm_rol(uint32_t val, uint32_t rol) { return imm_ror(val, (32 - rol) & 0x1F); } + + + static alu2 reg(reg_t rm) { return reg_shift_imm(rm, LSL, 0); } + static alu2 imm(uint8_t val) { return imm_ror(val, 0); } + + const uint32_t encoding; +}; + +struct mem2 +{ + private: + mem2(uint32_t val) : encoding(val) { } + + public: + static mem2 reg_shift_imm(reg_t rm, AG_ALU_SHIFT type, uint32_t imm) { return mem2((1 << 25) | rm | (type << 5) | (imm << 7)); } + + static mem2 reg(reg_t rm) { return reg_shift_imm(rm, LSL, 0); } + static mem2 imm(uint32_t val) { return mem2(val); } + + const uint32_t encoding; +}; + +// 80 Columns be damned +class code_pool +{ + public: + code_pool(uint32_t instruction_count); + ~code_pool(); + + uint32_t instructions_remaining() const { return instruction_count - next_instruction; } + + void* fn_pointer(); + + // Relocs + void set_label(const char* name); + void resolve_label(const char* name); + + // Code Gen: Generic + void insert_instruction(uint32_t op, AG_COND cond = AL); + void insert_raw_instruction(uint32_t op); + + // Code Gen: ALU + void alu_op(AG_ALU_OP op, reg_t rd, reg_t rn, const alu2& arg, AG_COND cond = AL); + void and_(reg_t rd, reg_t rn, const alu2& arg, AG_COND cond = AL) { 
alu_op(AND , rd, rn, arg, cond); } + void and_(reg_t rd, const alu2& arg, AG_COND cond = AL) { alu_op(AND , rd, rd, arg, cond); } + void ands(reg_t rd, reg_t rn, const alu2& arg, AG_COND cond = AL) { alu_op(ANDS, rd, rn, arg, cond); } + void ands(reg_t rd, const alu2& arg, AG_COND cond = AL) { alu_op(ANDS, rd, rd, arg, cond); } + void eor (reg_t rd, reg_t rn, const alu2& arg, AG_COND cond = AL) { alu_op(EOR , rd, rn, arg, cond); } + void eor (reg_t rd, const alu2& arg, AG_COND cond = AL) { alu_op(EOR , rd, rd, arg, cond); } + void eors(reg_t rd, reg_t rn, const alu2& arg, AG_COND cond = AL) { alu_op(EORS, rd, rn, arg, cond); } + void eors(reg_t rd, const alu2& arg, AG_COND cond = AL) { alu_op(EORS, rd, rd, arg, cond); } + void sub (reg_t rd, reg_t rn, const alu2& arg, AG_COND cond = AL) { alu_op(SUB , rd, rn, arg, cond); } + void sub (reg_t rd, const alu2& arg, AG_COND cond = AL) { alu_op(SUB , rd, rd, arg, cond); } + void subs(reg_t rd, reg_t rn, const alu2& arg, AG_COND cond = AL) { alu_op(SUBS, rd, rn, arg, cond); } + void subs(reg_t rd, const alu2& arg, AG_COND cond = AL) { alu_op(SUBS, rd, rd, arg, cond); } + void rsb (reg_t rd, reg_t rn, const alu2& arg, AG_COND cond = AL) { alu_op(RSB , rd, rn, arg, cond); } + void rsb (reg_t rd, const alu2& arg, AG_COND cond = AL) { alu_op(RSB , rd, rd, arg, cond); } + void rsbs(reg_t rd, reg_t rn, const alu2& arg, AG_COND cond = AL) { alu_op(RSBS, rd, rn, arg, cond); } + void rsbs(reg_t rd, const alu2& arg, AG_COND cond = AL) { alu_op(RSBS, rd, rd, arg, cond); } + void add (reg_t rd, reg_t rn, const alu2& arg, AG_COND cond = AL) { alu_op(ADD , rd, rn, arg, cond); } + void add (reg_t rd, const alu2& arg, AG_COND cond = AL) { alu_op(ADD , rd, rd, arg, cond); } + void adds(reg_t rd, reg_t rn, const alu2& arg, AG_COND cond = AL) { alu_op(ADDS, rd, rn, arg, cond); } + void adds(reg_t rd, const alu2& arg, AG_COND cond = AL) { alu_op(ADDS, rd, rd, arg, cond); } + void adc (reg_t rd, reg_t rn, const alu2& arg, AG_COND cond = AL) { 
alu_op(ADC , rd, rn, arg, cond); } + void adc (reg_t rd, const alu2& arg, AG_COND cond = AL) { alu_op(ADC , rd, rd, arg, cond); } + void adcs(reg_t rd, reg_t rn, const alu2& arg, AG_COND cond = AL) { alu_op(ADCS, rd, rn, arg, cond); } + void adcs(reg_t rd, const alu2& arg, AG_COND cond = AL) { alu_op(ADCS, rd, rd, arg, cond); } + void sbc (reg_t rd, reg_t rn, const alu2& arg, AG_COND cond = AL) { alu_op(SBC , rd, rn, arg, cond); } + void sbc (reg_t rd, const alu2& arg, AG_COND cond = AL) { alu_op(SBC , rd, rd, arg, cond); } + void sbcs(reg_t rd, reg_t rn, const alu2& arg, AG_COND cond = AL) { alu_op(SBCS, rd, rn, arg, cond); } + void sbcs(reg_t rd, const alu2& arg, AG_COND cond = AL) { alu_op(SBCS, rd, rd, arg, cond); } + void rsc (reg_t rd, reg_t rn, const alu2& arg, AG_COND cond = AL) { alu_op(RSC , rd, rn, arg, cond); } + void rsc (reg_t rd, const alu2& arg, AG_COND cond = AL) { alu_op(RSC , rd, rd, arg, cond); } + void rscs(reg_t rd, reg_t rn, const alu2& arg, AG_COND cond = AL) { alu_op(RSCS, rd, rn, arg, cond); } + void rscs(reg_t rd, const alu2& arg, AG_COND cond = AL) { alu_op(RSCS, rd, rd, arg, cond); } + void tst ( reg_t rn, const alu2& arg, AG_COND cond = AL) { alu_op(TST , rn, rn, arg, cond); } // 1 + void teq ( reg_t rn, const alu2& arg, AG_COND cond = AL) { alu_op(TEQ , rn, rn, arg, cond); } // 1 + void cmp ( reg_t rn, const alu2& arg, AG_COND cond = AL) { alu_op(CMP , rn, rn, arg, cond); } // 1 + void cmn ( reg_t rn, const alu2& arg, AG_COND cond = AL) { alu_op(CMN , rn, rn, arg, cond); } // 1 + void orr (reg_t rd, reg_t rn, const alu2& arg, AG_COND cond = AL) { alu_op(ORR , rd, rn, arg, cond); } + void orr (reg_t rd, const alu2& arg, AG_COND cond = AL) { alu_op(ORR , rd, rd, arg, cond); } + void orrs(reg_t rd, reg_t rn, const alu2& arg, AG_COND cond = AL) { alu_op(ORRS, rd, rn, arg, cond); } + void orrs(reg_t rd, const alu2& arg, AG_COND cond = AL) { alu_op(ORRS, rd, rd, arg, cond); } + void mov (reg_t rd, const alu2& arg, AG_COND cond = AL) { 
alu_op(MOV , rd, rd, arg, cond); } // 2 + void movs(reg_t rd, const alu2& arg, AG_COND cond = AL) { alu_op(MOVS, rd, rd, arg, cond); } // 2 + void bic (reg_t rd, reg_t rn, const alu2& arg, AG_COND cond = AL) { alu_op(BIC , rd, rn, arg, cond); } + void bic (reg_t rd, const alu2& arg, AG_COND cond = AL) { alu_op(BIC , rd, rd, arg, cond); } + void bics(reg_t rd, reg_t rn, const alu2& arg, AG_COND cond = AL) { alu_op(BICS, rd, rn, arg, cond); } + void bics(reg_t rd, const alu2& arg, AG_COND cond = AL) { alu_op(BICS, rd, rd, arg, cond); } + void mvn (reg_t rd, const alu2& arg, AG_COND cond = AL) { alu_op(MVN , rd, rd, arg, cond); } // 2 + void mvns(reg_t rd, const alu2& arg, AG_COND cond = AL) { alu_op(MVNS, rd, rd, arg, cond); } // 2 + + // Code Gen: Memory + void mem_op(AG_MEM_OP op, reg_t rd, reg_t rn, const mem2& arg, AG_MEM_FLAGS flags = MEM_NONE, AG_COND cond = AL); + void ldr (reg_t rd, reg_t base, const mem2& arg = mem2::imm(0), AG_MEM_FLAGS flags = MEM_NONE, AG_COND cond = AL) { mem_op(LDR , rd, base, arg, flags, cond); } + void str (reg_t rd, reg_t base, const mem2& arg = mem2::imm(0), AG_MEM_FLAGS flags = MEM_NONE, AG_COND cond = AL) { mem_op(STR , rd, base, arg, flags, cond); } + void ldrb(reg_t rd, reg_t base, const mem2& arg = mem2::imm(0), AG_MEM_FLAGS flags = MEM_NONE, AG_COND cond = AL) { mem_op(LDRB, rd, base, arg, flags, cond); } + void strb(reg_t rd, reg_t base, const mem2& arg = mem2::imm(0), AG_MEM_FLAGS flags = MEM_NONE, AG_COND cond = AL) { mem_op(STRB, rd, base, arg, flags, cond); } + + // Code Gen: Sign Extend + void sxtb(reg_t rd, reg_t rm, AG_COND cond = AL) { insert_instruction( 0x06AF0070 | (rd << 12) | rm, cond ); } + void sxth(reg_t rd, reg_t rm, AG_COND cond = AL) { insert_instruction( 0x06BF0070 | (rd << 12) | rm, cond ); } + void uxtb(reg_t rd, reg_t rm, AG_COND cond = AL) { insert_instruction( 0x06EF0070 | (rd << 12) | rm, cond ); } + void uxth(reg_t rd, reg_t rm, AG_COND cond = AL) { insert_instruction( 0x06FF0070 | (rd << 12) | rm, 
cond ); } + + // Code Gen: Other + void set_status(reg_t source_reg, AG_COND cond = AL) { insert_instruction( 0x0128F000 | source_reg, cond ); } + void get_status(reg_t dest_reg, AG_COND cond = AL) { insert_instruction( 0x010F0000 | (dest_reg << 12), cond ); } + void bx(reg_t target_reg, AG_COND cond = AL) { insert_instruction( 0x012FFF10 | target_reg, cond ); } + void blx(reg_t target_reg, AG_COND cond = AL) { insert_instruction( 0x012FFF30 | target_reg, cond ); } + void push(uint16_t regs, AG_COND cond = AL) { insert_instruction( 0x092D0000 | regs, cond ); } + void pop(uint16_t regs, AG_COND cond = AL) { insert_instruction( 0x08BD0000 | regs, cond ); } + + void b(const char* target, AG_COND cond = AL); + + // Inserts a movw; movt pair to load the constant, omits movt is constant fits in 16 bits. + void load_constant(reg_t target_reg, uint32_t constant, AG_COND cond = AL); + void insert_constants(); + + void jmp(uint32_t offset); + void resolve_jmp(uint32_t instruction, uint32_t offset); + + uint32_t get_next_instruction() { return next_instruction; }; + + private: + const uint32_t instruction_count; + uint32_t* instructions; + + uint32_t next_instruction; + uint32_t flush_start; + + uint32_t literals[128][2]; + uint32_t literal_count; + + static const uint32_t TARGET_COUNT = 16; + + struct target + { + const char* name; + uint32_t position; + }; + + target labels[TARGET_COUNT]; + target branches[TARGET_COUNT]; +#if defined(VITA) + SceUID block; +#endif +}; +} // namespace arm_gen + +#endif diff --git a/desmume/src/utils/arm_arm/arm_jit.cpp b/desmume/src/utils/arm_arm/arm_jit.cpp new file mode 100644 index 000000000..0da86cc6c --- /dev/null +++ b/desmume/src/utils/arm_arm/arm_jit.cpp @@ -0,0 +1,1547 @@ +/* Copyright (C) 2006 yopyop + Copyright (C) 2011 Loren Merritt + Copyright (C) 2012 DeSmuME team + + This file is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software 
Foundation, either version 3 of the License, or + (at your option) any later version. + + This file is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the this software. If not, see . +*/ + +#include "types.h" + +#ifdef HAVE_JIT + +#include +#include +#include + +#include "arm_gen.h" +#include "reg_manager.h" +using namespace arm_gen; + +#include "instructions.h" +#include "instruction_attributes.h" +#include "MMU.h" +#include "MMU_timing.h" +#include "arm_jit.h" +#include "bios.h" +#include "armcpu.h" + +u32 saveBlockSizeJIT = 0; + +#ifdef MAPPED_JIT_FUNCS +CACHE_ALIGN JIT_struct JIT; + +uintptr_t *JIT_struct::JIT_MEM[2][0x4000] = {{0}}; + +static uintptr_t *JIT_MEM[2][32] = { + //arm9 + { + /* 0X*/ DUP2(JIT.ARM9_ITCM), + /* 1X*/ DUP2(JIT.ARM9_ITCM), // mirror + /* 2X*/ DUP2(JIT.MAIN_MEM), + /* 3X*/ DUP2(JIT.SWIRAM), + /* 4X*/ DUP2(NULL), + /* 5X*/ DUP2(NULL), + /* 6X*/ NULL, + JIT.ARM9_LCDC, // Plain ARM9-CPU Access (LCDC mode) (max 656KB) + /* 7X*/ DUP2(NULL), + /* 8X*/ DUP2(NULL), + /* 9X*/ DUP2(NULL), + /* AX*/ DUP2(NULL), + /* BX*/ DUP2(NULL), + /* CX*/ DUP2(NULL), + /* DX*/ DUP2(NULL), + /* EX*/ DUP2(NULL), + /* FX*/ DUP2(JIT.ARM9_BIOS) + }, + //arm7 + { + /* 0X*/ DUP2(JIT.ARM7_BIOS), + /* 1X*/ DUP2(NULL), + /* 2X*/ DUP2(JIT.MAIN_MEM), + /* 3X*/ JIT.SWIRAM, + JIT.ARM7_ERAM, + /* 4X*/ NULL, + JIT.ARM7_WIRAM, + /* 5X*/ DUP2(NULL), + /* 6X*/ JIT.ARM7_WRAM, // VRAM allocated as Work RAM to ARM7 (max. 
256K) + NULL, + /* 7X*/ DUP2(NULL), + /* 8X*/ DUP2(NULL), + /* 9X*/ DUP2(NULL), + /* AX*/ DUP2(NULL), + /* BX*/ DUP2(NULL), + /* CX*/ DUP2(NULL), + /* DX*/ DUP2(NULL), + /* EX*/ DUP2(NULL), + /* FX*/ DUP2(NULL) + } +}; + +static u32 JIT_MASK[2][32] = { + //arm9 + { + /* 0X*/ DUP2(0x00007FFF), + /* 1X*/ DUP2(0x00007FFF), + /* 2X*/ DUP2(0x003FFFFF), // FIXME _MMU_MAIN_MEM_MASK + /* 3X*/ DUP2(0x00007FFF), + /* 4X*/ DUP2(0x00000000), + /* 5X*/ DUP2(0x00000000), + /* 6X*/ 0x00000000, + 0x000FFFFF, + /* 7X*/ DUP2(0x00000000), + /* 8X*/ DUP2(0x00000000), + /* 9X*/ DUP2(0x00000000), + /* AX*/ DUP2(0x00000000), + /* BX*/ DUP2(0x00000000), + /* CX*/ DUP2(0x00000000), + /* DX*/ DUP2(0x00000000), + /* EX*/ DUP2(0x00000000), + /* FX*/ DUP2(0x00007FFF) + }, + //arm7 + { + /* 0X*/ DUP2(0x00003FFF), + /* 1X*/ DUP2(0x00000000), + /* 2X*/ DUP2(0x003FFFFF), + /* 3X*/ 0x00007FFF, + 0x0000FFFF, + /* 4X*/ 0x00000000, + 0x0000FFFF, + /* 5X*/ DUP2(0x00000000), + /* 6X*/ 0x0003FFFF, + 0x00000000, + /* 7X*/ DUP2(0x00000000), + /* 8X*/ DUP2(0x00000000), + /* 9X*/ DUP2(0x00000000), + /* AX*/ DUP2(0x00000000), + /* BX*/ DUP2(0x00000000), + /* CX*/ DUP2(0x00000000), + /* DX*/ DUP2(0x00000000), + /* EX*/ DUP2(0x00000000), + /* FX*/ DUP2(0x00000000) + } +}; + +static void init_jit_mem() +{ + static bool inited = false; + if(inited) + return; + inited = true; + for(int proc=0; proc<2; proc++) + for(int i=0; i<0x4000; i++) + JIT.JIT_MEM[proc][i] = JIT_MEM[proc][i>>9] + (((i<<14) & JIT_MASK[proc][i>>9]) >> 1); +} + +#else +DS_ALIGN(4096) uintptr_t compiled_funcs[1<<26] = {0}; +#endif + +template +static u32 FASTCALL OP_DECODE() +{ + u32 cycles; + u32 adr = ARMPROC.instruct_adr; + if(thumb) + { + ARMPROC.next_instruction = adr + 2; + ARMPROC.R[15] = adr + 4; + u32 opcode = _MMU_read16(adr); + //_armlog(PROCNUM, adr, opcode); + cycles = thumb_instructions_set[PROCNUM][opcode>>6](opcode); + } + else + { + ARMPROC.next_instruction = adr + 4; + ARMPROC.R[15] = adr + 8; + u32 opcode = _MMU_read32(adr); + 
//_armlog(PROCNUM, adr, opcode); + if(CONDITION(opcode) == 0xE || TEST_COND(CONDITION(opcode), CODE(opcode), ARMPROC.CPSR)) + cycles = arm_instructions_set[PROCNUM][INSTRUCTION_INDEX(opcode)](opcode); + else + cycles = 1; + } + ARMPROC.instruct_adr = ARMPROC.next_instruction; + return cycles; +} + +static const ArmOpCompiled op_decode[2][2] = { OP_DECODE<0,0>, OP_DECODE<0,1>, OP_DECODE<1,0>, OP_DECODE<1,1> }; + + +enum OP_RESULT { OPR_CONTINUE, OPR_INTERPRET, OPR_BRANCHED, OPR_RESULT_SIZE = 2147483647 }; +#define OPR_RESULT(result, cycles) (OP_RESULT)((result) | ((cycles) << 16)); +#define OPR_RESULT_CYCLES(result) ((result >> 16)) +#define OPR_RESULT_ACTION(result) ((result & 0xFF)) + +typedef OP_RESULT (*ArmOpCompiler)(uint32_t pc, uint32_t opcode); + +static const uint32_t INSTRUCTION_COUNT = 0xC0000; +static code_pool* block; +static register_manager* regman; +static u8 recompile_counts[(1<<26)/16]; + +const reg_t RCPU = 12; +const reg_t RCYC = 4; +static uint32_t block_procnum; + +/////// +// HELPERS +/////// +static bool emu_status_dirty; + +static bool bit(uint32_t value, uint32_t bit) +{ + return value & (1 << bit); +} + +static uint32_t bit(uint32_t value, uint32_t first, uint32_t count) +{ + return (value >> first) & ((1 << count) - 1); +} + +static uint32_t bit_write(uint32_t value, uint32_t first, uint32_t count, uint32_t insert) +{ + uint32_t result = value & ~(((1 << count) - 1) << first); + return result | (insert << first); +} + +static void load_status(reg_t scratch) +{ + block->ldr(scratch, RCPU, mem2::imm(offsetof(armcpu_t, CPSR))); + block->set_status(scratch); +} + +static void write_status(reg_t scratch) +{ + if (emu_status_dirty) + { + block->get_status(scratch); + block->mov(scratch, alu2::reg_shift_imm(scratch, LSR, 24)); + block->strb(scratch, RCPU, mem2::imm(offsetof(armcpu_t, CPSR) + 3)); + + emu_status_dirty = false; + } +} + +static void mark_status_dirty() +{ + emu_status_dirty = true; +} + +static void call(reg_t reg) +{ + 
write_status(3); + block->blx(reg); + + const unsigned PROCNUM = block_procnum; + block->load_constant(RCPU, (uint32_t)&ARMPROC); + + load_status(3); +} + +static void change_mode(bool thumb) +{ + block->ldr(0, RCPU, mem2::imm(offsetof(armcpu_t, CPSR))); + + if (!thumb) + { + block->bic(0, alu2::imm(1 << 5)); + } + else + { + block->orr(0, alu2::imm(1 << 5)); + } + block->str(0, RCPU, mem2::imm(offsetof(armcpu_t, CPSR))); +} + +static void change_mode_reg(reg_t reg, reg_t scratch, reg_t scratch2) +{ + block->and_(scratch2, reg, alu2::imm(1)); + + block->ldr(scratch, RCPU, mem2::imm(offsetof(armcpu_t, CPSR))); + block->bic(scratch, alu2::imm(scratch2 << 5)); + block->orr(scratch, alu2::reg_shift_imm(scratch2, LSL, 5)); + block->str(scratch, RCPU, mem2::imm(offsetof(armcpu_t, CPSR))); +} + +template +static void arm_jit_prefetch(uint32_t pc, uint32_t opcode, bool thumb) +{ + const uint32_t imask = thumb ? 0xFFFFFFFE : 0xFFFFFFFC; + const uint32_t isize = thumb ? 2 : 4; + + block->load_constant(0, pc & imask); + block->str(0, RCPU, mem2::imm(offsetof(armcpu_t, instruct_adr))); + + block->add(0, alu2::imm(isize)); + block->str(0, RCPU, mem2::imm(offsetof(armcpu_t, next_instruction))); + + block->add(0, alu2::imm(isize)); + block->str(0, RCPU, mem2::imm(offsetof(armcpu_t, R) + 4 * 15)); + + block->load_constant(0, opcode); + block->str(0, RCPU, mem2::imm(offsetof(armcpu_t, instruction))); +} + +///////// +/// ARM +///////// +static OP_RESULT ARM_OP_PATCH_DELEGATE(uint32_t pc, uint32_t opcode, int AT16, int AT12, int AT8, int AT0, bool S, uint32_t CYC) +{ + const reg_t at16 = bit(opcode, 16, 4); + const reg_t at12 = bit(opcode, 12, 4); + const reg_t at8 = bit(opcode, 8, 4); + const reg_t at0 = bit(opcode, 0, 4); + + if ((AT16 && (at16 == 0xF)) || (AT12 && (at12 == 0xF)) || (AT8 && (at8 == 0xF)) || (AT0 && (at0 == 0xF))) + return OPR_INTERPRET; + + const uint32_t weak_tag = (bit(opcode, 28, 4) == AL) ? 0x10 : 0; + + int32_t reg_list[4]; + reg_list[0] = (AT16) ? 
(int32_t)(at16 | ((AT16 == 2) ? weak_tag : 0)) : -1; + reg_list[1] = (AT12) ? (int32_t)(at12 | ((AT12 == 2) ? weak_tag : 0)) : -1; + reg_list[2] = (AT8 ) ? (int32_t)(at8 | ((AT8 == 2) ? weak_tag : 0)) : -1; + reg_list[3] = (AT0 ) ? (int32_t)(at0 | ((AT0 == 2) ? weak_tag : 0)) : -1; + regman->get(4, reg_list); + + opcode = AT16 ? bit_write(opcode, 16, 4, reg_list[0]) : opcode; + opcode = AT12 ? bit_write(opcode, 12, 4, reg_list[1]) : opcode; + opcode = AT8 ? bit_write(opcode, 8, 4, reg_list[2]) : opcode; + opcode = AT0 ? bit_write(opcode, 0, 4, reg_list[3]) : opcode; + + block->insert_raw_instruction(opcode); + if (S) mark_status_dirty(); + + if (AT16 & 2) regman->mark_dirty(reg_list[0]); + if (AT12 & 2) regman->mark_dirty(reg_list[1]); + if (AT8 & 2) regman->mark_dirty(reg_list[2]); + if (AT0 & 2) regman->mark_dirty(reg_list[3]); + + return OPR_RESULT(OPR_CONTINUE, CYC); +} + +template +static OP_RESULT ARM_OP_PATCH(uint32_t pc, uint32_t opcode) +{ + return ARM_OP_PATCH_DELEGATE(pc, opcode, AT16, AT12, AT8, AT0, S, CYC); +} + +#define ARM_ALU_OP_DEF(T, D, N, S) \ + static const ArmOpCompiler ARM_OP_##T##_LSL_IMM = ARM_OP_PATCH; \ + static const ArmOpCompiler ARM_OP_##T##_LSL_REG = ARM_OP_PATCH; \ + static const ArmOpCompiler ARM_OP_##T##_LSR_IMM = ARM_OP_PATCH; \ + static const ArmOpCompiler ARM_OP_##T##_LSR_REG = ARM_OP_PATCH; \ + static const ArmOpCompiler ARM_OP_##T##_ASR_IMM = ARM_OP_PATCH; \ + static const ArmOpCompiler ARM_OP_##T##_ASR_REG = ARM_OP_PATCH; \ + static const ArmOpCompiler ARM_OP_##T##_ROR_IMM = ARM_OP_PATCH; \ + static const ArmOpCompiler ARM_OP_##T##_ROR_REG = ARM_OP_PATCH; \ + static const ArmOpCompiler ARM_OP_##T##_IMM_VAL = ARM_OP_PATCH + +ARM_ALU_OP_DEF(AND , 2, 1, false); +ARM_ALU_OP_DEF(AND_S, 2, 1, true); +ARM_ALU_OP_DEF(EOR , 2, 1, false); +ARM_ALU_OP_DEF(EOR_S, 2, 1, true); +ARM_ALU_OP_DEF(SUB , 2, 1, false); +ARM_ALU_OP_DEF(SUB_S, 2, 1, true); +ARM_ALU_OP_DEF(RSB , 2, 1, false); +ARM_ALU_OP_DEF(RSB_S, 2, 1, true); +ARM_ALU_OP_DEF(ADD 
, 2, 1, false); +ARM_ALU_OP_DEF(ADD_S, 2, 1, true); +ARM_ALU_OP_DEF(ADC , 2, 1, false); +ARM_ALU_OP_DEF(ADC_S, 2, 1, true); +ARM_ALU_OP_DEF(SBC , 2, 1, false); +ARM_ALU_OP_DEF(SBC_S, 2, 1, true); +ARM_ALU_OP_DEF(RSC , 2, 1, false); +ARM_ALU_OP_DEF(RSC_S, 2, 1, true); +ARM_ALU_OP_DEF(TST , 0, 1, true); +ARM_ALU_OP_DEF(TEQ , 0, 1, true); +ARM_ALU_OP_DEF(CMP , 0, 1, true); +ARM_ALU_OP_DEF(CMN , 0, 1, true); +ARM_ALU_OP_DEF(ORR , 2, 1, false); +ARM_ALU_OP_DEF(ORR_S, 2, 1, true); +ARM_ALU_OP_DEF(MOV , 2, 0, false); +ARM_ALU_OP_DEF(MOV_S, 2, 0, true); +ARM_ALU_OP_DEF(BIC , 2, 1, false); +ARM_ALU_OP_DEF(BIC_S, 2, 1, true); +ARM_ALU_OP_DEF(MVN , 2, 0, false); +ARM_ALU_OP_DEF(MVN_S, 2, 0, true); + +// HACK: multiply cycles are wrong +#define ARM_OP_MUL ARM_OP_PATCH<2, 0, 1, 1, false, 3> +#define ARM_OP_MUL_S ARM_OP_PATCH<2, 0, 1, 1, true, 3> +#define ARM_OP_MLA ARM_OP_PATCH<2, 1, 1, 1, false, 4> +#define ARM_OP_MLA_S ARM_OP_PATCH<2, 1, 1, 1, true, 4> +#define ARM_OP_UMULL ARM_OP_PATCH<2, 2, 1, 1, false, 4> +#define ARM_OP_UMULL_S ARM_OP_PATCH<2, 2, 1, 1, true, 4> +#define ARM_OP_UMLAL ARM_OP_PATCH<3, 3, 1, 1, false, 5> +#define ARM_OP_UMLAL_S ARM_OP_PATCH<3, 3, 1, 1, true, 5> +#define ARM_OP_SMULL ARM_OP_PATCH<2, 2, 1, 1, false, 4> +#define ARM_OP_SMULL_S ARM_OP_PATCH<2, 2, 1, 1, true, 4> +#define ARM_OP_SMLAL ARM_OP_PATCH<3, 3, 1, 1, false, 5> +#define ARM_OP_SMLAL_S ARM_OP_PATCH<3, 3, 1, 1, true, 5> + +#define ARM_OP_SMUL_B_B ARM_OP_PATCH<2, 0, 1, 1, true, 2> +#define ARM_OP_SMUL_T_B ARM_OP_PATCH<2, 0, 1, 1, true, 2> +#define ARM_OP_SMUL_B_T ARM_OP_PATCH<2, 0, 1, 1, true, 2> +#define ARM_OP_SMUL_T_T ARM_OP_PATCH<2, 0, 1, 1, true, 2> + +#define ARM_OP_SMLA_B_B ARM_OP_PATCH<2, 1, 1, 1, true, 2> +#define ARM_OP_SMLA_T_B ARM_OP_PATCH<2, 1, 1, 1, true, 2> +#define ARM_OP_SMLA_B_T ARM_OP_PATCH<2, 1, 1, 1, true, 2> +#define ARM_OP_SMLA_T_T ARM_OP_PATCH<2, 1, 1, 1, true, 2> + +#define ARM_OP_SMULW_B ARM_OP_PATCH<2, 0, 1, 1, true, 2> +#define ARM_OP_SMULW_T ARM_OP_PATCH<2, 0, 1, 
1, true, 2> +#define ARM_OP_SMLAW_B ARM_OP_PATCH<2, 1, 1, 1, true, 2> +#define ARM_OP_SMLAW_T ARM_OP_PATCH<2, 1, 1, 1, true, 2> + +#define ARM_OP_SMLAL_B_B ARM_OP_PATCH<3, 3, 1, 1, true, 2> +#define ARM_OP_SMLAL_T_B ARM_OP_PATCH<3, 3, 1, 1, true, 2> +#define ARM_OP_SMLAL_B_T ARM_OP_PATCH<3, 3, 1, 1, true, 2> +#define ARM_OP_SMLAL_T_T ARM_OP_PATCH<3, 3, 1, 1, true, 2> + +#define ARM_OP_QADD ARM_OP_PATCH<1, 2, 0, 1, true, 2> +#define ARM_OP_QSUB ARM_OP_PATCH<1, 2, 0, 1, true, 2> +#define ARM_OP_QDADD ARM_OP_PATCH<1, 2, 0, 1, true, 2> +#define ARM_OP_QDSUB ARM_OP_PATCH<1, 2, 0, 1, true, 2> + +#define ARM_OP_CLZ ARM_OP_PATCH<0, 2, 0, 1, false, 2> + +//////// +// Need versions of these functions with exported symbol +u8 _MMU_read08_9(u32 addr) { return _MMU_read08<0>(addr); } +u8 _MMU_read08_7(u32 addr) { return _MMU_read08<1>(addr); } +u16 _MMU_read16_9(u32 addr) { return _MMU_read16<0>(addr & 0xFFFFFFFE); } +u16 _MMU_read16_7(u32 addr) { return _MMU_read16<1>(addr & 0xFFFFFFFE); } +u32 _MMU_read32_9(u32 addr) { return ::ROR(_MMU_read32<0>(addr & 0xFFFFFFFC), 8 * (addr & 3)); } +u32 _MMU_read32_7(u32 addr) { return ::ROR(_MMU_read32<1>(addr & 0xFFFFFFFC), 8 * (addr & 3)); } + +void _MMU_write08_9(u32 addr, u8 val) { _MMU_write08<0>(addr, val); } +void _MMU_write08_7(u32 addr, u8 val) { _MMU_write08<1>(addr, val); } +void _MMU_write16_9(u32 addr, u16 val) { _MMU_write16<0>(addr & 0xFFFFFFFE, val); } +void _MMU_write16_7(u32 addr, u16 val) { _MMU_write16<1>(addr & 0xFFFFFFFE, val); } +void _MMU_write32_9(u32 addr, u32 val) { _MMU_write32<0>(addr & 0xFFFFFFFC, val); } +void _MMU_write32_7(u32 addr, u32 val) { _MMU_write32<1>(addr & 0xFFFFFFFC, val); } + +static const uint32_t mem_funcs[12] = +{ + (uint32_t)_MMU_read08_9 , (uint32_t)_MMU_read08_7, + (uint32_t)_MMU_write08_9, (uint32_t)_MMU_write08_7, + (uint32_t)_MMU_read16_9, (uint32_t)_MMU_read16_7, + (uint32_t)_MMU_write16_9, (uint32_t)_MMU_write16_7, + (uint32_t)_MMU_read32_9, (uint32_t)_MMU_read32_7, + 
(uint32_t)_MMU_write32_9, (uint32_t)_MMU_write32_7 +}; + + +static OP_RESULT ARM_OP_MEM(uint32_t pc, const uint32_t opcode) +{ + const AG_COND cond = (AG_COND)bit(opcode, 28, 4); + const bool has_reg_offset = bit(opcode, 25); + const bool has_pre_index = bit(opcode, 24); + const bool has_up_bit = bit(opcode, 23); + const bool has_byte_bit = bit(opcode, 22); + const bool has_write_back = bit(opcode, 21); + const bool has_load = bit(opcode, 20); + const reg_t rn = bit(opcode, 16, 4); + const reg_t rd = bit(opcode, 12, 4); + const reg_t rm = bit(opcode, 0, 4); + + if (rn == 0xF || rd == 0xF || (has_reg_offset && (rm == 0xF))) + return OPR_INTERPRET; + + int32_t regs[3] = { rd | (((cond == AL) && has_load) ? 0x10 : 0), rn, has_reg_offset ? (int32_t)rm : -1 }; + regman->get(3, regs); + + const reg_t dest = regs[0]; + const reg_t base = regs[1]; + const reg_t offs = has_reg_offset ? regs[2] : 3; + + // HACK: This needs to done manually here as we can't branch over the generated code + write_status(3); + + if (cond != AL) + { + block->b("run", cond); + block->b("skip"); + block->set_label("run"); + } + + // Put the indexed address in R3 + if (has_reg_offset) + { + const AG_ALU_SHIFT st = (AG_ALU_SHIFT)bit(opcode, 5, 2); + const uint32_t imm = bit(opcode, 7, 5); + + if (has_up_bit) block->add(3, base, alu2::reg_shift_imm(offs, st, imm)); + else block->sub(3, base, alu2::reg_shift_imm(offs, st, imm)); + } + else + { + block->load_constant(3, opcode & 0xFFF); + + if (has_up_bit) block->add(3, base, alu2::reg(3)); + else block->sub(3, base, alu2::reg(3)); + } + + // Load EA + block->mov(0, alu2::reg((has_pre_index ? (reg_t)3 : base))); + + // Do Writeback + if ((!has_pre_index) || has_write_back) + { + block->mov(base, alu2::reg(3)); + regman->mark_dirty(base); + } + + // DO + if (!has_load) + { + if (has_byte_bit) + { + block->uxtb(1, dest); + } + else + { + block->mov(1, alu2::reg(dest)); + } + } + + uint32_t func_idx = block_procnum | (has_load ? 0 : 2) | (has_byte_bit ? 
0 : 8); + block->load_constant(2, mem_funcs[func_idx]); + call(2); + + if (has_load) + { + if (has_byte_bit) + { + block->uxtb(dest, 0); + } + else + { + block->mov(dest, alu2::reg(0)); + } + + regman->mark_dirty(dest); + } + + if (cond != AL) + { + block->set_label("skip"); + block->resolve_label("run"); + block->resolve_label("skip"); + } + + // TODO: + return OPR_RESULT(OPR_CONTINUE, 3); +} + +#define ARM_MEM_OP_DEF2(T, Q) \ + static const ArmOpCompiler ARM_OP_##T##_M_LSL_##Q = ARM_OP_MEM; \ + static const ArmOpCompiler ARM_OP_##T##_P_LSL_##Q = ARM_OP_MEM; \ + static const ArmOpCompiler ARM_OP_##T##_M_LSR_##Q = ARM_OP_MEM; \ + static const ArmOpCompiler ARM_OP_##T##_P_LSR_##Q = ARM_OP_MEM; \ + static const ArmOpCompiler ARM_OP_##T##_M_ASR_##Q = ARM_OP_MEM; \ + static const ArmOpCompiler ARM_OP_##T##_P_ASR_##Q = ARM_OP_MEM; \ + static const ArmOpCompiler ARM_OP_##T##_M_ROR_##Q = ARM_OP_MEM; \ + static const ArmOpCompiler ARM_OP_##T##_P_ROR_##Q = ARM_OP_MEM; \ + static const ArmOpCompiler ARM_OP_##T##_M_##Q = ARM_OP_MEM; \ + static const ArmOpCompiler ARM_OP_##T##_P_##Q = ARM_OP_MEM + +#define ARM_MEM_OP_DEF(T) \ + ARM_MEM_OP_DEF2(T, IMM_OFF_PREIND); \ + ARM_MEM_OP_DEF2(T, IMM_OFF); \ + ARM_MEM_OP_DEF2(T, IMM_OFF_POSTIND) + +ARM_MEM_OP_DEF(STR); +ARM_MEM_OP_DEF(LDR); +ARM_MEM_OP_DEF(STRB); +ARM_MEM_OP_DEF(LDRB); + +// +static OP_RESULT ARM_OP_MEM_HALF(uint32_t pc, uint32_t opcode) +{ + const AG_COND cond = (AG_COND)bit(opcode, 28, 4); + const bool has_pre_index = bit(opcode, 24); + const bool has_up_bit = bit(opcode, 23); + const bool has_imm_offset = bit(opcode, 22); + const bool has_write_back = bit(opcode, 21); + const bool has_load = bit(opcode, 20); + const uint32_t op = bit(opcode, 5, 2); + const reg_t rn = bit(opcode, 16, 4); + const reg_t rd = bit(opcode, 12, 4); + const reg_t rm = bit(opcode, 0, 4); + + if (rn == 0xF || rd == 0xF || (!has_imm_offset && (rm == 0xF))) + return OPR_INTERPRET; + + int32_t regs[3] = { rd | (((cond == AL) && has_load) ? 
0x10 : 0), rn, (!has_imm_offset) ? (int32_t)rm : -1 };
   // Map guest registers onto host registers: regs[0] = Rd (the 0x10 flag marks
   // it write-only for an unconditional load, so the allocator can skip the
   // initial read), regs[1] = Rn base, regs[2] = Rm (register-offset form only).
   regman->get(3, regs);

   const reg_t dest = regs[0];
   const reg_t base = regs[1];
   const reg_t offs = (!has_imm_offset) ? regs[2] : 0;

   // HACK: This needs to done manually here as we can't branch over the generated code
   write_status(3);

   // Conditional execution is emulated with a branch pair around the body:
   // take "run" when the condition holds, otherwise fall into the "skip" branch.
   if (cond != AL)
   {
      block->b("run", cond);
      block->b("skip");
      block->set_label("run");
   }

   // Put the indexed address in R3
   if (!has_imm_offset)
   {
      if (has_up_bit) block->add(3, base, alu2::reg(offs));
      else block->sub(3, base, alu2::reg(offs));
   }
   else
   {
      // The 8-bit immediate of the halfword transfer encoding is split across
      // opcode bits 0-3 (low nibble) and 8-11 (high nibble).
      block->load_constant(3, (opcode & 0xF) | ((opcode >> 4) & 0xF0));

      if (has_up_bit) block->add(3, base, alu2::reg(3));
      else block->sub(3, base, alu2::reg(3));
   }

   // Load EA into R0: pre-indexed addressing uses the freshly computed address,
   // post-indexed uses the unmodified base.
   block->mov(0, alu2::reg((has_pre_index ? (reg_t)3 : base)));

   // Do Writeback (always for post-index, on request for pre-index)
   if ((!has_pre_index) || has_write_back)
   {
      block->mov(base, alu2::reg(3));
      regman->mark_dirty(base);
   }

   // For stores, place the source value in R1 for the MMU helper, extended to
   // match the transfer width. op: 1 = unsigned half, 2 = signed byte, 3 = signed half.
   if (!has_load)
   {
      switch (op)
      {
         case 1: block->uxth(1, dest); break;
         case 2: block->sxtb(1, dest); break;
         case 3: block->sxth(1, dest); break;
      }
   }

   // Select the MMU helper from mem_funcs: bit 0 = cpu (ARM9/ARM7),
   // bit 1 = store, and the 8-bit table (0-3) for signed byte vs 16-bit (4-7).
   uint32_t func_idx = block_procnum | (has_load ? 0 : 2) | ((op == 2) ?
0 : 4); + block->load_constant(2, mem_funcs[func_idx]); + call(2); + + if (has_load) + { + switch (op) + { + case 1: block->uxth(dest, 0); break; + case 2: block->sxtb(dest, 0); break; + case 3: block->sxth(dest, 0); break; + } + + regman->mark_dirty(dest); + } + + if (cond != AL) + { + block->set_label("skip"); + block->resolve_label("run"); + block->resolve_label("skip"); + } + + // TODO: + return OPR_RESULT(OPR_CONTINUE, 3); +} + +#define ARM_MEM_HALF_OP_DEF2(T, P) \ + static const ArmOpCompiler ARM_OP_##T##_##P##M_REG_OFF = ARM_OP_MEM_HALF; \ + static const ArmOpCompiler ARM_OP_##T##_##P##P_REG_OFF = ARM_OP_MEM_HALF; \ + static const ArmOpCompiler ARM_OP_##T##_##P##M_IMM_OFF = ARM_OP_MEM_HALF; \ + static const ArmOpCompiler ARM_OP_##T##_##P##P_IMM_OFF = ARM_OP_MEM_HALF + +#define ARM_MEM_HALF_OP_DEF(T) \ + ARM_MEM_HALF_OP_DEF2(T, POS_INDE_); \ + ARM_MEM_HALF_OP_DEF2(T, ); \ + ARM_MEM_HALF_OP_DEF2(T, PRE_INDE_) + +ARM_MEM_HALF_OP_DEF(STRH); +ARM_MEM_HALF_OP_DEF(LDRH); +ARM_MEM_HALF_OP_DEF(STRSB); +ARM_MEM_HALF_OP_DEF(LDRSB); +ARM_MEM_HALF_OP_DEF(STRSH); +ARM_MEM_HALF_OP_DEF(LDRSH); + +// +#define SIGNEXTEND_24(i) (((s32)i<<8)>>8) +static OP_RESULT ARM_OP_B_BL(uint32_t pc, uint32_t opcode) +{ + const AG_COND cond = (AG_COND)bit(opcode, 28, 4); + const bool has_link = bit(opcode, 24); + + const bool unconditional = (cond == AL || cond == EGG); + int32_t regs[1] = { (has_link || cond == EGG) ? 
14 : -1 }; + regman->get(1, regs); + + uint32_t dest = (pc + 8 + (SIGNEXTEND_24(bit(opcode, 0, 24)) << 2)); + + if (!unconditional) + { + block->load_constant(0, pc + 4); + + block->b("run", cond); + block->b("skip"); + block->set_label("run"); + } + + if (cond == EGG) + { + change_mode(true); + + if (has_link) + { + dest += 2; + } + } + + if (has_link || cond == EGG) + { + block->load_constant(regs[0], pc + 4); + regman->mark_dirty(regs[0]); + } + + block->load_constant(0, dest); + + if (!unconditional) + { + block->set_label("skip"); + block->resolve_label("run"); + block->resolve_label("skip"); + } + + block->str(0, RCPU, mem2::imm(offsetof(armcpu_t, instruct_adr))); + + + // TODO: Timing + return OPR_RESULT(OPR_BRANCHED, 3); +} + +#define ARM_OP_B ARM_OP_B_BL +#define ARM_OP_BL ARM_OP_B_BL + +//// + +#define ARM_OP_LDRD_STRD_POST_INDEX 0 +#define ARM_OP_LDRD_STRD_OFFSET_PRE_INDEX 0 +#define ARM_OP_MRS_CPSR 0 +#define ARM_OP_SWP 0 +#define ARM_OP_MSR_CPSR 0 +#define ARM_OP_BX 0 +#define ARM_OP_BLX_REG 0 +#define ARM_OP_BKPT 0 +#define ARM_OP_MRS_SPSR 0 +#define ARM_OP_SWPB 0 +#define ARM_OP_MSR_SPSR 0 +#define ARM_OP_STREX 0 +#define ARM_OP_LDREX 0 +#define ARM_OP_MSR_CPSR_IMM_VAL 0 +#define ARM_OP_MSR_SPSR_IMM_VAL 0 +#define ARM_OP_STMDA 0 +#define ARM_OP_LDMDA 0 +#define ARM_OP_STMDA_W 0 +#define ARM_OP_LDMDA_W 0 +#define ARM_OP_STMDA2 0 +#define ARM_OP_LDMDA2 0 +#define ARM_OP_STMDA2_W 0 +#define ARM_OP_LDMDA2_W 0 +#define ARM_OP_STMIA 0 +#define ARM_OP_LDMIA 0 +#define ARM_OP_STMIA_W 0 +#define ARM_OP_LDMIA_W 0 +#define ARM_OP_STMIA2 0 +#define ARM_OP_LDMIA2 0 +#define ARM_OP_STMIA2_W 0 +#define ARM_OP_LDMIA2_W 0 +#define ARM_OP_STMDB 0 +#define ARM_OP_LDMDB 0 +#define ARM_OP_STMDB_W 0 +#define ARM_OP_LDMDB_W 0 +#define ARM_OP_STMDB2 0 +#define ARM_OP_LDMDB2 0 +#define ARM_OP_STMDB2_W 0 +#define ARM_OP_LDMDB2_W 0 +#define ARM_OP_STMIB 0 +#define ARM_OP_LDMIB 0 +#define ARM_OP_STMIB_W 0 +#define ARM_OP_LDMIB_W 0 +#define ARM_OP_STMIB2 0 +#define ARM_OP_LDMIB2 
0 +#define ARM_OP_STMIB2_W 0 +#define ARM_OP_LDMIB2_W 0 +#define ARM_OP_STC_OPTION 0 +#define ARM_OP_LDC_OPTION 0 +#define ARM_OP_STC_M_POSTIND 0 +#define ARM_OP_LDC_M_POSTIND 0 +#define ARM_OP_STC_P_POSTIND 0 +#define ARM_OP_LDC_P_POSTIND 0 +#define ARM_OP_STC_M_IMM_OFF 0 +#define ARM_OP_LDC_M_IMM_OFF 0 +#define ARM_OP_STC_M_PREIND 0 +#define ARM_OP_LDC_M_PREIND 0 +#define ARM_OP_STC_P_IMM_OFF 0 +#define ARM_OP_LDC_P_IMM_OFF 0 +#define ARM_OP_STC_P_PREIND 0 +#define ARM_OP_LDC_P_PREIND 0 +#define ARM_OP_CDP 0 +#define ARM_OP_MCR 0 +#define ARM_OP_MRC 0 +#define ARM_OP_SWI 0 +#define ARM_OP_UND 0 +static const ArmOpCompiler arm_instruction_compilers[4096] = { +#define TABDECL(x) ARM_##x +#include "instruction_tabdef.inc" +#undef TABDECL +}; + +//////// +// THUMB +//////// +static OP_RESULT THUMB_OP_SHIFT(uint32_t pc, uint32_t opcode) +{ + const uint32_t rd = bit(opcode, 0, 3); + const uint32_t rs = bit(opcode, 3, 3); + const uint32_t imm = bit(opcode, 6, 5); + const AG_ALU_SHIFT op = (AG_ALU_SHIFT)bit(opcode, 11, 2); + + int32_t regs[2] = { rd | 0x10, rs }; + regman->get(2, regs); + + const reg_t nrd = regs[0]; + const reg_t nrs = regs[1]; + + block->movs(nrd, alu2::reg_shift_imm(nrs, op, imm)); + mark_status_dirty(); + + regman->mark_dirty(nrd); + + return OPR_RESULT(OPR_CONTINUE, 1); +} + +static OP_RESULT THUMB_OP_ADDSUB_REGIMM(uint32_t pc, uint32_t opcode) +{ + const uint32_t rd = bit(opcode, 0, 3); + const uint32_t rs = bit(opcode, 3, 3); + const AG_ALU_OP op = bit(opcode, 9) ? SUBS : ADDS; + const bool arg_type = bit(opcode, 10); + const uint32_t arg = bit(opcode, 6, 3); + + int32_t regs[3] = { rd | 0x10, rs, (!arg_type) ? 
arg : -1 };
   regman->get(3, regs);

   const reg_t nrd = regs[0];
   const reg_t nrs = regs[1];

   if (arg_type) // Immediate
   {
      block->alu_op(op, nrd, nrs, alu2::imm(arg));
      mark_status_dirty();
   }
   else
   {
      block->alu_op(op, nrd, nrs, alu2::reg(regs[2]));
      mark_status_dirty();
   }

   regman->mark_dirty(nrd);

   return OPR_RESULT(OPR_CONTINUE, 1);
}

// Thumb format 3: MOV/CMP/ADD/SUB Rd, #imm8. All four set flags.
static OP_RESULT THUMB_OP_MCAS_IMM8(uint32_t pc, uint32_t opcode)
{
   const reg_t rd = bit(opcode, 8, 3);
   const uint32_t op = bit(opcode, 11, 2);
   const uint32_t imm = bit(opcode, 0, 8);

   int32_t regs[1] = { rd };
   regman->get(1, regs);
   const reg_t nrd = regs[0];

   switch (op)
   {
      case 0: block->alu_op(MOVS, nrd, nrd, alu2::imm(imm)); break;
      case 1: block->alu_op(CMP , nrd, nrd, alu2::imm(imm)); break;
      case 2: block->alu_op(ADDS, nrd, nrd, alu2::imm(imm)); break;
      case 3: block->alu_op(SUBS, nrd, nrd, alu2::imm(imm)); break;
   }

   mark_status_dirty();

   if (op != 1) // Don't keep the result of a CMP instruction
   {
      regman->mark_dirty(nrd);
   }

   return OPR_RESULT(OPR_CONTINUE, 1);
}

// Thumb format 4: register-to-register ALU operations (AND/EOR/.../MVN).
// The shifts-by-register (ops 2,3,4,7) are emitted as flag-setting MOVs.
static OP_RESULT THUMB_OP_ALU(uint32_t pc, uint32_t opcode)
{
   const uint32_t rd = bit(opcode, 0, 3);
   const uint32_t rs = bit(opcode, 3, 3);
   const uint32_t op = bit(opcode, 6, 4);
   bool need_writeback = false;

   if (op == 13) // TODO: The MULS is interpreted for now
   {
      return OPR_INTERPRET;
   }

   int32_t regs[2] = { rd, rs };
   regman->get(2, regs);

   const reg_t nrd = regs[0];
   const reg_t nrs = regs[1];

   switch (op)
   {
      case 0: block->ands(nrd, alu2::reg(nrs)); break;
      case 1: block->eors(nrd, alu2::reg(nrs)); break;
      case 5: block->adcs(nrd, alu2::reg(nrs)); break;
      case 6: block->sbcs(nrd, alu2::reg(nrs)); break;
      case 8: block->tst (nrd, alu2::reg(nrs)); break;
      case 10: block->cmp (nrd, alu2::reg(nrs)); break;
      case 11: block->cmn (nrd, alu2::reg(nrs)); break;
      case 12: block->orrs(nrd, alu2::reg(nrs)); break;
      case 14: block->bics(nrd, alu2::reg(nrs));
break; + case 15: block->mvns(nrd, alu2::reg(nrs)); break; + + case 2: block->movs(nrd, alu2::reg_shift_reg(nrd, LSL, nrs)); break; + case 3: block->movs(nrd, alu2::reg_shift_reg(nrd, LSR, nrs)); break; + case 4: block->movs(nrd, alu2::reg_shift_reg(nrd, ASR, nrs)); break; + case 7: block->movs(nrd, alu2::reg_shift_reg(nrd, arm_gen::ROR, nrs)); break; + + case 9: block->rsbs(nrd, nrs, alu2::imm(0)); break; + } + + mark_status_dirty(); + + static const bool op_wb[16] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1 }; + if (op_wb[op]) + { + regman->mark_dirty(nrd); + } + + return OPR_RESULT(OPR_CONTINUE, 1); +} + +static OP_RESULT THUMB_OP_SPE(uint32_t pc, uint32_t opcode) +{ + const uint32_t rd = bit(opcode, 0, 3) + (bit(opcode, 7) ? 8 : 0); + const uint32_t rs = bit(opcode, 3, 4); + const uint32_t op = bit(opcode, 8, 2); + + if (rd == 0xF || rs == 0xF) + { + return OPR_INTERPRET; + } + + int32_t regs[2] = { rd, rs }; + regman->get(2, regs); + + const reg_t nrd = regs[0]; + const reg_t nrs = regs[1]; + + switch (op) + { + case 0: block->add(nrd, alu2::reg(nrs)); break; + case 1: block->cmp(nrd, alu2::reg(nrs)); break; + case 2: block->mov(nrd, alu2::reg(nrs)); break; + } + + if (op != 1) + { + regman->mark_dirty(nrd); + } + else + { + mark_status_dirty(); + } + + return OPR_RESULT(OPR_CONTINUE, 1); +} + +static OP_RESULT THUMB_OP_MEMORY_DELEGATE(uint32_t pc, uint32_t opcode, bool LOAD, uint32_t SIZE, uint32_t EXTEND, bool REG_OFFSET) +{ + const uint32_t rd = bit(opcode, 0, 3); + const uint32_t rb = bit(opcode, 3, 3); + const uint32_t ro = bit(opcode, 6, 3); + const uint32_t off = bit(opcode, 6, 5); + + int32_t regs[3] = { rd | (LOAD ? 0x10 : 0), rb, REG_OFFSET ? 
ro : -1}; + regman->get(3, regs); + + const reg_t dest = regs[0]; + const reg_t base = regs[1]; + + // Calc EA + + if (REG_OFFSET) + { + const reg_t offset = regs[2]; + block->mov(0, alu2::reg(base)); + block->add(0, alu2::reg(offset)); + } + else + { + block->add(0, base, alu2::imm(off << SIZE)); + } + + // Load access function + block->load_constant(2, mem_funcs[(SIZE << 2) + (LOAD ? 0 : 2) + block_procnum]); + + if (!LOAD) + { + block->mov(1, alu2::reg(dest)); + } + + call(2); + + if (LOAD) + { + if (EXTEND) + { + if (SIZE == 0) + { + block->sxtb(dest, 0); + } + else + { + block->sxth(dest, 0); + } + } + else + { + block->mov(dest, alu2::reg(0)); + } + + regman->mark_dirty(dest); + } + + // TODO + return OPR_RESULT(OPR_CONTINUE, 3); +} + +// SIZE: 0=8, 1=16, 2=32 +template +static OP_RESULT THUMB_OP_MEMORY(uint32_t pc, uint32_t opcode) +{ + return THUMB_OP_MEMORY_DELEGATE(pc, opcode, LOAD, SIZE, EXTEND, REG_OFFSET); +} + +static OP_RESULT THUMB_OP_LDR_PCREL(uint32_t pc, uint32_t opcode) +{ + const uint32_t offset = bit(opcode, 0, 8); + const reg_t rd = bit(opcode, 8, 3); + + int32_t regs[1] = { rd | 0x10 }; + regman->get(1, regs); + + const reg_t dest = regs[0]; + + block->load_constant(0, ((pc + 4) & ~2) + (offset << 2)); + block->load_constant(2, mem_funcs[8 + block_procnum]); + call(2); + block->mov(dest, alu2::reg(0)); + + regman->mark_dirty(dest); + return OPR_RESULT(OPR_CONTINUE, 3); +} + +static OP_RESULT THUMB_OP_STR_SPREL(uint32_t pc, uint32_t opcode) +{ + const uint32_t offset = bit(opcode, 0, 8); + const reg_t rd = bit(opcode, 8, 3); + + int32_t regs[2] = { rd, 13 }; + regman->get(2, regs); + + const reg_t src = regs[0]; + const reg_t base = regs[1]; + + block->add(0, base, alu2::imm_rol(offset, 2)); + block->mov(1, alu2::reg(src)); + block->load_constant(2, mem_funcs[10 + block_procnum]); + call(2); + + return OPR_RESULT(OPR_CONTINUE, 3); +} + +static OP_RESULT THUMB_OP_LDR_SPREL(uint32_t pc, uint32_t opcode) +{ + const uint32_t offset = bit(opcode, 
0, 8);
   const reg_t rd = bit(opcode, 8, 3);

   int32_t regs[2] = { rd | 0x10, 13 };
   regman->get(2, regs);

   const reg_t dest = regs[0];
   const reg_t base = regs[1];

   // EA = SP + (imm8 << 2); call the 32-bit read helper, result arrives in R0.
   block->add(0, base, alu2::imm_rol(offset, 2));
   block->load_constant(2, mem_funcs[8 + block_procnum]);
   call(2);
   block->mov(dest, alu2::reg(0));

   regman->mark_dirty(dest);
   return OPR_RESULT(OPR_CONTINUE, 3);
}

// Thumb format 16: conditional branch, signed 8-bit halfword offset.
static OP_RESULT THUMB_OP_B_COND(uint32_t pc, uint32_t opcode)
{
   const AG_COND cond = (AG_COND)bit(opcode, 8, 4);

   // R0 = fall-through address; conditionally overwritten with the target
   // (the second load_constant is predicated on `cond`).
   block->load_constant(0, pc + 2);
   block->load_constant(0, (pc + 4) + ((u32)((s8)(opcode&0xFF))<<1), cond);
   block->str(0, RCPU, mem2::imm(offsetof(armcpu_t, instruct_adr)));

   // A taken branch costs two extra cycles.
   block->add(RCYC, alu2::imm(2), cond);

   return OPR_RESULT(OPR_BRANCHED, 1);
}

// Thumb format 18: unconditional branch, signed 11-bit halfword offset.
static OP_RESULT THUMB_OP_B_UNCOND(uint32_t pc, uint32_t opcode)
{
   // Sign-extend the 11-bit offset (bit 10 is the sign bit).
   int32_t offs = (opcode & 0x7FF) | (bit(opcode, 10) ? 0xFFFFF800 : 0);
   block->load_constant(0, pc + 4 + (offs << 1));

   block->str(0, RCPU, mem2::imm(offsetof(armcpu_t, instruct_adr)));

   return OPR_RESULT(OPR_BRANCHED, 3);
}

// Thumb format 13: ADD/SUB SP, #imm7*4 (bit 7 selects subtraction).
static OP_RESULT THUMB_OP_ADJUST_SP(uint32_t pc, uint32_t opcode)
{
   const uint32_t offs = bit(opcode, 0, 7);

   int32_t regs[1] = { 13 };
   regman->get(1, regs);

   const reg_t sp = regs[0];

   if (bit(opcode, 7)) block->sub(sp, alu2::imm_rol(offs, 2));
   else block->add(sp, alu2::imm_rol(offs, 2));

   regman->mark_dirty(sp);

   return OPR_RESULT(OPR_CONTINUE, 1);
}

// Thumb format 12 (PC variant): ADD Rd, PC, #imm8*4 — PC is word-aligned first.
static OP_RESULT THUMB_OP_ADD_2PC(uint32_t pc, uint32_t opcode)
{
   const uint32_t offset = bit(opcode, 0, 8);
   const reg_t rd = bit(opcode, 8, 3);

   int32_t regs[1] = { rd | 0x10 };
   regman->get(1, regs);

   const reg_t dest = regs[0];

   // The result is a compile-time constant, so just materialize it.
   block->load_constant(dest, ((pc + 4) & 0xFFFFFFFC) + (offset << 2));
   regman->mark_dirty(dest);

   return OPR_RESULT(OPR_CONTINUE, 1);
}

// Thumb format 12 (SP variant): ADD Rd, SP, #imm8*4.
static OP_RESULT THUMB_OP_ADD_2SP(uint32_t pc, uint32_t opcode)
{
   const uint32_t offset = bit(opcode, 0, 8);
   const reg_t rd =
bit(opcode, 8, 3); + + int32_t regs[2] = { 13, rd | 0x10 }; + regman->get(2, regs); + + const reg_t sp = regs[0]; + const reg_t dest = regs[1]; + + block->add(dest, sp, alu2::imm_rol(offset, 2)); + regman->mark_dirty(dest); + + return OPR_RESULT(OPR_CONTINUE, 1); +} + +static OP_RESULT THUMB_OP_BX_BLX_THUMB(uint32_t pc, uint32_t opcode) +{ + const reg_t rm = bit(opcode, 3, 4); + const bool link = bit(opcode, 7); + + if (rm == 15) + return OPR_INTERPRET; + + block->load_constant(0, pc + 4); + + int32_t regs[2] = { link ? 14 : -1, (rm != 15) ? (int32_t)rm : -1 }; + regman->get(2, regs); + + if (link) + { + const reg_t lr = regs[0]; + block->sub(lr, 0, alu2::imm(1)); + regman->mark_dirty(lr); + } + + reg_t target = regs[1]; + + change_mode_reg(target, 2, 3); + block->bic(0, target, alu2::imm(1)); + block->str(0, RCPU, mem2::imm(offsetof(armcpu_t, instruct_adr))); + + return OPR_RESULT(OPR_BRANCHED, 3); +} + +#if 1 +#define THUMB_OP_BL_LONG 0 +#else +static OP_RESULT THUMB_OP_BL_LONG(uint32_t pc, uint32_t opcode) +{ + static const uint32_t op = bit(opcode, 11, 5); + int32_t offset = bit(opcode, 0, 11); + + reg_t lr = regman->get(14, op == 0x1E); + + if (op == 0x1E) + { + offset |= (offset & 0x400) ? 0xFFFFF800 : 0; + block->load_constant(lr, (pc + 4) + (offset << 12)); + } + else + { + block->load_constant(0, offset << 1); + + block->add(0, lr, alu2::reg(0)); + block->str(0, RCPU, mem2::imm(offsetof(armcpu_t, instruct_adr))); + + block->load_constant(lr, pc + 3); + + if (op != 0x1F) + { + change_mode(false); + } + } + + regman->mark_dirty(lr); + + if (op == 0x1E) + { + return OPR_RESULT(OPR_CONTINUE, 1); + } + else + { + return OPR_RESULT(OPR_BRANCHED, (op == 0x1F) ? 
3 : 4); + } +} +#endif + +#define THUMB_OP_INTERPRET 0 +#define THUMB_OP_UND_THUMB THUMB_OP_INTERPRET + +#define THUMB_OP_LSL THUMB_OP_SHIFT +#define THUMB_OP_LSL_0 THUMB_OP_SHIFT +#define THUMB_OP_LSR THUMB_OP_SHIFT +#define THUMB_OP_LSR_0 THUMB_OP_SHIFT +#define THUMB_OP_ASR THUMB_OP_SHIFT +#define THUMB_OP_ASR_0 THUMB_OP_SHIFT + +#define THUMB_OP_ADD_REG THUMB_OP_ADDSUB_REGIMM +#define THUMB_OP_SUB_REG THUMB_OP_ADDSUB_REGIMM +#define THUMB_OP_ADD_IMM3 THUMB_OP_ADDSUB_REGIMM +#define THUMB_OP_SUB_IMM3 THUMB_OP_ADDSUB_REGIMM + +#define THUMB_OP_MOV_IMM8 THUMB_OP_MCAS_IMM8 +#define THUMB_OP_CMP_IMM8 THUMB_OP_MCAS_IMM8 +#define THUMB_OP_ADD_IMM8 THUMB_OP_MCAS_IMM8 +#define THUMB_OP_SUB_IMM8 THUMB_OP_MCAS_IMM8 + +#define THUMB_OP_AND THUMB_OP_ALU +#define THUMB_OP_EOR THUMB_OP_ALU +#define THUMB_OP_LSL_REG THUMB_OP_ALU +#define THUMB_OP_LSR_REG THUMB_OP_ALU +#define THUMB_OP_ASR_REG THUMB_OP_ALU +#define THUMB_OP_ADC_REG THUMB_OP_ALU +#define THUMB_OP_SBC_REG THUMB_OP_ALU +#define THUMB_OP_ROR_REG THUMB_OP_ALU +#define THUMB_OP_TST THUMB_OP_ALU +#define THUMB_OP_NEG THUMB_OP_ALU +#define THUMB_OP_CMP THUMB_OP_ALU +#define THUMB_OP_CMN THUMB_OP_ALU +#define THUMB_OP_ORR THUMB_OP_ALU +#define THUMB_OP_MUL_REG THUMB_OP_INTERPRET +#define THUMB_OP_BIC THUMB_OP_ALU +#define THUMB_OP_MVN THUMB_OP_ALU + +#define THUMB_OP_ADD_SPE THUMB_OP_SPE +#define THUMB_OP_CMP_SPE THUMB_OP_SPE +#define THUMB_OP_MOV_SPE THUMB_OP_SPE + +#define THUMB_OP_ADJUST_P_SP THUMB_OP_ADJUST_SP +#define THUMB_OP_ADJUST_M_SP THUMB_OP_ADJUST_SP + +#define THUMB_OP_LDRB_REG_OFF THUMB_OP_MEMORY +#define THUMB_OP_LDRH_REG_OFF THUMB_OP_MEMORY +#define THUMB_OP_LDR_REG_OFF THUMB_OP_MEMORY + +#define THUMB_OP_STRB_REG_OFF THUMB_OP_MEMORY +#define THUMB_OP_STRH_REG_OFF THUMB_OP_MEMORY +#define THUMB_OP_STR_REG_OFF THUMB_OP_MEMORY + +#define THUMB_OP_LDRB_IMM_OFF THUMB_OP_MEMORY +#define THUMB_OP_LDRH_IMM_OFF THUMB_OP_MEMORY +#define THUMB_OP_LDR_IMM_OFF THUMB_OP_MEMORY + +#define THUMB_OP_STRB_IMM_OFF 
THUMB_OP_MEMORY +#define THUMB_OP_STRH_IMM_OFF THUMB_OP_MEMORY +#define THUMB_OP_STR_IMM_OFF THUMB_OP_MEMORY + +#define THUMB_OP_LDRSB_REG_OFF THUMB_OP_MEMORY +#define THUMB_OP_LDRSH_REG_OFF THUMB_OP_MEMORY + +#define THUMB_OP_BX_THUMB THUMB_OP_BX_BLX_THUMB +#define THUMB_OP_BLX_THUMB THUMB_OP_BX_BLX_THUMB +#define THUMB_OP_BL_10 THUMB_OP_BL_LONG +#define THUMB_OP_BL_11 THUMB_OP_BL_LONG +#define THUMB_OP_BLX THUMB_OP_BL_LONG + + +// UNDEFINED OPS +#define THUMB_OP_PUSH THUMB_OP_INTERPRET +#define THUMB_OP_PUSH_LR THUMB_OP_INTERPRET +#define THUMB_OP_POP THUMB_OP_INTERPRET +#define THUMB_OP_POP_PC THUMB_OP_INTERPRET +#define THUMB_OP_BKPT_THUMB THUMB_OP_INTERPRET +#define THUMB_OP_STMIA_THUMB THUMB_OP_INTERPRET +#define THUMB_OP_LDMIA_THUMB THUMB_OP_INTERPRET +#define THUMB_OP_SWI_THUMB THUMB_OP_INTERPRET + +static const ArmOpCompiler thumb_instruction_compilers[1024] = { +#define TABDECL(x) THUMB_##x +#include "thumb_tabdef.inc" +#undef TABDECL +}; + + + +// ============================================================================================= IMM + +//----------------------------------------------------------------------------- +// Compiler +//----------------------------------------------------------------------------- + +static u32 instr_attributes(bool thumb, u32 opcode) +{ + return thumb ? 
thumb_attributes[opcode>>6]
                : instruction_attributes[INSTRUCTION_INDEX(opcode)];
}

// True when `opcode` can redirect control flow (and must therefore end the
// basic block being compiled).
static bool instr_is_branch(bool thumb, u32 opcode)
{
   u32 x = instr_attributes(thumb, opcode);
   if(thumb)
      return (x & BRANCH_ALWAYS)
          // Hi-register ops: Rd = bits 0-2 plus bit 7; Rd == PC branches.
          || ((x & BRANCH_POS0) && ((opcode&7) | ((opcode>>4)&8)) == 15)
          || (x & BRANCH_SWI)
          || (x & JIT_BYPASS);
   else
      return (x & BRANCH_ALWAYS)
          || ((x & BRANCH_POS12) && REG_POS(opcode,12) == 15)
          // Block transfer with PC in the register list.
          || ((x & BRANCH_LDM) && BIT15(opcode))
          || (x & BRANCH_SWI)
          || (x & JIT_BYPASS);
}

// Compile one basic block starting at ARMPROC.instruct_adr, register the
// generated function in the JIT lookup table, and return it.
// NOTE(review): the template parameter list was lost in extraction; the
// surrounding code (ARMPROC, block_procnum = PROCNUM) implies <int PROCNUM> —
// confirm against the original file.
template<int PROCNUM>
static ArmOpCompiled compile_basicblock()
{
   block_procnum = PROCNUM;

   const bool thumb = ARMPROC.CPSR.bits.T == 1;
   const u32 base = ARMPROC.instruct_adr;
   const u32 isize = thumb ? 2 : 4;

   uint32_t pc = base;
   bool compiled_op = true;
   bool has_ended = false;
   uint32_t constant_cycles = 0;

   // NOTE: Expected register usage
   // R5 = Pointer to ARMPROC
   // R6 = Cycle counter

   // Prologue: save callee-saved registers + LR, point RCPU at the emulated
   // core and zero the dynamic cycle counter.
   regman->reset();
   block->push(0x4DF0);

   block->load_constant(RCPU, (uint32_t)&ARMPROC);
   block->load_constant(RCYC, 0);

   load_status(3);

   for (uint32_t i = 0; i < CommonSettings.jit_max_block_size && !has_ended; i ++, pc += isize)
   {
      // NOTE(review): template arguments on these fetch helpers may also have
      // been stripped by extraction — verify against the original.
      uint32_t opcode = thumb ? _MMU_read16(pc) : _MMU_read32(pc);

      // Dispatch to the per-opcode compiler; a null entry means "interpret".
      ArmOpCompiler compiler = thumb ? thumb_instruction_compilers[opcode >> 6]
                                     : arm_instruction_compilers[INSTRUCTION_INDEX(opcode)];

      int result = compiler ?
compiler(pc, opcode) : OPR_INTERPRET; + + constant_cycles += OPR_RESULT_CYCLES(result); + switch (OPR_RESULT_ACTION(result)) + { + case OPR_INTERPRET: + { + if (compiled_op) + { + arm_jit_prefetch(pc, opcode, thumb); + compiled_op = false; + } + + regman->flush_all(); + regman->reset(); + + block->load_constant(0, (uint32_t)&armcpu_exec); + call(0); + block->add(RCYC, alu2::reg(0)); + + has_ended = has_ended || instr_is_branch(thumb, opcode); + + break; + } + + case OPR_BRANCHED: + { + has_ended = true; + compiled_op = false; + break; + } + + case OPR_CONTINUE: + { + compiled_op = true; + break; + } + } + } + + if (compiled_op) + { + block->load_constant(0, pc); + block->str(0, RCPU, mem2::imm(offsetof(armcpu_t, instruct_adr))); + } + + write_status(3); + + regman->flush_all(); + regman->reset(); + + block->load_constant(1, constant_cycles); + block->add(0, 1, alu2::reg(RCYC)); + + block->pop(0x8DF0); + + void* fn_ptr = block->fn_pointer(); + JIT_COMPILED_FUNC(base, PROCNUM) = (uintptr_t)fn_ptr; + return (ArmOpCompiled)fn_ptr; +} + + +template u32 arm_jit_compile() +{ + u32 adr = ARMPROC.instruct_adr; + u32 mask_adr = (adr & 0x07FFFFFE) >> 4; + if(((recompile_counts[mask_adr >> 1] >> 4*(mask_adr & 1)) & 0xF) > 8) + { + ArmOpCompiled f = op_decode[PROCNUM][ARMPROC.CPSR.bits.T]; + JIT_COMPILED_FUNC(adr, PROCNUM) = (uintptr_t)f; + return f(); + } + + recompile_counts[mask_adr >> 1] += 1 << 4*(mask_adr & 1); + + if (block->instructions_remaining() < 1000) + { + arm_jit_reset(true); + } + + return compile_basicblock()(); +} + +template u32 arm_jit_compile<0>(); +template u32 arm_jit_compile<1>(); + +void arm_jit_reset(bool enable, bool suppress_msg) +{ + if (!suppress_msg) + printf("CPU mode: %s\n", enable?"JIT":"Interpreter"); + + saveBlockSizeJIT = CommonSettings.jit_max_block_size; + + if (enable) + { + printf("JIT: max block size %d instruction(s)\n", CommonSettings.jit_max_block_size); + +#ifdef MAPPED_JIT_FUNCS + + #define JITFREE(x) memset(x,0,sizeof(x)); + 
JITFREE(JIT.MAIN_MEM); + JITFREE(JIT.SWIRAM); + JITFREE(JIT.ARM9_ITCM); + JITFREE(JIT.ARM9_LCDC); + JITFREE(JIT.ARM9_BIOS); + JITFREE(JIT.ARM7_BIOS); + JITFREE(JIT.ARM7_ERAM); + JITFREE(JIT.ARM7_WIRAM); + JITFREE(JIT.ARM7_WRAM); + #undef JITFREE + + memset(recompile_counts, 0, sizeof(recompile_counts)); + init_jit_mem(); + +#else + for(int i=0; i +#include "arm_gen.h" +#include "armcpu.h" + +extern const arm_gen::reg_t RCPU; + +class register_manager +{ + public: + register_manager(arm_gen::code_pool* apool) : pool(apool) + { + reset(); + } + + void reset() + { + memset(mapping, 0xFF, sizeof(mapping)); + memset(usage_tag, 0, sizeof(usage_tag)); + memset(dirty, 0, sizeof(dirty)); + memset(weak, 0, sizeof(weak)); + next_usage_tag = 1; + } + + bool is_usable(arm_gen::reg_t reg) const + { + static const uint32_t USE_MAP = 0xDE0; + return (USE_MAP & (1 << reg)) ? true : false; + } + + private: + int32_t find(uint32_t emu_reg_id) + { + for (int i = 0; i != 16; i ++) + { + if (is_usable(i) && mapping[i] == emu_reg_id) + { + usage_tag[i] = next_usage_tag ++; + assert(is_usable(i)); + return i; + } + } + + return -1; + } + + int32_t get_loaded(uint32_t emu_reg_id, bool no_read) + { + int32_t current = find(emu_reg_id); + + if (current >= 0) + { + if (weak[current] && !no_read) + { + read_emu(current, emu_reg_id); + weak[current] = false; + } + } + + return current; + } + + arm_gen::reg_t get_oldest() + { + uint32_t result = 0; + uint32_t lowtag = 0xFFFFFFFF; + + for (int i = 0; i != 16; i ++) + { + if (is_usable(i) && usage_tag[i] < lowtag) + { + lowtag = usage_tag[i]; + result = i; + } + } + + assert(is_usable(result)); + return result; + } + + public: + void get(uint32_t reg_count, int32_t* emu_reg_ids) + { + assert(reg_count < 5); + bool found[5] = { false, false, false, false, false }; + + // Find existing registers + for (uint32_t i = 0; i < reg_count; i ++) + { + if (emu_reg_ids[i] < 0) + { + found[i] = true; + } + else + { + int32_t current = 
get_loaded(emu_reg_ids[i] & 0xF, emu_reg_ids[i] & 0x10);
            if (current >= 0)
            {
               // An earlier slot of this same request pulled the register in.
               emu_reg_ids[i] = current;
               found[i] = true;
            }
            else
            {
               // Evict the least-recently-used host register and reuse it.
               arm_gen::reg_t result = get_oldest();
               flush(result);

               // The 0x10 flag means the caller will overwrite the value, so
               // the load from the emulated register file can be skipped.
               if (!(emu_reg_ids[i] & 0x10))
               {
                  read_emu(result, emu_reg_ids[i] & 0xF);
               }

               mapping[result] = emu_reg_ids[i] & 0xF;
               usage_tag[result] = next_usage_tag ++;
               weak[result] = (emu_reg_ids[i] & 0x10) ? true : false;

               emu_reg_ids[i] = result;
               found[i] = true;
            }
         }
      }
   }

   // Mark a host register as holding a value newer than the emulated core's
   // copy; it must be written back before reuse. Clears the weak flag.
   void mark_dirty(uint32_t native_reg)
   {
      assert(is_usable(native_reg));
      dirty[native_reg] = true;
      weak[native_reg] = false;
   }

   // Write one dirty (non-weak) host register back to the emulated register file.
   void flush(uint32_t native_reg)
   {
      assert(is_usable(native_reg));
      if (dirty[native_reg] && !weak[native_reg])
      {
         write_emu(native_reg, mapping[native_reg]);
         dirty[native_reg] = false;
      }
   }

   // Flush every usable host register.
   void flush_all()
   {
      for (int i = 0; i != 16; i ++)
      {
         if (is_usable(i))
         {
            flush(i);
         }
      }
   }

   private:
   // Emit a load of emulated register `emu` (armcpu_t::R[emu]) into `native`.
   void read_emu(arm_gen::reg_t native, arm_gen::reg_t emu)
   {
      pool->ldr(native, RCPU, arm_gen::mem2::imm(offsetof(armcpu_t, R) + 4 * emu));
   }

   // Emit a store of `native` back to emulated register `emu`.
   void write_emu(arm_gen::reg_t native, arm_gen::reg_t emu)
   {
      pool->str(native, RCPU, arm_gen::mem2::imm(offsetof(armcpu_t, R) + 4 * emu));
   }

   private:
   arm_gen::code_pool* pool;

   uint32_t mapping[16];   // mapping[native] = emulated register index (0xFF.. = free)
   uint32_t usage_tag[16]; // LRU tick; higher = more recently used
   bool dirty[16];         // host copy is newer than the emulated core's
   bool weak[16];          // value never read from the core; safe to discard

   uint32_t next_usage_tag;
};

#endif