/*
	Copyright 2020 flyinghead

	This file is part of flycast.

	flycast is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 2 of the License, or
	(at your option) any later version.

	flycast is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with flycast.  If not, see <https://www.gnu.org/licenses/>.
*/
#include "build.h"

#if HOST_CPU == CPU_ARM && FEAT_AREC != DYNAREC_NONE
#include "arm7_rec.h"
#include "hw/mem/_vmem.h"

#define _DEVEL 1
#define EMIT_I         aicaarm::armEmit32(I)
#define EMIT_GET_PTR() aicaarm::recompiler::currentCode()

namespace aicaarm {
static void armEmit32(u32 emit32);
}

#include "arm_emitter/arm_emitter.h"
#undef I

using namespace ARM;

namespace aicaarm {

static void (*arm_dispatch)();

// r8 holds &arm_Reg[0] for the lifetime of the main loop, so guest registers
// are accessed as fixed offsets from r8.
static void loadReg(eReg host_reg, Arm7Reg guest_reg, ArmOp::Condition cc = ArmOp::AL)
{
	LDR(host_reg, r8, (u8*)&arm_Reg[guest_reg].I - (u8*)&arm_Reg[0].I, ARM::Offset, (ARM::ConditionCode)cc);
}

static void storeReg(eReg host_reg, Arm7Reg guest_reg, ArmOp::Condition cc = ArmOp::AL)
{
	STR(host_reg, r8, (u8*)&arm_Reg[guest_reg].I - (u8*)&arm_Reg[0].I, ARM::Offset, (ARM::ConditionCode)cc);
}

// Host registers available to the register allocator
static const std::array<eReg, 6> alloc_regs{ r5, r6, r7, r9, r10, r11 };

// CRTP: the base register allocator calls back into LoadReg()/StoreReg()
class Arm32ArmRegAlloc : public ArmRegAlloc<alloc_regs.size(), Arm32ArmRegAlloc>
{
	using super = ArmRegAlloc<alloc_regs.size(), Arm32ArmRegAlloc>;

	void LoadReg(int host_reg, Arm7Reg armreg, ArmOp::Condition cc = ArmOp::AL)
	{
		// printf("LoadReg R%d <- r%d\n", host_reg, armreg);
		loadReg(getReg(host_reg), armreg, cc);
	}

	void StoreReg(int host_reg, Arm7Reg armreg, ArmOp::Condition cc = ArmOp::AL)
	{
		// printf("StoreReg R%d -> r%d\n", host_reg, armreg);
		storeReg(getReg(host_reg), armreg, cc);
	}

	static eReg getReg(int i)
	{
		verify(i >= 0 && (u32)i < alloc_regs.size());
		return alloc_regs[i];
	}

public:
	Arm32ArmRegAlloc(const std::vector<ArmOp>& block_ops)
		: super(block_ops) {}

	eReg map(Arm7Reg r)
	{
		int i = super::map(r);
		return getReg(i);
	}

	friend super;
};

static void armEmit32(u32 emit32)
{
	if (recompiler::spaceLeft() <= 1024)
	{
		ERROR_LOG(AICA_ARM, "JIT buffer full: %d bytes free", recompiler::spaceLeft());
		die("AICA ARM code buffer full");
	}
	*(u32 *)recompiler::currentCode() = emit32;
	recompiler::advance(4);
}

static Arm32ArmRegAlloc *regalloc;

static void loadFlags()
{
	//Load flags
	loadReg(r3, RN_PSR_FLAGS);
	//move them to flags register
	MSR(0, 8, r3);
}

static void storeFlags()
{
	//get results from flags register
	MRS(r3, 0);
	//Store flags
	storeReg(r3, RN_PSR_FLAGS);
}

static u32 *startConditional(ArmOp::Condition cc)
{
	if (cc == ArmOp::AL)
		return nullptr;
	verify(cc <= ArmOp::LE);
	ARM::ConditionCode condition = (ARM::ConditionCode)((u32)cc ^ 1);
	u32 *code = (u32 *)recompiler::currentCode();
	// Placeholder branch on the inverted condition; its offset is patched
	// by endConditional() once the guarded code has been emitted.
	JUMP((u32)code, condition);

	return code;
}

static void endConditional(u32 *pos)
{
	if (pos != nullptr)
	{
		u32 *curpos = (u32 *)recompiler::currentCode();
		ARM::ConditionCode condition = (ARM::ConditionCode)(*pos >> 28);
		// Rewind the emit pointer, re-emit the branch with the now-known
		// target, then restore the emit pointer.
		recompiler::icPtr = (u8 *)pos;
		JUMP((u32)curpos, condition);
		recompiler::icPtr = (u8 *)curpos;
	}
}
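// getOperand() materializes an IR operand for the code being emitted: an
// unshifted register operand maps directly to its allocated host register and
// emits nothing; immediates are loaded with MOV (if i8r4-encodable) or MOV32;
// shifted registers are copied into the caller-provided scratch register and
// shifted in place. For example, the operand "r1 LSL #2" emits
// "MOV scratch, host(r1)" then "MOV scratch, scratch, LSL #2" and returns
// scratch_reg.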
static eReg getOperand(ArmOp::Operand arg, eReg scratch_reg)
{
	if (arg.isNone())
		return (eReg)-1;
	else if (arg.isImmediate())
	{
		if (is_i8r4(arg.getImmediate()))
			MOV(scratch_reg, arg.getImmediate());
		else
			MOV32(scratch_reg, arg.getImmediate());
	}
	else if (arg.isReg())
	{
		if (!arg.isShifted())
			return regalloc->map(arg.getReg().armreg);
		MOV(scratch_reg, regalloc->map(arg.getReg().armreg));
	}

	if (!arg.shift_imm)
	{
		// Shift by register
		eReg shift_reg = regalloc->map(arg.shift_reg.armreg);
		MOV(scratch_reg, scratch_reg, (ARM::ShiftOp)arg.shift_type, shift_reg);
	}
	else
	{
		// Shift by immediate
		if (arg.shift_value != 0 || arg.shift_type != ArmOp::LSL)	// LSL 0 is a no-op
			MOV(scratch_reg, scratch_reg, (ARM::ShiftOp)arg.shift_type, arg.shift_value);
	}

	return scratch_reg;
}

// OpImmediate, OpShiftImm and OpShiftReg are the emitter overloads for the
// immediate, shift-by-immediate and shift-by-register encodings of one
// instruction; overload resolution selects them by signature at the call
// sites in emitDataProcOp().
template <void (*OpImmediate)(eReg rd, eReg rn, s32 imm8, bool S, ConditionCode CC),
		void (*OpShiftImm)(eReg rd, eReg rn, eReg rm, ShiftOp Shift, u32 ShiftAmount, bool S, ConditionCode CC),
		void (*OpShiftReg)(eReg rd, eReg rn, eReg rm, ShiftOp Shift, eReg Rs, bool S, ConditionCode CC)>
void emit3ArgOp(const ArmOp& op)
{
	eReg rn;
	const ArmOp::Operand *op2;
	if (op.op_type != ArmOp::MOV && op.op_type != ArmOp::MVN)
	{
		rn = getOperand(op.arg[0], r2);
		op2 = &op.arg[1];
	}
	else
		op2 = &op.arg[0];
	eReg rd = regalloc->map(op.rd.getReg().armreg);
	bool set_flags = op.flags & ArmOp::OP_SETS_FLAGS;

	eReg rm;
	if (op2->isImmediate())
	{
		if (is_i8r4(op2->getImmediate()) && op2->shift_imm)
		{
			OpImmediate(rd, rn, op2->getImmediate(), set_flags, CC_AL);
			return;
		}
		MOV32(r0, op2->getImmediate());
		rm = r0;
	}
	else if (op2->isReg())
		rm = regalloc->map(op2->getReg().armreg);

	if (op2->shift_imm)
		OpShiftImm(rd, rn, rm, (ShiftOp)op2->shift_type, op2->shift_value, set_flags, CC_AL);
	else
	{
		// Shift by reg
		eReg shift_reg = regalloc->map(op2->shift_reg.armreg);
		OpShiftReg(rd, rn, rm, (ShiftOp)op2->shift_type, shift_reg, set_flags, CC_AL);
	}
}

template <void (*OpImmediate)(eReg rd, s32 imm8, bool S, ConditionCode CC),
		void (*OpShiftImm)(eReg rd, eReg rm, ShiftOp Shift, u32 ShiftAmount, bool S, ConditionCode CC),
		void (*OpShiftReg)(eReg rd, eReg rm, ShiftOp Shift, eReg Rs, bool S, ConditionCode CC)>
void emit2ArgOp(const ArmOp& op)
{
	// Used for rd (MOV, MVN) and rn (CMP, TST, ...)
	eReg rd;
	const ArmOp::Operand *op2;
	if (op.op_type != ArmOp::MOV && op.op_type != ArmOp::MVN)
	{
		rd = getOperand(op.arg[0], r2);
		op2 = &op.arg[1];
	}
	else
	{
		op2 = &op.arg[0];
		rd = regalloc->map(op.rd.getReg().armreg);
	}
	bool set_flags = op.flags & ArmOp::OP_SETS_FLAGS;

	eReg rm;
	if (op2->isImmediate())
	{
		if (is_i8r4(op2->getImmediate()) && op2->shift_imm)
		{
			OpImmediate(rd, op2->getImmediate(), set_flags, CC_AL);
			return;
		}
		MOV32(r0, op2->getImmediate());
		rm = r0;
	}
	else if (op2->isReg())
		rm = regalloc->map(op2->getReg().armreg);

	if (op2->shift_imm)
		OpShiftImm(rd, rm, (ShiftOp)op2->shift_type, op2->shift_value, set_flags, CC_AL);
	else
	{
		// Shift by reg
		eReg shift_reg = regalloc->map(op2->shift_reg.armreg);
		OpShiftReg(rd, rm, (ShiftOp)op2->shift_type, shift_reg, set_flags, CC_AL);
	}
}

static void emitDataProcOp(const ArmOp& op)
{
	switch (op.op_type)
	{
	case ArmOp::AND:
		emit3ArgOp<&AND, &AND, &AND>(op);
		break;
	case ArmOp::EOR:
		emit3ArgOp<&EOR, &EOR, &EOR>(op);
		break;
	case ArmOp::SUB:
		emit3ArgOp<&SUB, &SUB, &SUB>(op);
		break;
	case ArmOp::RSB:
		emit3ArgOp<&RSB, &RSB, &RSB>(op);
		break;
	case ArmOp::ADD:
		emit3ArgOp<&ADD, &ADD, &ADD>(op);
		break;
	case ArmOp::ORR:
		emit3ArgOp<&ORR, &ORR, &ORR>(op);
		break;
	case ArmOp::BIC:
		emit3ArgOp<&BIC, &BIC, &BIC>(op);
		break;
	case ArmOp::ADC:
		emit3ArgOp<&ADC, &ADC, &ADC>(op);
		break;
	case ArmOp::SBC:
		emit3ArgOp<&SBC, &SBC, &SBC>(op);
		break;
	case ArmOp::RSC:
		emit3ArgOp<&RSC, &RSC, &RSC>(op);
		break;
	case ArmOp::TST:
		emit2ArgOp<&TST, &TST, &TST>(op);
		break;
	case ArmOp::TEQ:
		emit2ArgOp<&TEQ, &TEQ, &TEQ>(op);
		break;
	case ArmOp::CMP:
		emit2ArgOp<&CMP, &CMP, &CMP>(op);
		break;
	case ArmOp::CMN:
		emit2ArgOp<&CMN, &CMN, &CMN>(op);
		break;
	case ArmOp::MOV:
		emit2ArgOp<&MOV, &MOV, &MOV>(op);
		break;
	case ArmOp::MVN:
		emit2ArgOp<&MVN, &MVN, &MVN>(op);
		break;
	default:
		die("invalid op");
		break;
	}
}

static void call(u32 addr, ARM::ConditionCode cc = ARM::CC_AL)
{
	storeFlags();
	CALL(addr, cc);
	loadFlags();
}
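// Loads and stores are compiled to calls into the generic memory handlers
// selected by recompiler::getMemOp(). Register convention implemented by the
// sequence emitted below:
//   r0 = effective address (base, plus/minus the pre-indexed offset)
//   r1 = value to store (STR only)
//   r0 = loaded value on return (LDR only), copied to the mapped rd
// Since guest flags live in the host flags register between ops, call()
// spills them with storeFlags() around the C call and reloads them after.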
static void emitMemOp(const ArmOp& op)
{
	// Compute the effective address into r0
	eReg addr_reg = getOperand(op.arg[0], r2);
	if (op.pre_index)
	{
		const ArmOp::Operand& offset = op.arg[1];
		if (offset.isReg())
		{
			eReg offset_reg = getOperand(offset, r3);
			if (op.add_offset)
				ADD(r0, addr_reg, offset_reg);
			else
				SUB(r0, addr_reg, offset_reg);
			addr_reg = r0;
		}
		else if (offset.isImmediate() && offset.getImmediate() != 0)
		{
			if (is_i8r4(offset.getImmediate()))
			{
				if (op.add_offset)
					ADD(r0, addr_reg, offset.getImmediate());
				else
					SUB(r0, addr_reg, offset.getImmediate());
			}
			else
			{
				MOV32(r0, offset.getImmediate());
				if (op.add_offset)
					ADD(r0, addr_reg, r0);
				else
					SUB(r0, addr_reg, r0);
			}
			addr_reg = r0;
		}
	}
	if (addr_reg != r0)
		MOV(r0, addr_reg);
	if (op.op_type == ArmOp::STR)
	{
		// Value to store goes in r1
		if (op.arg[2].isImmediate())
		{
			if (is_i8r4(op.arg[2].getImmediate()))
				MOV(r1, op.arg[2].getImmediate());
			else
				MOV32(r1, op.arg[2].getImmediate());
		}
		else
			MOV(r1, regalloc->map(op.arg[2].getReg().armreg));
	}

	call((u32)recompiler::getMemOp(op.op_type == ArmOp::LDR, op.byte_xfer));

	if (op.op_type == ArmOp::LDR)
		MOV(regalloc->map(op.rd.getReg().armreg), r0);
}

static void emitBranch(const ArmOp& op)
{
	if (op.arg[0].isImmediate())
		MOV32(r0, op.arg[0].getImmediate());
	else
	{
		MOV(r0, regalloc->map(op.arg[0].getReg().armreg));
		BIC(r0, r0, 3);		// word-align the target address
	}
	storeReg(r0, R15_ARM_NEXT);
}

static void emitMRS(const ArmOp& op)
{
	call((u32)CPUUpdateCPSR);

	if (op.spsr)
		loadReg(regalloc->map(op.rd.getReg().armreg), RN_SPSR);
	else
		loadReg(regalloc->map(op.rd.getReg().armreg), RN_CPSR);
}

static void emitMSR(const ArmOp& op)
{
	if (op.arg[0].isImmediate())
		MOV32(r0, op.arg[0].getImmediate());
	else
		MOV(r0, regalloc->map(op.arg[0].getReg().armreg));
	if (op.spsr)
		call((u32)recompiler::MSR_do<1>);
	else
		call((u32)recompiler::MSR_do<0>);
}

static void emitFallback(const ArmOp& op)
{
	//Call interpreter
	MOV32(r0, op.arg[0].getImmediate());
	call((u32)recompiler::interpret);
}

void arm7backend_compile(const std::vector<ArmOp>& block_ops, u32 cycles)
{
	void *codestart = recompiler::currentCode();

	// Deduct the block's cycle count; counts that don't fit the i8r4
	// immediate encoding are consumed in chunks of 256.
	loadReg(r2, CYCL_CNT);
	while (!is_i8r4(cycles))
	{
		SUB(r2, r2, 256);
		cycles -= 256;
	}
	SUB(r2, r2, cycles);
	storeReg(r2, CYCL_CNT);

	regalloc = new Arm32ArmRegAlloc(block_ops);

	loadFlags();

	for (u32 i = 0; i < block_ops.size(); i++)
	{
		const ArmOp& op = block_ops[i];
		DEBUG_LOG(AICA_ARM, "-> %s", op.toString().c_str());

		u32 *condPos = nullptr;
		if (op.op_type != ArmOp::FALLBACK)
			condPos = startConditional(op.condition);

		regalloc->load(i);

		if (op.op_type <= ArmOp::MVN)			// data processing op
			emitDataProcOp(op);
		else if (op.op_type <= ArmOp::STR)		// memory load/store
			emitMemOp(op);
		else if (op.op_type <= ArmOp::BL)		// branch
			emitBranch(op);
		else if (op.op_type == ArmOp::MRS)
			emitMRS(op);
		else if (op.op_type == ArmOp::MSR)
			emitMSR(op);
		else if (op.op_type == ArmOp::FALLBACK)
			emitFallback(op);
		else
			die("invalid");

		regalloc->store(i);

		endConditional(condPos);
	}
	storeFlags();

	JUMP((uintptr_t)arm_dispatch);

	vmem_platform_flush_cache(codestart, (u8*)recompiler::currentCode() - 1,
			codestart, (u8*)recompiler::currentCode() - 1);

	delete regalloc;
	regalloc = nullptr;
}
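// arm7backend_flush() (re)generates the runtime trampolines at the bottom of
// the code buffer:
//   arm_mainloop    - prologue: saves callee-saved registers, points r8 at
//                     the guest register file and r4 at the block entry table
//   arm_dispatch    - dispatch loop: exits when the cycle counter runs out,
//                     services a pending interrupt, otherwise jumps to the
//                     compiled block via "LDR pc, [r4, r2, LSL #2]" with r2
//                     holding PC bits [22:2] (8 MB guest address space)
//   arm_dofiq       - raises the FIQ, then redispatches
//   arm_exit        - epilogue: restores registers and returns to the caller
//   arm_compilecode - compiles the block at the current PC, then redispatches
// The forward branches to arm_exit and arm_dofiq are emitted before their
// targets exist and are re-emitted in place ("fix up") once the target
// addresses are known.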
void arm7backend_flush()
{
	if (!recompiler::empty())
	{
		verify(arm_mainloop != nullptr);
		verify(arm_compilecode != nullptr);
		return;
	}

	void *codestart = recompiler::currentCode();
	uintptr_t arm_exit = (uintptr_t)codestart;
	uintptr_t arm_dofiq = (uintptr_t)codestart;

	// arm_mainloop:
	arm_mainloop = (arm_mainloop_t)codestart;
	u32 regList = (1 << r4) | (1 << r5) | (1 << r6) | (1 << r7) | (1 << r8) | (1 << r9)
			| (1 << r10) | (1 << r11) | (1 << lr);
	PUSH(regList);
	SUB(sp, sp, 4);						// 8-byte stack alignment
	MOV(r8, r0);						// load regs
	MOV(r4, r1);						// load entry points

	// arm_dispatch:
	arm_dispatch = (void (*)())recompiler::currentCode();
	loadReg(r3, CYCL_CNT);				// load cycle counter
	loadReg(r0, R15_ARM_NEXT);			// load Next PC
	loadReg(r1, INTR_PEND);				// load Interrupt
	CMP(r3, 0);
	u8 *exit_fixup = (u8 *)recompiler::currentCode();
	JUMP(arm_exit, CC_LE);				// exit if counter <= 0
	UBFX(r2, r0, 2, 21);				// assuming 8 MB address space max (23 bits)
	CMP(r1, 0);
	u8 *dofiq_fixup = (u8 *)recompiler::currentCode();
	JUMP(arm_dofiq, CC_NE);				// if interrupt pending, handle it
	LDR(pc, r4, r2, AddrMode::Offset, true, ShiftOp::S_LSL, 2);

	// arm_dofiq:
	arm_dofiq = (uintptr_t)recompiler::currentCode();
	// fix up
	u8 *icptr_save = (u8 *)recompiler::currentCode();
	recompiler::icPtr = dofiq_fixup;
	JUMP(arm_dofiq, CC_NE);
	recompiler::icPtr = icptr_save;
	// end fix up
	CALL((uintptr_t)CPUFiq);
	JUMP((uintptr_t)arm_dispatch);

	// arm_exit:
	arm_exit = (uintptr_t)recompiler::currentCode();
	// fix up
	icptr_save = (u8 *)recompiler::currentCode();
	recompiler::icPtr = exit_fixup;
	JUMP(arm_exit, CC_LE);
	recompiler::icPtr = icptr_save;
	// end fix up
	ADD(sp, sp, 4);
	POP(regList);
	MOV(pc, lr);

	// arm_compilecode:
	arm_compilecode = (void (*)())recompiler::currentCode();
	CALL((uintptr_t)recompiler::compile);
	JUMP((uintptr_t)arm_dispatch);

	vmem_platform_flush_cache(codestart, (u8*)recompiler::currentCode() - 1,
			codestart, (u8*)recompiler::currentCode() - 1);
}

} // namespace aicaarm

#endif // HOST_CPU == CPU_ARM && FEAT_AREC != DYNAREC_NONE