/*
	Copyright 2020 flyinghead

	This file is part of flycast.

	flycast is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 2 of the License, or
	(at your option) any later version.

	flycast is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with flycast. If not, see <https://www.gnu.org/licenses/>.
*/
#include "build.h"

#if HOST_CPU == CPU_ARM && FEAT_AREC != DYNAREC_NONE

#include "arm7_rec.h"
#include "hw/mem/_vmem.h"
// vixl A32/T32 macro assembler (header path assumed from the usual vixl layout)
#include <aarch32/macro-assembler-aarch32.h>

using namespace vixl::aarch32;

namespace aicaarm {

class Arm32Assembler : public MacroAssembler
{
public:
	Arm32Assembler() = default;
	Arm32Assembler(u8 *buffer, size_t size) : MacroAssembler(buffer, size, A32) {}

	void Finalize()
	{
		FinalizeCode();
		vmem_platform_flush_cache(GetBuffer()->GetStartAddress<u8 *>(), GetCursorAddress<u8 *>() - 1,
				GetBuffer()->GetStartAddress<u8 *>(), GetCursorAddress<u8 *>() - 1);
	}
};

static Arm32Assembler ass;
static void (*arm_dispatch)();

static void loadReg(Register host_reg, Arm7Reg guest_reg, ConditionType cc = al)
{
	ass.Ldr(cc, host_reg, MemOperand(r8, (u8*)&arm_Reg[guest_reg].I - (u8*)&arm_Reg[0].I));
}

static void storeReg(Register host_reg, Arm7Reg guest_reg, ConditionType cc = al)
{
	ass.Str(cc, host_reg, MemOperand(r8, (u8*)&arm_Reg[guest_reg].I - (u8*)&arm_Reg[0].I));
}

// Host registers available to the register allocator
const std::array alloc_regs{ r5, r6, r7, r9, r10, r11 };

// Note: the ArmRegAlloc template arguments are assumed to be the number of
// allocatable host registers and the derived class (CRTP).
class Arm32ArmRegAlloc : public ArmRegAlloc<alloc_regs.size(), Arm32ArmRegAlloc>
{
	using super = ArmRegAlloc<alloc_regs.size(), Arm32ArmRegAlloc>;

	void LoadReg(int host_reg, Arm7Reg armreg, ArmOp::Condition cc = ArmOp::AL)
	{
		loadReg(getReg(host_reg), armreg, (ConditionType)cc);
	}

	void StoreReg(int host_reg, Arm7Reg armreg, ArmOp::Condition cc = ArmOp::AL)
	{
		storeReg(getReg(host_reg), armreg, (ConditionType)cc);
	}

	static Register getReg(int i)
	{
		return alloc_regs[i];
	}

public:
	Arm32ArmRegAlloc(const std::vector<ArmOp>& block_ops) : super(block_ops) {}

	Register map(Arm7Reg r)
	{
		int i = super::map(r);
		return getReg(i);
	}

	friend super;
};

static Arm32ArmRegAlloc *regalloc;

static void loadFlags()
{
	//Load flags
	loadReg(r3, RN_PSR_FLAGS);
	//move them to flags register
	ass.Msr(APSR_nzcvq, r3);
}

static void storeFlags()
{
	//get results from flags register
	ass.Mrs(r3, APSR);
	//Store flags
	storeReg(r3, RN_PSR_FLAGS);
}

static Label *startConditional(ArmOp::Condition cc)
{
	if (cc == ArmOp::AL)
		return nullptr;
	ConditionType condition = (ConditionType)((u32)cc ^ 1);
	Label *label = new Label();
	ass.B(condition, label);

	return label;
}

static void endConditional(Label *label)
{
	if (label != nullptr)
	{
		ass.Bind(label);
		delete label;
	}
}

static Operand getOperand(const ArmOp::Operand& arg)
{
	Register reg;
	if (arg.isNone())
		return reg;
	if (arg.isImmediate())
	{
		if (!arg.isShifted())
			return Operand(arg.getImmediate());
		// Used by pc-rel ops: pc is immediate but can be shifted by reg (or even imm if op sets flags)
		ass.Mov(r1, arg.getImmediate());
		reg = r1;
	}
	else if (arg.isReg())
		reg = regalloc->map(arg.getReg().armreg);

	if (arg.isShifted())
	{
		if (!arg.shift_imm)
		{
			// Shift by register
			Register shift_reg = regalloc->map(arg.shift_reg.armreg);
			return Operand(reg, (ShiftType)arg.shift_type, shift_reg);
		}
		else
		{
			// Shift by immediate
			if (arg.shift_value != 0 || arg.shift_type != ArmOp::LSL)	// LSL 0 is a no-op
			{
				if (arg.shift_value == 0 && arg.shift_type == ArmOp::ROR)
					return Operand(reg, RRX);
				else
				{
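					// In the ARM encoding, LSR #0 and ASR #0 denote a shift by 32
					// (ROR #0 denotes RRX, handled above).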
					u32 shiftValue = arg.shift_value;
					if (shiftValue == 0 && (arg.shift_type == ArmOp::LSR || arg.shift_type == ArmOp::ASR))
						shiftValue = 32;
					return Operand(reg, (ShiftType)arg.shift_type, shiftValue);
				}
			}
		}
	}
	return reg;
}

static Register loadOperand(const ArmOp::Operand& arg, Register scratch_reg)
{
	Operand operand = getOperand(arg);
	if (operand.IsPlainRegister())
		return operand.GetBaseRegister();
	ass.Mov(scratch_reg, operand);
	return scratch_reg;
}

// Assumption: the Op template parameters below use the vixl MacroAssembler
// overloads taking FlagsUpdate/Condition, matching the calls they emit.
template <void (MacroAssembler::*Op)(FlagsUpdate, Condition, Register, Register, const Operand&)>
void emit3ArgOp(const ArmOp& op)
{
	bool set_flags = op.flags & ArmOp::OP_SETS_FLAGS;
	Register rd = regalloc->map(op.rd.getReg().armreg);
	Register rn = loadOperand(op.arg[0], r2);
	Operand operand = getOperand(op.arg[1]);
	(ass.*Op)((FlagsUpdate)set_flags, al, rd, rn, operand);
}

template <void (MacroAssembler::*Op)(FlagsUpdate, Condition, Register, const Operand&)>
void emit2ArgOp(const ArmOp& op)
{
	bool set_flags = op.flags & ArmOp::OP_SETS_FLAGS;
	Register rd = regalloc->map(op.rd.getReg().armreg);
	Operand operand = getOperand(op.arg[0]);
	(ass.*Op)((FlagsUpdate)set_flags, al, rd, operand);
}

template <void (MacroAssembler::*Op)(Condition, Register, const Operand&)>
void emitTestOp(const ArmOp& op)
{
	Register rn = loadOperand(op.arg[0], r2);
	Operand operand = getOperand(op.arg[1]);
	(ass.*Op)(al, rn, operand);
}

static void emitDataProcOp(const ArmOp& op)
{
	switch (op.op_type)
	{
	case ArmOp::AND:
		emit3ArgOp<&MacroAssembler::And>(op);
		break;
	case ArmOp::EOR:
		emit3ArgOp<&MacroAssembler::Eor>(op);
		break;
	case ArmOp::SUB:
		emit3ArgOp<&MacroAssembler::Sub>(op);
		break;
	case ArmOp::RSB:
		emit3ArgOp<&MacroAssembler::Rsb>(op);
		break;
	case ArmOp::ADD:
		emit3ArgOp<&MacroAssembler::Add>(op);
		break;
	case ArmOp::ORR:
		emit3ArgOp<&MacroAssembler::Orr>(op);
		break;
	case ArmOp::BIC:
		emit3ArgOp<&MacroAssembler::Bic>(op);
		break;
	case ArmOp::ADC:
		emit3ArgOp<&MacroAssembler::Adc>(op);
		break;
	case ArmOp::SBC:
		emit3ArgOp<&MacroAssembler::Sbc>(op);
		break;
	case ArmOp::RSC:
		emit3ArgOp<&MacroAssembler::Rsc>(op);
		break;
	case ArmOp::TST:
		emitTestOp<&MacroAssembler::Tst>(op);
		break;
	case ArmOp::TEQ:
		emitTestOp<&MacroAssembler::Teq>(op);
		break;
	case ArmOp::CMP:
		emitTestOp<&MacroAssembler::Cmp>(op);
		break;
	case ArmOp::CMN:
		emitTestOp<&MacroAssembler::Cmn>(op);
		break;
	case ArmOp::MOV:
		emit2ArgOp<&MacroAssembler::Mov>(op);
		break;
	case ArmOp::MVN:
		emit2ArgOp<&MacroAssembler::Mvn>(op);
		break;
	default:
		die("invalid op");
		break;
	}
}

static void jump(const void *code)
{
	ptrdiff_t offset = reinterpret_cast<uintptr_t>(code) - ass.GetBuffer()->GetStartAddress<uintptr_t>();
	Label code_label(offset);
	ass.B(&code_label);
}

static void call(const void *code, bool saveFlags = true)
{
	if (saveFlags)
		storeFlags();

	ptrdiff_t offset = reinterpret_cast<uintptr_t>(code) - ass.GetBuffer()->GetStartAddress<uintptr_t>();
	Label code_label(offset);
	ass.Bl(&code_label);

	if (saveFlags)
		loadFlags();
}

static void emitMemOp(const ArmOp& op)
{
	Register addr_reg = loadOperand(op.arg[0], r2);
	if (op.pre_index)
	{
		const ArmOp::Operand& offset = op.arg[1];
		if (offset.isReg())
		{
			Register offset_reg = loadOperand(offset, r3);
			if (op.add_offset)
				ass.Add(r0, addr_reg, offset_reg);
			else
				ass.Sub(r0, addr_reg, offset_reg);
			addr_reg = r0;
		}
		else if (offset.isImmediate() && offset.getImmediate() != 0)
		{
			if (ImmediateA32::IsImmediateA32(offset.getImmediate()))
			{
				if (op.add_offset)
					ass.Add(r0, addr_reg, offset.getImmediate());
				else
					ass.Sub(r0, addr_reg, offset.getImmediate());
			}
			else
			{
				ass.Mov(r0, offset.getImmediate());
				if (op.add_offset)
					ass.Add(r0, addr_reg, r0);
				else
					ass.Sub(r0, addr_reg, r0);
			}
			addr_reg = r0;
		}
	}
	if (!addr_reg.Is(r0))
		ass.Mov(r0, addr_reg);
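	// The memory helper returned by recompiler::getMemOp() takes the guest address
	// in r0 and, for stores, the value in r1; loads return the value in r0.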
	if (op.op_type == ArmOp::STR)
	{
		if (op.arg[2].isImmediate())
			ass.Mov(r1, op.arg[2].getImmediate());
		else
			ass.Mov(r1, regalloc->map(op.arg[2].getReg().armreg));
	}

	call(recompiler::getMemOp(op.op_type == ArmOp::LDR, op.byte_xfer));

	if (op.op_type == ArmOp::LDR)
		ass.Mov(regalloc->map(op.rd.getReg().armreg), r0);
}

static void emitBranch(const ArmOp& op)
{
	if (op.arg[0].isImmediate())
		ass.Mov(r0, op.arg[0].getImmediate());
	else
	{
		ass.Mov(r0, regalloc->map(op.arg[0].getReg().armreg));
		ass.Bic(r0, r0, 3);
	}
	storeReg(r0, R15_ARM_NEXT);
}

static void emitMRS(const ArmOp& op)
{
	call((void *)CPUUpdateCPSR);

	if (op.spsr)
		loadReg(regalloc->map(op.rd.getReg().armreg), RN_SPSR);
	else
		loadReg(regalloc->map(op.rd.getReg().armreg), RN_CPSR);
}

static void emitMSR(const ArmOp& op)
{
	if (op.arg[0].isImmediate())
		ass.Mov(r0, op.arg[0].getImmediate());
	else
		ass.Mov(r0, regalloc->map(op.arg[0].getReg().armreg));
	if (op.spsr)
		call((void *)recompiler::MSR_do<1>);
	else
		call((void *)recompiler::MSR_do<0>);
}

static void emitFallback(const ArmOp& op)
{
	//Call interpreter
	ass.Mov(r0, op.arg[0].getImmediate());
	call((void *)recompiler::interpret);
}

void arm7backend_compile(const std::vector<ArmOp>& block_ops, u32 cycles)
{
	ass = Arm32Assembler((u8 *)recompiler::currentCode(), recompiler::spaceLeft());

	// Subtract the block's cycle count from the cycle counter
	loadReg(r2, CYCL_CNT);
	while (!ImmediateA32::IsImmediateA32(cycles))
	{
		ass.Sub(r2, r2, 256);
		cycles -= 256;
	}
	ass.Sub(r2, r2, cycles);
	storeReg(r2, CYCL_CNT);

	regalloc = new Arm32ArmRegAlloc(block_ops);

	loadFlags();

	for (u32 i = 0; i < block_ops.size(); i++)
	{
		const ArmOp& op = block_ops[i];
		DEBUG_LOG(AICA_ARM, "-> %s", op.toString().c_str());

		Label *condLabel = nullptr;
		if (op.op_type != ArmOp::FALLBACK)
			condLabel = startConditional(op.condition);

		regalloc->load(i);

		if (op.op_type <= ArmOp::MVN)			// data processing op
			emitDataProcOp(op);
		else if (op.op_type <= ArmOp::STR)		// memory load/store
			emitMemOp(op);
		else if (op.op_type <= ArmOp::BL)		// branch
			emitBranch(op);
		else if (op.op_type == ArmOp::MRS)
			emitMRS(op);
		else if (op.op_type == ArmOp::MSR)
			emitMSR(op);
		else if (op.op_type == ArmOp::FALLBACK)
			emitFallback(op);
		else
			die("invalid");

		regalloc->store(i);

		endConditional(condLabel);
	}
	storeFlags();

	jump((void *)arm_dispatch);

	ass.Finalize();
	recompiler::advance(ass.GetBuffer()->GetSizeInBytes());

	delete regalloc;
	regalloc = nullptr;
}
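// Emits the runtime entry and dispatch stubs at the start of the code space:
// arm_mainloop saves the callee-saved registers, keeps the guest register file
// pointer in r8 and the block entry-point table in r4, then falls through to
// arm_dispatch, which exits when the cycle counter is exhausted, services a
// pending interrupt via CPUFiq, or jumps to the compiled block for the next PC.
// arm_compilecode compiles a missing block and re-enters the dispatcher.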
void arm7backend_flush()
{
	if (!recompiler::empty())
	{
		verify(arm_mainloop != nullptr);
		verify(arm_compilecode != nullptr);
		return;
	}
	ass = Arm32Assembler((u8 *)recompiler::currentCode(), recompiler::spaceLeft());

	Label arm_exit;
	Label arm_dofiq;

	// arm_mainloop:
	arm_mainloop = ass.GetCursorAddress<decltype(arm_mainloop)>();
	RegisterList regList = RegisterList::Union(
			RegisterList(r4, r5, r6, r7),
			RegisterList(r8, r9, r10, r11),
			RegisterList(lr));
	ass.Push(regList);
	ass.Sub(sp, sp, 4);					// 8-byte stack alignment

	ass.Mov(r8, r0);					// load regs
	ass.Mov(r4, r1);					// load entry points

	// arm_dispatch:
	arm_dispatch = ass.GetCursorAddress<decltype(arm_dispatch)>();
	loadReg(r3, CYCL_CNT);				// load cycle counter
	loadReg(r0, R15_ARM_NEXT);			// load Next PC
	loadReg(r1, INTR_PEND);				// load Interrupt
	ass.Cmp(r3, 0);
	ass.B(le, &arm_exit);				// exit if counter <= 0
	ass.Ubfx(r2, r0, 2, 21);			// assuming 8 MB address space max (23 bits)
	ass.Cmp(r1, 0);
	ass.B(ne, &arm_dofiq);				// if interrupt pending, handle it

	ass.Ldr(pc, MemOperand(r4, r2, LSL, 2));

	// arm_dofiq:
	ass.Bind(&arm_dofiq);
	call((void *)CPUFiq, false);
	jump((void *)arm_dispatch);

	// arm_exit:
	ass.Bind(&arm_exit);
	ass.Add(sp, sp, 4);
	ass.Pop(regList);
	ass.Mov(pc, lr);

	// arm_compilecode:
	arm_compilecode = ass.GetCursorAddress<decltype(arm_compilecode)>();
	call((void *)recompiler::compile, false);
	jump((void *)arm_dispatch);

	ass.Finalize();
	recompiler::advance(ass.GetBuffer()->GetSizeInBytes());
}

} // namespace aicaarm

#endif // HOST_CPU == CPU_ARM && FEAT_AREC != DYNAREC_NONE