505 lines
12 KiB
C++
505 lines
12 KiB
C++
/*
	Copyright 2020 flyinghead

	This file is part of flycast.

	flycast is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 2 of the License, or
	(at your option) any later version.

	flycast is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with flycast. If not, see <https://www.gnu.org/licenses/>.
*/
|
|
|
|
#include "build.h"
|
|
|
|
#if HOST_CPU == CPU_ARM && FEAT_AREC != DYNAREC_NONE
|
|
#include "arm7_rec.h"
|
|
#include "hw/mem/_vmem.h"
|
|
|
|
#include <aarch32/macro-assembler-aarch32.h>
|
|
using namespace vixl::aarch32;
|
|
|
|
namespace aicaarm {
|
|
|
|
class Arm32Assembler : public MacroAssembler
|
|
{
|
|
public:
|
|
Arm32Assembler() = default;
|
|
Arm32Assembler(u8 *buffer, size_t size) : MacroAssembler(buffer, size, A32) {}
|
|
|
|
void Finalize() {
|
|
FinalizeCode();
|
|
vmem_platform_flush_cache(GetBuffer()->GetStartAddress<void *>(), GetCursorAddress<u8 *>() - 1,
|
|
GetBuffer()->GetStartAddress<void *>(), GetCursorAddress<u8 *>() - 1);
|
|
}
|
|
};
|
|
|
|
// Assembler shared by every emitter in this file. It is re-created over the
// recompiler's current code position at the start of arm7backend_compile()
// and arm7backend_flush().
static Arm32Assembler ass;
|
|
|
|
// Address of the dispatch code generated by arm7backend_flush(); compiled
// blocks end with a jump to it.
static void (*arm_dispatch)();
|
|
|
|
// Emits a (optionally conditional) load of guest register `guest_reg` into
// `host_reg`. The access is r8-relative, using the byte distance between the
// requested entry and arm_Reg[0]; the main loop emitted by arm7backend_flush()
// is what puts the base pointer in r8.
static void loadReg(Register host_reg, Arm7Reg guest_reg, ConditionType cc = al)
{
	const auto byteOffset = (u8 *)&arm_Reg[guest_reg].I - (u8 *)&arm_Reg[0].I;
	ass.Ldr(cc, host_reg, MemOperand(r8, byteOffset));
}
|
|
|
|
// Emits a (optionally conditional) store of `host_reg` into guest register
// `guest_reg`. The slot is addressed relative to r8 with the byte distance
// between the requested entry and arm_Reg[0].
static void storeReg(Register host_reg, Arm7Reg guest_reg, ConditionType cc = al)
{
	const auto byteOffset = (u8 *)&arm_Reg[guest_reg].I - (u8 *)&arm_Reg[0].I;
	ass.Str(cc, host_reg, MemOperand(r8, byteOffset));
}
|
|
|
|
// Host registers handed out by the register allocator. r4 and r8 are absent
// from the list: the main loop emitted by arm7backend_flush() loads them with
// its two arguments, and r8 is the base register used by loadReg()/storeReg().
const std::array<Register, 6> alloc_regs{ r5, r6, r7, r9, r10, r11 };
|
|
|
|
// Register allocator for this backend. The generic ArmRegAlloc base works with
// host register indices; this class translates them to the VIXL registers
// listed in alloc_regs and provides the load/store hooks that the base class
// reaches through the friend declaration.
class Arm32ArmRegAlloc : public ArmRegAlloc<alloc_regs.size(), Arm32ArmRegAlloc>
{
	using super = ArmRegAlloc<alloc_regs.size(), Arm32ArmRegAlloc>;
	friend super;

public:
	Arm32ArmRegAlloc(const std::vector<ArmOp>& block_ops)
		: super(block_ops)
	{
	}

	// Returns the host register that the base allocator assigned to guest
	// register `r`.
	Register map(Arm7Reg r)
	{
		const int slot = super::map(r);
		return getReg(slot);
	}

private:
	// Index into alloc_regs -> VIXL register.
	static Register getReg(int i)
	{
		return alloc_regs[i];
	}

	// Hook: emit a load of guest register `armreg` into host slot `host_reg`.
	void LoadReg(int host_reg, Arm7Reg armreg, ArmOp::Condition cc = ArmOp::AL)
	{
		const Register target = getReg(host_reg);
		loadReg(target, armreg, (ConditionType)cc);
	}

	// Hook: emit a store of host slot `host_reg` back to guest register `armreg`.
	void StoreReg(int host_reg, Arm7Reg armreg, ArmOp::Condition cc = ArmOp::AL)
	{
		const Register source = getReg(host_reg);
		storeReg(source, armreg, (ConditionType)cc);
	}
};
|
|
|
|
// Allocator for the block being compiled: created near the start of
// arm7backend_compile() and deleted / reset to nullptr at its end.
static Arm32ArmRegAlloc *regalloc;
|
|
|
|
static void loadFlags()
|
|
{
|
|
//Load flags
|
|
loadReg(r3, RN_PSR_FLAGS);
|
|
//move them to flags register
|
|
ass.Msr(APSR_nzcvq, r3);
|
|
}
|
|
|
|
static void storeFlags()
|
|
{
|
|
//get results from flags register
|
|
ass.Mrs(r3, APSR);
|
|
//Store flags
|
|
storeReg(r3, RN_PSR_FLAGS);
|
|
}
|
|
|
|
// Opens a conditionally executed region. For any condition other than AL a
// forward branch on (cc ^ 1) is emitted -- in the ARM condition encoding that
// is the inverse of cc, so the region is skipped when cc does not hold.
// Returns the heap-allocated label to pass to endConditional(), or nullptr
// when the op is unconditional.
static Label *startConditional(ArmOp::Condition cc)
{
	Label *skipLabel = nullptr;

	if (cc != ArmOp::AL)
	{
		const ConditionType inverted = (ConditionType)((u32)cc ^ 1);
		skipLabel = new Label();
		ass.B(inverted, skipLabel);
	}

	return skipLabel;
}
|
|
|
|
// Closes a region opened by startConditional(): binds the skip label at the
// current position and frees it. A null label (unconditional op) is ignored.
static void endConditional(Label *label)
{
	if (label == nullptr)
		return;

	ass.Bind(label);
	delete label;
}
|
|
|
|
// Translates a decoded guest operand into a VIXL Operand.
//  - none:               an Operand built from a default-constructed Register
//  - plain immediate:    an immediate Operand
//  - shifted immediate:  the value is first moved into r1 (clobbering it) and
//                        then treated like a register
//  - register:           the host register mapped by the allocator
// A shift, when present, is attached to the resulting register operand.
static Operand getOperand(const ArmOp::Operand& arg)
{
	Register base;
	if (arg.isNone())
		return base;

	if (arg.isImmediate())
	{
		if (!arg.isShifted())
			return Operand(arg.getImmediate());

		// Used by pc-rel ops: pc is immediate but can be shifted by reg (or even imm if op sets flags)
		ass.Mov(r1, arg.getImmediate());
		base = r1;
	}
	else if (arg.isReg())
	{
		base = regalloc->map(arg.getReg().armreg);
	}

	if (!arg.isShifted())
		return base;

	if (!arg.shift_imm)
	{
		// Shift amount comes from a register
		const Register amountReg = regalloc->map(arg.shift_reg.armreg);
		return Operand(base, (ShiftType)arg.shift_type, amountReg);
	}

	// Shift amount is an immediate; an amount of 0 has special encodings.
	const bool zeroAmount = arg.shift_value == 0;

	// LSL 0 is a no-op
	if (zeroAmount && arg.shift_type == ArmOp::LSL)
		return base;

	// ROR 0 is emitted as RRX
	if (zeroAmount && arg.shift_type == ArmOp::ROR)
		return Operand(base, RRX);

	// LSR 0 / ASR 0 are emitted as a shift by 32
	u32 amount = arg.shift_value;
	if (zeroAmount && (arg.shift_type == ArmOp::LSR || arg.shift_type == ArmOp::ASR))
		amount = 32;

	return Operand(base, (ShiftType)arg.shift_type, amount);
}
|
|
|
|
// Returns a register holding the value of `arg`. If the operand already is a
// plain register that register is returned and nothing is emitted; otherwise
// the operand is moved into `scratch_reg`, which is then returned.
static Register loadOperand(const ArmOp::Operand& arg, Register scratch_reg)
{
	const Operand resolved = getOperand(arg);

	if (!resolved.IsPlainRegister())
	{
		ass.Mov(scratch_reg, resolved);
		return scratch_reg;
	}

	return resolved.GetBaseRegister();
}
|
|
|
|
// Emits a three-operand data-processing instruction (rd, rn, operand2).
// `Op` is the MacroAssembler member that generates the instruction. The first
// source operand has to be a register, so it is materialised in r2 when it is
// not one already.
template <void (MacroAssembler::*Op)(FlagsUpdate flags, Condition cond, Register rd, Register rn, const Operand& operand)>
void emit3ArgOp(const ArmOp& op)
{
	const bool setsFlags = (op.flags & ArmOp::OP_SETS_FLAGS) != 0;
	const Register dest = regalloc->map(op.rd.getReg().armreg);
	const Register lhs = loadOperand(op.arg[0], r2);
	const Operand rhs = getOperand(op.arg[1]);

	(ass.*Op)(static_cast<FlagsUpdate>(setsFlags), al, dest, lhs, rhs);
}
|
|
|
|
// Emits a two-operand data-processing instruction (rd, operand).
// `Op` is the MacroAssembler member that generates the instruction.
template <void (MacroAssembler::*Op)(FlagsUpdate flags, Condition cond, Register rd, const Operand& operand)>
void emit2ArgOp(const ArmOp& op)
{
	const bool setsFlags = (op.flags & ArmOp::OP_SETS_FLAGS) != 0;
	const Register dest = regalloc->map(op.rd.getReg().armreg);
	const Operand source = getOperand(op.arg[0]);

	(ass.*Op)(static_cast<FlagsUpdate>(setsFlags), al, dest, source);
}
|
|
|
|
// Emits a compare/test style instruction (rn, operand) that has no
// destination register. The first operand is materialised in r2 when it is
// not already a plain register.
template <void (MacroAssembler::*Op)(Condition cond, Register rn, const Operand& operand)>
void emitTestOp(const ArmOp& op)
{
	const Register lhs = loadOperand(op.arg[0], r2);
	const Operand rhs = getOperand(op.arg[1]);

	(ass.*Op)(al, lhs, rhs);
}
|
|
|
|
static void emitDataProcOp(const ArmOp& op)
|
|
{
|
|
switch (op.op_type)
|
|
{
|
|
case ArmOp::AND:
|
|
emit3ArgOp<&MacroAssembler::And>(op);
|
|
break;
|
|
case ArmOp::EOR:
|
|
emit3ArgOp<&MacroAssembler::Eor>(op);
|
|
break;
|
|
case ArmOp::SUB:
|
|
emit3ArgOp<&MacroAssembler::Sub>(op);
|
|
break;
|
|
case ArmOp::RSB:
|
|
emit3ArgOp<&MacroAssembler::Rsb>(op);
|
|
break;
|
|
case ArmOp::ADD:
|
|
emit3ArgOp<&MacroAssembler::Add>(op);
|
|
break;
|
|
case ArmOp::ORR:
|
|
emit3ArgOp<&MacroAssembler::Orr>(op);
|
|
break;
|
|
case ArmOp::BIC:
|
|
emit3ArgOp<&MacroAssembler::Bic>(op);
|
|
break;
|
|
case ArmOp::ADC:
|
|
emit3ArgOp<&MacroAssembler::Adc>(op);
|
|
break;
|
|
case ArmOp::SBC:
|
|
emit3ArgOp<&MacroAssembler::Sbc>(op);
|
|
break;
|
|
case ArmOp::RSC:
|
|
emit3ArgOp<&MacroAssembler::Rsc>(op);
|
|
break;
|
|
case ArmOp::TST:
|
|
emitTestOp<&MacroAssembler::Tst>(op);
|
|
break;
|
|
case ArmOp::TEQ:
|
|
emitTestOp<&MacroAssembler::Teq>(op);
|
|
break;
|
|
case ArmOp::CMP:
|
|
emitTestOp<&MacroAssembler::Cmp>(op);
|
|
break;
|
|
case ArmOp::CMN:
|
|
emitTestOp<&MacroAssembler::Cmn>(op);
|
|
break;
|
|
case ArmOp::MOV:
|
|
emit2ArgOp<&MacroAssembler::Mov>(op);
|
|
break;
|
|
case ArmOp::MVN:
|
|
emit2ArgOp<&MacroAssembler::Mvn>(op);
|
|
break;
|
|
default:
|
|
die("invalid op");
|
|
break;
|
|
}
|
|
}
|
|
|
|
static void jump(const void *code)
|
|
{
|
|
ptrdiff_t offset = reinterpret_cast<uintptr_t>(code) - ass.GetBuffer()->GetStartAddress<uintptr_t>();
|
|
Label code_label(offset);
|
|
ass.B(&code_label);
|
|
}
|
|
|
|
static void call(const void *code, bool saveFlags = true)
|
|
{
|
|
if (saveFlags)
|
|
storeFlags();
|
|
ptrdiff_t offset = reinterpret_cast<uintptr_t>(code) - ass.GetBuffer()->GetStartAddress<uintptr_t>();
|
|
Label code_label(offset);
|
|
ass.Bl(&code_label);
|
|
if (saveFlags)
|
|
loadFlags();
|
|
}
|
|
|
|
static void emitMemOp(const ArmOp& op)
|
|
{
|
|
Register addr_reg = loadOperand(op.arg[0], r2);
|
|
if (op.pre_index)
|
|
{
|
|
const ArmOp::Operand& offset = op.arg[1];
|
|
if (offset.isReg())
|
|
{
|
|
Register offset_reg = loadOperand(offset, r3);
|
|
if (op.add_offset)
|
|
ass.Add(r0, addr_reg, offset_reg);
|
|
else
|
|
ass.Sub(r0, addr_reg, offset_reg);
|
|
addr_reg = r0;
|
|
}
|
|
else if (offset.isImmediate() && offset.getImmediate() != 0)
|
|
{
|
|
if (ImmediateA32::IsImmediateA32(offset.getImmediate()))
|
|
{
|
|
if (op.add_offset)
|
|
ass.Add(r0, addr_reg, offset.getImmediate());
|
|
else
|
|
ass.Sub(r0, addr_reg, offset.getImmediate());
|
|
}
|
|
else
|
|
{
|
|
ass.Mov(r0, offset.getImmediate());
|
|
if (op.add_offset)
|
|
ass.Add(r0, addr_reg, r0);
|
|
else
|
|
ass.Sub(r0, addr_reg, r0);
|
|
}
|
|
addr_reg = r0;
|
|
}
|
|
}
|
|
if (!addr_reg.Is(r0))
|
|
ass.Mov(r0, addr_reg);
|
|
if (op.op_type == ArmOp::STR)
|
|
{
|
|
if (op.arg[2].isImmediate())
|
|
ass.Mov(r1, op.arg[2].getImmediate());
|
|
else
|
|
ass.Mov(r1, regalloc->map(op.arg[2].getReg().armreg));
|
|
}
|
|
|
|
call(recompiler::getMemOp(op.op_type == ArmOp::LDR, op.byte_xfer));
|
|
|
|
if (op.op_type == ArmOp::LDR)
|
|
ass.Mov(regalloc->map(op.rd.getReg().armreg), r0);
|
|
|
|
}
|
|
|
|
static void emitBranch(const ArmOp& op)
|
|
{
|
|
if (op.arg[0].isImmediate())
|
|
ass.Mov(r0, op.arg[0].getImmediate());
|
|
else
|
|
{
|
|
ass.Mov(r0, regalloc->map(op.arg[0].getReg().armreg));
|
|
ass.Bic(r0, r0, 3);
|
|
}
|
|
storeReg(r0, R15_ARM_NEXT);
|
|
}
|
|
|
|
static void emitMRS(const ArmOp& op)
|
|
{
|
|
call((void *)CPUUpdateCPSR);
|
|
|
|
if (op.spsr)
|
|
loadReg(regalloc->map(op.rd.getReg().armreg), RN_SPSR);
|
|
else
|
|
loadReg(regalloc->map(op.rd.getReg().armreg), RN_CPSR);
|
|
}
|
|
|
|
static void emitMSR(const ArmOp& op)
|
|
{
|
|
if (op.arg[0].isImmediate())
|
|
ass.Mov(r0, op.arg[0].getImmediate());
|
|
else
|
|
ass.Mov(r0, regalloc->map(op.arg[0].getReg().armreg));
|
|
|
|
if (op.spsr)
|
|
call((void *)recompiler::MSR_do<1>);
|
|
else
|
|
call((void *)recompiler::MSR_do<0>);
|
|
}
|
|
|
|
static void emitFallback(const ArmOp& op)
|
|
{
|
|
//Call interpreter
|
|
ass.Mov(r0, op.arg[0].getImmediate());
|
|
call((void *)recompiler::interpret);
|
|
}
|
|
|
|
void arm7backend_compile(const std::vector<ArmOp>& block_ops, u32 cycles)
|
|
{
|
|
ass = Arm32Assembler((u8 *)recompiler::currentCode(), recompiler::spaceLeft());
|
|
|
|
loadReg(r2, CYCL_CNT);
|
|
while (!ImmediateA32::IsImmediateA32(cycles))
|
|
{
|
|
ass.Sub(r2, r2, 256);
|
|
cycles -= 256;
|
|
}
|
|
ass.Sub(r2, r2, cycles);
|
|
storeReg(r2, CYCL_CNT);
|
|
|
|
regalloc = new Arm32ArmRegAlloc(block_ops);
|
|
|
|
loadFlags();
|
|
|
|
for (u32 i = 0; i < block_ops.size(); i++)
|
|
{
|
|
const ArmOp& op = block_ops[i];
|
|
DEBUG_LOG(AICA_ARM, "-> %s", op.toString().c_str());
|
|
|
|
Label *condLabel = nullptr;
|
|
|
|
if (op.op_type != ArmOp::FALLBACK)
|
|
condLabel = startConditional(op.condition);
|
|
|
|
regalloc->load(i);
|
|
|
|
if (op.op_type <= ArmOp::MVN)
|
|
// data processing op
|
|
emitDataProcOp(op);
|
|
else if (op.op_type <= ArmOp::STR)
|
|
// memory load/store
|
|
emitMemOp(op);
|
|
else if (op.op_type <= ArmOp::BL)
|
|
// branch
|
|
emitBranch(op);
|
|
else if (op.op_type == ArmOp::MRS)
|
|
emitMRS(op);
|
|
else if (op.op_type == ArmOp::MSR)
|
|
emitMSR(op);
|
|
else if (op.op_type == ArmOp::FALLBACK)
|
|
emitFallback(op);
|
|
else
|
|
die("invalid");
|
|
|
|
regalloc->store(i);
|
|
|
|
endConditional(condLabel);
|
|
}
|
|
storeFlags();
|
|
|
|
jump((void *)arm_dispatch);
|
|
|
|
ass.Finalize();
|
|
recompiler::advance(ass.GetBuffer()->GetSizeInBytes());
|
|
|
|
delete regalloc;
|
|
regalloc = nullptr;
|
|
}
|
|
|
|
void arm7backend_flush()
|
|
{
|
|
if (!recompiler::empty())
|
|
{
|
|
verify(arm_mainloop != nullptr);
|
|
verify(arm_compilecode != nullptr);
|
|
return;
|
|
}
|
|
ass = Arm32Assembler((u8 *)recompiler::currentCode(), recompiler::spaceLeft());
|
|
Label arm_exit;
|
|
Label arm_dofiq;
|
|
|
|
// arm_mainloop:
|
|
arm_mainloop = ass.GetCursorAddress<arm_mainloop_t>();
|
|
RegisterList regList = RegisterList::Union(
|
|
RegisterList(r4, r5, r6, r7),
|
|
RegisterList(r8, r9, r10, r11),
|
|
RegisterList(lr));
|
|
ass.Push(regList);
|
|
ass.Sub(sp, sp, 4); // 8-byte stack alignment
|
|
|
|
ass.Mov(r8, r0); // load regs
|
|
ass.Mov(r4, r1); // load entry points
|
|
|
|
// arm_dispatch:
|
|
arm_dispatch = ass.GetCursorAddress<void (*)()>();
|
|
loadReg(r3, CYCL_CNT); // load cycle counter
|
|
loadReg(r0, R15_ARM_NEXT); // load Next PC
|
|
loadReg(r1, INTR_PEND); // load Interrupt
|
|
ass.Cmp(r3, 0);
|
|
ass.B(le, &arm_exit); // exit if counter <= 0
|
|
ass.Ubfx(r2, r0, 2, 21); // assuming 8 MB address space max (23 bits)
|
|
ass.Cmp(r1, 0);
|
|
ass.B(ne, &arm_dofiq); // if interrupt pending, handle it
|
|
|
|
ass.Ldr(pc, MemOperand(r4, r2, LSL, 2));
|
|
|
|
// arm_dofiq:
|
|
ass.Bind(&arm_dofiq);
|
|
call((void *)CPUFiq, false);
|
|
jump((void *)arm_dispatch);
|
|
|
|
// arm_exit:
|
|
ass.Bind(&arm_exit);
|
|
ass.Add(sp, sp, 4);
|
|
ass.Pop(regList);
|
|
ass.Mov(pc, lr);
|
|
|
|
// arm_compilecode:
|
|
arm_compilecode = ass.GetCursorAddress<void (*)()>();
|
|
call((void *)recompiler::compile, false);
|
|
jump((void *)arm_dispatch);
|
|
|
|
ass.Finalize();
|
|
recompiler::advance(ass.GetBuffer()->GetSizeInBytes());
|
|
}
|
|
|
|
}
|
|
#endif // HOST_CPU == CPU_ARM && FEAT_AREC != DYNAREC_NONE
|