move ARM64 JIT backend here

This commit is contained in:
RSDuck 2020-02-04 18:29:52 +01:00
parent baed0ac0d5
commit 99b34efe2d
18 changed files with 9188 additions and 8 deletions

View File

@ -36,7 +36,7 @@ detect_architecture("__i386__" x86)
detect_architecture("__arm__" ARM)
detect_architecture("__aarch64__" ARM64)
if (ARCHITECTURE STREQUAL x86_64)
if (ARCHITECTURE STREQUAL x86_64 OR ARCHITECTURE STREQUAL ARM64)
option(ENABLE_JIT "Enable x64 JIT recompiler" ON)
endif()

View File

@ -254,10 +254,14 @@ public:
u32 DTCMSetting, ITCMSetting;
u8 ITCM[0x8000];
// for aarch64 JIT they need to go up here
// to be addressable by a 12-bit immediate
u32 ITCMSize;
u8 DTCM[0x4000];
u32 DTCMBase, DTCMSize;
s32 RegionCodeCycles;
u8 ITCM[0x8000];
u8 DTCM[0x4000];
u8 ICache[0x2000];
u32 ICacheTags[64*4];
@ -282,7 +286,6 @@ public:
// code/16N/32N/32S
u8 MemTimings[0x100000][4];
s32 RegionCodeCycles;
u8* CurICacheLine;
bool (*GetMemRegion)(u32 addr, bool write, NDS::MemRegion* region);

View File

@ -6,7 +6,11 @@
#include "Config.h"
#include "ARMJIT_Internal.h"
#if defined(__x86_64__)
#include "ARMJIT_x64/ARMJIT_Compiler.h"
#else
#include "ARMJIT_A64/ARMJIT_Compiler.h"
#endif
#include "ARMInterpreter_ALU.h"
#include "ARMInterpreter_LoadStore.h"

View File

@ -0,0 +1,837 @@
#include "ARMJIT_Compiler.h"
using namespace Arm64Gen;
namespace ARMJIT
{
void Compiler::Comp_RegShiftReg(int op, bool S, Op2& op2, ARM64Reg rs)
{
if (!(CurInstr.SetFlags & 0x2))
S = false;
CPSRDirty |= S;
UBFX(W1, rs, 0, 8);
if (!S)
{
if (op == 3)
RORV(W0, op2.Reg.Rm, W1);
else
{
CMP(W1, 32);
if (op == 2)
{
MOVI2R(W2, 31);
CSEL(W1, W2, W1, CC_GE);
ASRV(W0, op2.Reg.Rm, W1);
}
else
{
if (op == 0)
LSLV(W0, op2.Reg.Rm, W1);
else if (op == 1)
LSRV(W0, op2.Reg.Rm, W1);
CSEL(W0, WZR, W0, CC_GE);
}
}
}
else
{
MOV(W0, op2.Reg.Rm);
FixupBranch zero = CBZ(W1);
SUB(W1, W1, 1);
if (op == 3)
{
RORV(W0, op2.Reg.Rm, W1);
BFI(RCPSR, W0, 29, 1);
}
else
{
CMP(W1, 31);
if (op == 2)
{
MOVI2R(W2, 31);
CSEL(W1, W2, W1, CC_GT);
ASRV(W0, op2.Reg.Rm, W1);
BFI(RCPSR, W0, 29, 1);
}
else
{
if (op == 0)
{
LSLV(W0, op2.Reg.Rm, W1);
UBFX(W1, W0, 31, 1);
}
else if (op == 1)
LSRV(W0, op2.Reg.Rm, W1);
CSEL(W1, WZR, op ? W0 : W1, CC_GT);
BFI(RCPSR, W1, 29, 1);
CSEL(W0, WZR, W0, CC_GE);
}
}
MOV(W0, W0, ArithOption(W0, (ShiftType)op, 1));
SetJumpTarget(zero);
}
op2 = Op2(W0, ST_LSL, 0);
}
void Compiler::Comp_RegShiftImm(int op, int amount, bool S, Op2& op2, ARM64Reg tmp)
{
if (!(CurInstr.SetFlags & 0x2))
S = false;
CPSRDirty |= S;
switch (op)
{
case 0: // LSL
if (S && amount)
{
UBFX(tmp, op2.Reg.Rm, 32 - amount, 1);
BFI(RCPSR, tmp, 29, 1);
}
op2 = Op2(op2.Reg.Rm, ST_LSL, amount);
return;
case 1: // LSR
if (S)
{
UBFX(tmp, op2.Reg.Rm, (amount ? amount : 32) - 1, 1);
BFI(RCPSR, tmp, 29, 1);
}
if (amount == 0)
{
op2 = Op2(0);
return;
}
op2 = Op2(op2.Reg.Rm, ST_LSR, amount);
return;
case 2: // ASR
if (S)
{
UBFX(tmp, op2.Reg.Rm, (amount ? amount : 32) - 1, 1);
BFI(RCPSR, tmp, 29, 1);
}
op2 = Op2(op2.Reg.Rm, ST_ASR, amount ? amount : 31);
return;
case 3: // ROR
if (amount == 0)
{
UBFX(tmp, RCPSR, 29, 1);
LSL(tmp, tmp, 31);
if (S)
BFI(RCPSR, op2.Reg.Rm, 29, 1);
ORR(tmp, tmp, op2.Reg.Rm, ArithOption(tmp, ST_LSR, 1));
op2 = Op2(tmp, ST_LSL, 0);
}
else
{
if (S)
{
UBFX(tmp, op2.Reg.Rm, amount - 1, 1);
BFI(RCPSR, tmp, 29, 1);
}
op2 = Op2(op2.Reg.Rm, ST_ROR, amount);
}
return;
}
}
void Compiler::Comp_RetriveFlags(bool retriveCV)
{
if (CurInstr.SetFlags)
CPSRDirty = true;
if (CurInstr.SetFlags & 0x4)
{
CSET(W0, CC_EQ);
BFI(RCPSR, W0, 30, 1);
}
if (CurInstr.SetFlags & 0x8)
{
CSET(W0, CC_MI);
BFI(RCPSR, W0, 31, 1);
}
if (retriveCV)
{
if (CurInstr.SetFlags & 0x2)
{
CSET(W0, CC_CS);
BFI(RCPSR, W0, 29, 1);
}
if (CurInstr.SetFlags & 0x1)
{
CSET(W0, CC_VS);
BFI(RCPSR, W0, 28, 1);
}
}
}
void Compiler::Comp_Logical(int op, bool S, ARM64Reg rd, ARM64Reg rn, Op2 op2)
{
if (S && !CurInstr.SetFlags)
S = false;
switch (op)
{
case 0x0: // AND
if (S)
{
if (op2.IsImm)
ANDSI2R(rd, rn, op2.Imm, W0);
else
ANDS(rd, rn, op2.Reg.Rm, op2.ToArithOption());
}
else
{
if (op2.IsImm)
ANDI2R(rd, rn, op2.Imm, W0);
else
AND(rd, rn, op2.Reg.Rm, op2.ToArithOption());
}
break;
case 0x1: // EOR
if (op2.IsImm)
EORI2R(rd, rn, op2.Imm, W0);
else
EOR(rd, rn, op2.Reg.Rm, op2.ToArithOption());
if (S && FlagsNZNeeded())
TST(rd, rd);
break;
case 0xC: // ORR
if (op2.IsImm)
ORRI2R(rd, rn, op2.Imm, W0);
else
ORR(rd, rn, op2.Reg.Rm, op2.ToArithOption());
if (S && FlagsNZNeeded())
TST(rd, rd);
break;
case 0xE: // BIC
if (S)
{
if (op2.IsImm)
ANDSI2R(rd, rn, ~op2.Imm, W0);
else
BICS(rd, rn, op2.Reg.Rm, op2.ToArithOption());
}
else
{
if (op2.IsImm)
ANDI2R(rd, rn, ~op2.Imm, W0);
else
BIC(rd, rn, op2.Reg.Rm, op2.ToArithOption());
}
break;
}
if (S)
Comp_RetriveFlags(false);
}
void Compiler::Comp_Arithmetic(int op, bool S, ARM64Reg rd, ARM64Reg rn, Op2 op2)
{
if (!op2.IsImm && op2.Reg.ShiftType == ST_ROR)
{
MOV(W0, op2.Reg.Rm, op2.ToArithOption());
op2 = Op2(W0, ST_LSL, 0);
}
if (S && !CurInstr.SetFlags)
S = false;
bool CVInGP = false;
switch (op)
{
case 0x2: // SUB
if (S)
{
if (op2.IsImm)
SUBSI2R(rd, rn, op2.Imm, W0);
else
SUBS(rd, rn, op2.Reg.Rm, op2.ToArithOption());
}
else
{
if (op2.IsImm)
{
MOVI2R(W2, op2.Imm);
SUBI2R(rd, rn, op2.Imm, W0);
}
else
SUB(rd, rn, op2.Reg.Rm, op2.ToArithOption());
}
break;
case 0x3: // RSB
if (op2.IsZero())
{
op2 = Op2(WZR);
}
else if (op2.IsImm)
{
MOVI2R(W1, op2.Imm);
op2 = Op2(W1);
}
else if (op2.Reg.ShiftAmount != 0)
{
MOV(W1, op2.Reg.Rm, op2.ToArithOption());
op2 = Op2(W1);
}
if (S)
SUBS(rd, op2.Reg.Rm, rn);
else
SUB(rd, op2.Reg.Rm, rn);
break;
case 0x4: // ADD
if (S)
{
if (op2.IsImm)
ADDSI2R(rd, rn, op2.Imm, W0);
else
ADDS(rd, rn, op2.Reg.Rm, op2.ToArithOption());
}
else
{
if (op2.IsImm)
ADDI2R(rd, rn, op2.Imm, W0);
else
ADD(rd, rn, op2.Reg.Rm, op2.ToArithOption());
}
break;
case 0x5: // ADC
UBFX(W2, RCPSR, 29, 1);
if (S)
{
CVInGP = true;
ADDS(W1, rn, W2);
CSET(W2, CC_CS);
CSET(W3, CC_VS);
if (op2.IsImm)
ADDSI2R(rd, W1, op2.Imm, W0);
else
ADDS(rd, W1, op2.Reg.Rm, op2.ToArithOption());
CSINC(W2, W2, WZR, CC_CC);
CSINC(W3, W3, WZR, CC_VC);
}
else
{
ADD(W1, rn, W2);
if (op2.IsImm)
ADDI2R(rd, W1, op2.Imm, W0);
else
ADD(rd, W1, op2.Reg.Rm, op2.ToArithOption());
}
break;
case 0x6: // SBC
UBFX(W2, RCPSR, 29, 1);
// W1 = -op2 - 1
if (op2.IsImm)
MOVI2R(W1, ~op2.Imm);
else
ORN(W1, WZR, op2.Reg.Rm, op2.ToArithOption());
if (S)
{
CVInGP = true;
ADDS(W1, W2, W1);
CSET(W2, CC_CS);
CSET(W3, CC_VS);
ADDS(rd, rn, W1);
CSINC(W2, W2, WZR, CC_CC);
CSINC(W3, W3, WZR, CC_VC);
}
else
{
ADD(W1, W2, W1);
ADD(rd, rn, W1);
}
break;
case 0x7: // RSC
UBFX(W2, RCPSR, 29, 1);
// W1 = -rn - 1
MVN(W1, rn);
if (S)
{
CVInGP = true;
ADDS(W1, W2, W1);
CSET(W2, CC_CS);
CSET(W3, CC_VS);
if (op2.IsImm)
ADDSI2R(rd, W1, op2.Imm);
else
ADDS(rd, W1, op2.Reg.Rm, op2.ToArithOption());
CSINC(W2, W2, WZR, CC_CC);
CSINC(W3, W3, WZR, CC_VC);
}
else
{
ADD(W1, W2, W1);
if (op2.IsImm)
ADDI2R(rd, W1, op2.Imm);
else
ADD(rd, W1, op2.Reg.Rm, op2.ToArithOption());
}
break;
}
if (S)
{
if (CVInGP)
{
BFI(RCPSR, W2, 29, 1);
BFI(RCPSR, W3, 28, 1);
}
Comp_RetriveFlags(!CVInGP);
}
}
void Compiler::Comp_Compare(int op, ARM64Reg rn, Op2 op2)
{
if (!op2.IsImm && op2.Reg.ShiftType == ST_ROR)
{
MOV(W0, op2.Reg.Rm, op2.ToArithOption());
op2 = Op2(W0, ST_LSL, 0);
}
switch (op)
{
case 0x8: // TST
if (op2.IsImm)
TSTI2R(rn, op2.Imm, W0);
else
ANDS(WZR, rn, op2.Reg.Rm, op2.ToArithOption());
break;
case 0x9: // TEQ
if (op2.IsImm)
EORI2R(W0, rn, op2.Imm, W0);
else
EOR(W0, rn, op2.Reg.Rm, op2.ToArithOption());
TST(W0, W0);
break;
case 0xA: // CMP
if (op2.IsImm)
CMPI2R(rn, op2.Imm, W0);
else
CMP(rn, op2.Reg.Rm, op2.ToArithOption());
break;
case 0xB: // CMN
if (op2.IsImm)
ADDSI2R(WZR, rn, op2.Imm, W0);
else
CMN(rn, op2.Reg.Rm, op2.ToArithOption());
break;
}
Comp_RetriveFlags(op >= 0xA);
}
// also counts cycles!
void Compiler::A_Comp_GetOp2(bool S, Op2& op2)
{
if (CurInstr.Instr & (1 << 25))
{
Comp_AddCycles_C();
op2 = Op2(ROR(CurInstr.Instr & 0xFF, (CurInstr.Instr >> 7) & 0x1E));
}
else
{
int op = (CurInstr.Instr >> 5) & 0x3;
op2.Reg.Rm = MapReg(CurInstr.A_Reg(0));
if (CurInstr.Instr & (1 << 4))
{
Comp_AddCycles_CI(1);
ARM64Reg rs = MapReg(CurInstr.A_Reg(8));
if (CurInstr.A_Reg(0) == 15)
{
ADD(W0, op2.Reg.Rm, 4);
op2.Reg.Rm = W0;
}
Comp_RegShiftReg(op, S, op2, rs);
}
else
{
Comp_AddCycles_C();
int amount = (CurInstr.Instr >> 7) & 0x1F;
Comp_RegShiftImm(op, amount, S, op2);
}
}
}
void Compiler::A_Comp_ALUCmpOp()
{
u32 op = (CurInstr.Instr >> 21) & 0xF;
ARM64Reg rn = MapReg(CurInstr.A_Reg(16));
Op2 op2;
A_Comp_GetOp2(op <= 0x9, op2);
Comp_Compare(op, rn, op2);
}
void Compiler::A_Comp_ALUMovOp()
{
bool S = CurInstr.Instr & (1 << 20);
u32 op = (CurInstr.Instr >> 21) & 0xF;
ARM64Reg rd = MapReg(CurInstr.A_Reg(12));
Op2 op2;
A_Comp_GetOp2(S, op2);
if (op == 0xF) // MVN
{
if (op2.IsImm)
{
if (CurInstr.Cond() == 0xE)
RegCache.PutLiteral(CurInstr.A_Reg(12), ~op2.Imm);
MOVI2R(rd, ~op2.Imm);
}
else
ORN(rd, WZR, op2.Reg.Rm, op2.ToArithOption());
}
else // MOV
{
if (op2.IsImm)
{
if (CurInstr.Cond() == 0xE)
RegCache.PutLiteral(CurInstr.A_Reg(12), op2.Imm);
MOVI2R(rd, op2.Imm);
}
else
MOV(rd, op2.Reg.Rm, op2.ToArithOption());
}
if (S)
{
if (FlagsNZNeeded())
TST(rd, rd);
Comp_RetriveFlags(false);
}
if (CurInstr.Info.Branches())
Comp_JumpTo(rd, true, S);
}
void Compiler::A_Comp_ALUTriOp()
{
bool S = CurInstr.Instr & (1 << 20);
u32 op = (CurInstr.Instr >> 21) & 0xF;
bool logical = (1 << op) & 0xF303;
ARM64Reg rd = MapReg(CurInstr.A_Reg(12));
ARM64Reg rn = MapReg(CurInstr.A_Reg(16));
Op2 op2;
A_Comp_GetOp2(S && logical, op2);
if (op2.IsImm && op2.Imm == 0)
op2 = Op2(WZR, ST_LSL, 0);
if (logical)
Comp_Logical(op, S, rd, rn, op2);
else
Comp_Arithmetic(op, S, rd, rn, op2);
if (CurInstr.Info.Branches())
Comp_JumpTo(rd, true, S);
}
void Compiler::A_Comp_Clz()
{
Comp_AddCycles_C();
ARM64Reg rd = MapReg(CurInstr.A_Reg(12));
ARM64Reg rm = MapReg(CurInstr.A_Reg(0));
CLZ(rd, rm);
assert(Num == 0);
}
void Compiler::Comp_Mul_Mla(bool S, bool mla, ARM64Reg rd, ARM64Reg rm, ARM64Reg rs, ARM64Reg rn)
{
if (Num == 0)
{
Comp_AddCycles_CI(S ? 3 : 1);
}
else
{
CLZ(W0, rs);
CLS(W1, rs);
CMP(W0, W1);
CSEL(W0, W0, W1, CC_GT);
Comp_AddCycles_CI(mla ? 1 : 0, W0, ArithOption(W0, ST_LSR, 3));
}
if (mla)
MADD(rd, rm, rs, rn);
else
MUL(rd, rm, rs);
if (S && FlagsNZNeeded())
{
TST(rd, rd);
Comp_RetriveFlags(false);
}
}
void Compiler::A_Comp_Mul_Long()
{
ARM64Reg rd = MapReg(CurInstr.A_Reg(16));
ARM64Reg rm = MapReg(CurInstr.A_Reg(0));
ARM64Reg rs = MapReg(CurInstr.A_Reg(8));
ARM64Reg rn = MapReg(CurInstr.A_Reg(12));
bool S = CurInstr.Instr & (1 << 20);
bool add = CurInstr.Instr & (1 << 21);
bool sign = CurInstr.Instr & (1 << 22);
if (Num == 0)
{
Comp_AddCycles_CI(S ? 3 : 1);
}
else
{
CLZ(W0, rs);
CLS(W1, rs);
CMP(W0, W1);
CSEL(W0, W0, W1, CC_GT);
Comp_AddCycles_CI(0, W0, ArithOption(W0, ST_LSR, 3));
}
if (add)
{
MOV(W0, rn);
BFI(X0, EncodeRegTo64(rd), 32, 32);
if (sign)
SMADDL(EncodeRegTo64(rn), rm, rs, X0);
else
UMADDL(EncodeRegTo64(rn), rm, rs, X0);
if (S && FlagsNZNeeded())
TST(EncodeRegTo64(rn), EncodeRegTo64(rn));
UBFX(EncodeRegTo64(rd), EncodeRegTo64(rn), 32, 32);
}
else
{
if (sign)
SMULL(EncodeRegTo64(rn), rm, rs);
else
UMULL(EncodeRegTo64(rn), rm, rs);
if (S && FlagsNZNeeded())
TST(EncodeRegTo64(rn), EncodeRegTo64(rn));
UBFX(EncodeRegTo64(rd), EncodeRegTo64(rn), 32, 32);
}
if (S)
Comp_RetriveFlags(false);
}
void Compiler::A_Comp_Mul()
{
ARM64Reg rd = MapReg(CurInstr.A_Reg(16));
ARM64Reg rm = MapReg(CurInstr.A_Reg(0));
ARM64Reg rs = MapReg(CurInstr.A_Reg(8));
bool S = CurInstr.Instr & (1 << 20);
bool mla = CurInstr.Instr & (1 << 21);
ARM64Reg rn = INVALID_REG;
if (mla)
rn = MapReg(CurInstr.A_Reg(12));
Comp_Mul_Mla(S, mla, rd, rm, rs, rn);
}
void Compiler::T_Comp_ShiftImm()
{
Comp_AddCycles_C();
u32 op = (CurInstr.Instr >> 11) & 0x3;
int amount = (CurInstr.Instr >> 6) & 0x1F;
ARM64Reg rd = MapReg(CurInstr.T_Reg(0));
Op2 op2;
op2.Reg.Rm = MapReg(CurInstr.T_Reg(3));
Comp_RegShiftImm(op, amount, true, op2);
if (op2.IsImm)
MOVI2R(rd, op2.Imm);
else
MOV(rd, op2.Reg.Rm, op2.ToArithOption());
if (FlagsNZNeeded())
TST(rd, rd);
Comp_RetriveFlags(false);
}
void Compiler::T_Comp_AddSub_()
{
Comp_AddCycles_C();
Op2 op2;
if (CurInstr.Instr & (1 << 10))
op2 = Op2((CurInstr.Instr >> 6) & 0x7);
else
op2 = Op2(MapReg(CurInstr.T_Reg(6)));
Comp_Arithmetic(
CurInstr.Instr & (1 << 9) ? 0x2 : 0x4,
true,
MapReg(CurInstr.T_Reg(0)),
MapReg(CurInstr.T_Reg(3)),
op2);
}
void Compiler::T_Comp_ALUImm8()
{
Comp_AddCycles_C();
u32 imm = CurInstr.Instr & 0xFF;
int op = (CurInstr.Instr >> 11) & 0x3;
ARM64Reg rd = MapReg(CurInstr.T_Reg(8));
switch (op)
{
case 0:
MOVI2R(rd, imm);
if (FlagsNZNeeded())
TST(rd, rd);
Comp_RetriveFlags(false);
break;
case 1:
Comp_Compare(0xA, rd, Op2(imm));
break;
case 2:
case 3:
Comp_Arithmetic(op == 2 ? 0x4 : 0x2, true, rd, rd, Op2(imm));
break;
}
}
void Compiler::T_Comp_ALU()
{
int op = (CurInstr.Instr >> 6) & 0xF;
ARM64Reg rd = MapReg(CurInstr.T_Reg(0));
ARM64Reg rs = MapReg(CurInstr.T_Reg(3));
if ((op >= 0x2 && op <= 0x4) || op == 0x7)
Comp_AddCycles_CI(1);
else
Comp_AddCycles_C();
switch (op)
{
case 0x0:
Comp_Logical(0x0, true, rd, rd, Op2(rs));
break;
case 0x1:
Comp_Logical(0x1, true, rd, rd, Op2(rs));
break;
case 0x2:
case 0x3:
case 0x4:
case 0x7:
{
Op2 op2;
op2.Reg.Rm = rd;
Comp_RegShiftReg(op == 0x7 ? 3 : (op - 0x2), true, op2, rs);
MOV(rd, op2.Reg.Rm, op2.ToArithOption());
if (FlagsNZNeeded())
TST(rd, rd);
Comp_RetriveFlags(false);
}
break;
case 0x5:
Comp_Arithmetic(0x5, true, rd, rd, Op2(rs));
break;
case 0x6:
Comp_Arithmetic(0x6, true, rd, rd, Op2(rs));
break;
case 0x8:
Comp_Compare(0x8, rd, Op2(rs));
break;
case 0x9:
Comp_Arithmetic(0x3, true, rd, rs, Op2(0));
break;
case 0xA:
Comp_Compare(0xA, rd, Op2(rs));
break;
case 0xB:
Comp_Compare(0xB, rd, Op2(rs));
break;
case 0xC:
Comp_Logical(0xC, true, rd, rd, Op2(rs));
break;
case 0xD:
Comp_Mul_Mla(true, false, rd, rd, rs, INVALID_REG);
break;
case 0xE:
Comp_Logical(0xE, true, rd, rd, Op2(rs));
break;
case 0xF:
MVN(rd, rs);
if (FlagsNZNeeded())
TST(rd, rd);
Comp_RetriveFlags(false);
break;
}
}
void Compiler::T_Comp_ALU_HiReg()
{
u32 rd = ((CurInstr.Instr & 0x7) | ((CurInstr.Instr >> 4) & 0x8));
ARM64Reg rdMapped = MapReg(rd);
ARM64Reg rs = MapReg((CurInstr.Instr >> 3) & 0xF);
u32 op = (CurInstr.Instr >> 8) & 0x3;
Comp_AddCycles_C();
switch (op)
{
case 0:
Comp_Arithmetic(0x4, false, rdMapped, rdMapped, Op2(rs));
break;
case 1:
Comp_Compare(0xA, rdMapped, rs);
return;
case 2:
MOV(rdMapped, rs);
break;
}
if (rd == 15)
{
Comp_JumpTo(rdMapped, false, false);
}
}
void Compiler::T_Comp_AddSP()
{
Comp_AddCycles_C();
ARM64Reg sp = MapReg(13);
u32 offset = (CurInstr.Instr & 0x7F) << 2;
if (CurInstr.Instr & (1 << 7))
SUB(sp, sp, offset);
else
ADD(sp, sp, offset);
}
void Compiler::T_Comp_RelAddr()
{
Comp_AddCycles_C();
ARM64Reg rd = MapReg(CurInstr.T_Reg(8));
u32 offset = (CurInstr.Instr & 0xFF) << 2;
if (CurInstr.Instr & (1 << 11))
{
ARM64Reg sp = MapReg(13);
ADD(rd, sp, offset);
}
else
MOVI2R(rd, (R15 & ~2) + offset);
}
}

View File

@ -0,0 +1,452 @@
#include "ARMJIT_Compiler.h"
using namespace Arm64Gen;
// hack
const int kCodeCacheTiming = 3;
namespace ARMJIT
{
template <typename T>
void jumpToTrampoline(T* cpu, u32 addr, bool changeCPSR)
{
cpu->JumpTo(addr, changeCPSR);
}
void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
{
// we can simplify constant branches by a lot
// it's not completely safe to assume stuff like, which instructions to preload
// we'll see how it works out
IrregularCycles = true;
u32 newPC;
u32 cycles = 0;
bool setupRegion = false;
if (addr & 0x1 && !Thumb)
{
CPSRDirty = true;
ORRI2R(RCPSR, RCPSR, 0x20);
}
else if (!(addr & 0x1) && Thumb)
{
CPSRDirty = true;
ANDI2R(RCPSR, RCPSR, ~0x20);
}
if (Num == 0)
{
ARMv5* cpu9 = (ARMv5*)CurCPU;
u32 oldregion = R15 >> 24;
u32 newregion = addr >> 24;
u32 regionCodeCycles = cpu9->MemTimings[addr >> 12][0];
u32 compileTimeCodeCycles = cpu9->RegionCodeCycles;
cpu9->RegionCodeCycles = regionCodeCycles;
MOVI2R(W0, regionCodeCycles);
STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARMv5, RegionCodeCycles));
setupRegion = newregion != oldregion;
if (setupRegion)
cpu9->SetupCodeMem(addr);
if (addr & 0x1)
{
addr &= ~0x1;
newPC = addr+2;
// two-opcodes-at-once fetch
// doesn't matter if we put garbage in the MSbs there
if (addr & 0x2)
{
cpu9->CodeRead32(addr-2, true) >> 16;
cycles += cpu9->CodeCycles;
cpu9->CodeRead32(addr+2, false);
cycles += CurCPU->CodeCycles;
}
else
{
cpu9->CodeRead32(addr, true);
cycles += cpu9->CodeCycles;
}
}
else
{
addr &= ~0x3;
newPC = addr+4;
cpu9->CodeRead32(addr, true);
cycles += cpu9->CodeCycles;
cpu9->CodeRead32(addr+4, false);
cycles += cpu9->CodeCycles;
}
cpu9->RegionCodeCycles = compileTimeCodeCycles;
if (setupRegion)
cpu9->SetupCodeMem(R15);
}
else
{
ARMv4* cpu7 = (ARMv4*)CurCPU;
u32 codeRegion = addr >> 24;
u32 codeCycles = addr >> 15; // cheato
cpu7->CodeRegion = codeRegion;
cpu7->CodeCycles = codeCycles;
MOVI2R(W0, codeRegion);
STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CodeRegion));
MOVI2R(W0, codeCycles);
STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CodeCycles));
if (addr & 0x1)
{
addr &= ~0x1;
newPC = addr+2;
// this is necessary because ARM7 bios protection
u32 compileTimePC = CurCPU->R[15];
CurCPU->R[15] = newPC;
cycles += NDS::ARM7MemTimings[codeCycles][0] + NDS::ARM7MemTimings[codeCycles][1];
CurCPU->R[15] = compileTimePC;
}
else
{
addr &= ~0x3;
newPC = addr+4;
u32 compileTimePC = CurCPU->R[15];
CurCPU->R[15] = newPC;
cycles += NDS::ARM7MemTimings[codeCycles][2] + NDS::ARM7MemTimings[codeCycles][3];
CurCPU->R[15] = compileTimePC;
}
cpu7->CodeRegion = R15 >> 24;
cpu7->CodeCycles = addr >> 15;
}
if (Exit)
{
MOVI2R(W0, newPC);
STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, R[15]));
}
if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles)
ConstantCycles += cycles;
else
ADD(RCycles, RCycles, cycles);
}
void* Compiler::Gen_JumpTo9(int kind)
{
AlignCode16();
void* res = GetRXPtr();
MOVI2R(W2, kCodeCacheTiming);
// W1 - code cycles non branch
// W2 - branch code cycles
LSR(W1, W0, 12);
LSL(W1, W1, 2);
ADDI2R(W1, W1, offsetof(ARMv5, MemTimings), W2);
LDRB(W1, RCPU, W1);
LDR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARMv5, ITCMSize));
STR(INDEX_UNSIGNED, W1, RCPU, offsetof(ARMv5, RegionCodeCycles));
CMP(W0, W3);
FixupBranch outsideITCM = B(CC_LO);
MOVI2R(W1, 1);
MOVI2R(W2, 1);
SetJumpTarget(outsideITCM);
FixupBranch switchToThumb;
if (kind == 0)
switchToThumb = TBNZ(W0, 0);
if (kind == 0 || kind == 1)
{
ANDI2R(W0, W0, ~3);
if (kind == 0)
ANDI2R(RCPSR, RCPSR, ~0x20);
ADD(W3, W0, 4);
STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15]));
ADD(W1, W1, W2);
ADD(RCycles, RCycles, W1);
RET();
}
if (kind == 0 || kind == 2)
{
if (kind == 0)
{
SetJumpTarget(switchToThumb);
ORRI2R(RCPSR, RCPSR, 0x20);
}
ANDI2R(W0, W0, ~1);
ADD(W3, W0, 2);
STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15]));
FixupBranch halfwordLoc = TBZ(W0, 1);
ADD(W1, W1, W2);
ADD(RCycles, RCycles, W1);
RET();
SetJumpTarget(halfwordLoc);
ADD(RCycles, RCycles, W2);
RET();
}
return res;
}
void* Compiler::Gen_JumpTo7(int kind)
{
void* res = GetRXPtr();
LSR(W1, W0, 24);
STR(INDEX_UNSIGNED, W1, RCPU, offsetof(ARM, CodeRegion));
LSR(W1, W0, 15);
STR(INDEX_UNSIGNED, W1, RCPU, offsetof(ARM, CodeCycles));
MOVP2R(X2, NDS::ARM7MemTimings);
LDR(W3, X2, ArithOption(W1, true));
FixupBranch switchToThumb;
if (kind == 0)
switchToThumb = TBNZ(W0, 0);
if (kind == 0 || kind == 1)
{
UBFX(W2, W3, 0, 8);
UBFX(W3, W3, 8, 8);
ADD(W2, W3, W2);
ADD(RCycles, RCycles, W2);
ANDI2R(W0, W0, ~3);
if (kind == 0)
ANDI2R(RCPSR, RCPSR, ~0x20);
ADD(W3, W0, 4);
STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15]));
RET();
}
if (kind == 0 || kind == 2)
{
if (kind == 0)
{
SetJumpTarget(switchToThumb);
ORRI2R(RCPSR, RCPSR, 0x20);
}
UBFX(W2, W3, 16, 8);
UBFX(W3, W3, 24, 8);
ADD(W2, W3, W2);
ADD(RCycles, RCycles, W2);
ANDI2R(W0, W0, ~1);
ADD(W3, W0, 2);
STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15]));
RET();
}
return res;
}
void Compiler::Comp_JumpTo(Arm64Gen::ARM64Reg addr, bool switchThumb, bool restoreCPSR)
{
IrregularCycles = true;
if (!restoreCPSR)
{
if (switchThumb)
CPSRDirty = true;
MOV(W0, addr);
BL((Num ? JumpToFuncs7 : JumpToFuncs9)[switchThumb ? 0 : (Thumb + 1)]);
}
else
{
BitSet16 hiRegsLoaded(RegCache.DirtyRegs & 0xFF00);
bool previouslyDirty = CPSRDirty;
SaveCPSR();
if (restoreCPSR)
{
if (Thumb || CurInstr.Cond() >= 0xE)
RegCache.Flush();
else
{
// the ugly way...
// we only save them, to load and save them again
for (int reg : hiRegsLoaded)
SaveReg(reg, RegCache.Mapping[reg]);
}
}
if (switchThumb)
MOV(W1, addr);
else
{
if (Thumb)
ORRI2R(W1, addr, 1);
else
ANDI2R(W1, addr, ~1);
}
MOV(X0, RCPU);
MOVI2R(W2, restoreCPSR);
if (Num == 0)
QuickCallFunction(X3, jumpToTrampoline<ARMv5>);
else
QuickCallFunction(X3, jumpToTrampoline<ARMv4>);
if (!Thumb && restoreCPSR && CurInstr.Cond() < 0xE)
{
for (int reg : hiRegsLoaded)
LoadReg(reg, RegCache.Mapping[reg]);
}
if (previouslyDirty)
LoadCPSR();
CPSRDirty = previouslyDirty;
}
}
void Compiler::A_Comp_BranchImm()
{
int op = (CurInstr.Instr >> 24) & 1;
s32 offset = (s32)(CurInstr.Instr << 8) >> 6;
u32 target = R15 + offset;
bool link = op;
if (CurInstr.Cond() == 0xF) // BLX_imm
{
target += (op << 1) + 1;
link = true;
}
if (link)
MOVI2R(MapReg(14), R15 - 4);
Comp_JumpTo(target);
}
void Compiler::A_Comp_BranchXchangeReg()
{
ARM64Reg rn = MapReg(CurInstr.A_Reg(0));
MOV(W0, rn);
if ((CurInstr.Instr & 0xF0) == 0x30) // BLX_reg
MOVI2R(MapReg(14), R15 - 4);
Comp_JumpTo(W0, true);
}
void Compiler::T_Comp_BCOND()
{
u32 cond = (CurInstr.Instr >> 8) & 0xF;
FixupBranch skipExecute = CheckCondition(cond);
s32 offset = (s32)(CurInstr.Instr << 24) >> 23;
Comp_JumpTo(R15 + offset + 1, true);
Comp_BranchSpecialBehaviour();
FixupBranch skipFailed = B();
SetJumpTarget(skipExecute);
Comp_AddCycles_C(true);
if (CurInstr.BranchFlags & branch_FollowCondTaken)
{
SaveCPSR(false);
RegCache.PrepareExit();
ADD(W0, RCycles, ConstantCycles);
ABI_PopRegisters(SavedRegs);
RET();
}
SetJumpTarget(skipFailed);
}
void Compiler::T_Comp_B()
{
s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 20;
Comp_JumpTo(R15 + offset + 1);
}
void Compiler::T_Comp_BranchXchangeReg()
{
bool link = CurInstr.Instr & (1 << 7);
if (link)
{
if (Num == 1)
{
printf("BLX unsupported on ARM7!!!\n");
return;
}
MOV(W0, MapReg(CurInstr.A_Reg(3)));
MOVI2R(MapReg(14), R15 - 1);
Comp_JumpTo(W0, true);
}
else
{
ARM64Reg rn = MapReg(CurInstr.A_Reg(3));
Comp_JumpTo(rn, true);
}
}
void Compiler::T_Comp_BL_LONG_1()
{
s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 9;
MOVI2R(MapReg(14), R15 + offset);
Comp_AddCycles_C();
}
void Compiler::T_Comp_BL_LONG_2()
{
ARM64Reg lr = MapReg(14);
s32 offset = (CurInstr.Instr & 0x7FF) << 1;
ADD(W0, lr, offset);
MOVI2R(lr, (R15 - 2) | 1);
Comp_JumpTo(W0, Num == 0 && !(CurInstr.Instr & (1 << 12)));
}
void Compiler::T_Comp_BL_Merged()
{
Comp_AddCycles_C();
R15 += 2;
u32 upperPart = CurInstr.Instr >> 16;
u32 target = (R15 - 2) + ((s32)((CurInstr.Instr & 0x7FF) << 21) >> 9);
target += (upperPart & 0x7FF) << 1;
if (Num == 1 || upperPart & (1 << 12))
target |= 1;
MOVI2R(MapReg(14), (R15 - 2) | 1);
Comp_JumpTo(target);
}
}

View File

@ -0,0 +1,707 @@
#include "ARMJIT_Compiler.h"
#include "../ARMInterpreter.h"
#include "../ARMJIT_Internal.h"
#ifdef __SWITCH__
#include "../switch/compat_switch.h"
extern char __start__;
#endif
#include <malloc.h>
using namespace Arm64Gen;
namespace ARMJIT
{
/*
Recompiling classic ARM to ARMv8 code is at the same time
easier and trickier than compiling to a less related architecture
like x64. At one hand you can translate a lot of instructions directly.
But at the same time, there are a ton of exceptions, like for
example ADD and SUB can't have a RORed second operand on ARMv8.
*/
template <>
const ARM64Reg RegisterCache<Compiler, ARM64Reg>::NativeRegAllocOrder[] =
{W19, W20, W21, W22, W23, W24, W25, W26};
template <>
const int RegisterCache<Compiler, ARM64Reg>::NativeRegsAvailable = 8;
const int JitMemSize = 16 * 1024 * 1024;
void Compiler::MovePC()
{
ADD(MapReg(15), MapReg(15), Thumb ? 2 : 4);
}
Compiler::Compiler()
{
#ifdef __SWITCH__
JitRWBase = memalign(0x1000, JitMemSize);
JitRXStart = (u8*)&__start__ - JitMemSize - 0x1000;
JitRWStart = virtmemReserve(JitMemSize);
MemoryInfo info = {0};
u32 pageInfo = {0};
int i = 0;
while (JitRXStart != NULL)
{
svcQueryMemory(&info, &pageInfo, (u64)JitRXStart);
if (info.type != MemType_Unmapped)
JitRXStart = (void*)((u8*)info.addr - JitMemSize - 0x1000);
else
break;
if (i++ > 8)
{
printf("couldn't find unmapped place for jit memory\n");
JitRXStart = NULL;
}
}
assert(JitRXStart != NULL);
bool succeded = R_SUCCEEDED(svcMapProcessCodeMemory(envGetOwnProcessHandle(), (u64)JitRXStart, (u64)JitRWBase, JitMemSize));
assert(succeded);
succeded = R_SUCCEEDED(svcSetProcessMemoryPermission(envGetOwnProcessHandle(), (u64)JitRXStart, JitMemSize, Perm_Rx));
assert(succeded);
succeded = R_SUCCEEDED(svcMapProcessMemory(JitRWStart, envGetOwnProcessHandle(), (u64)JitRXStart, JitMemSize));
assert(succeded);
SetCodeBase((u8*)JitRWStart, (u8*)JitRXStart);
JitMemUseableSize = JitMemSize;
Reset();
#endif
for (int i = 0; i < 3; i++)
{
for (int j = 0; j < 2; j++)
{
MemFunc9[i][j] = Gen_MemoryRoutine9(8 << i, j);
}
}
MemFunc7[0][0] = (void*)NDS::ARM7Read8;
MemFunc7[1][0] = (void*)NDS::ARM7Read16;
MemFunc7[2][0] = (void*)NDS::ARM7Read32;
MemFunc7[0][1] = (void*)NDS::ARM7Write8;
MemFunc7[1][1] = (void*)NDS::ARM7Write16;
MemFunc7[2][1] = (void*)NDS::ARM7Write32;
for (int i = 0; i < 2; i++)
{
for (int j = 0; j < 2; j++)
{
MemFuncsSeq9[i][j] = Gen_MemoryRoutine9Seq(i, j);
MemFuncsSeq7[i][j] = Gen_MemoryRoutine7Seq(i, j);
}
}
for (int i = 0; i < 3; i++)
{
JumpToFuncs9[i] = Gen_JumpTo9(i);
JumpToFuncs7[i] = Gen_JumpTo7(i);
}
/*
W0 - mode
W1 - reg num
W3 - in/out value of reg
*/
{
ReadBanked = GetRXPtr();
ADD(X2, RCPU, X1, ArithOption(X1, ST_LSL, 2));
CMP(W0, 0x11);
FixupBranch fiq = B(CC_EQ);
SUBS(W1, W1, 13 - 8);
ADD(X2, RCPU, X1, ArithOption(X1, ST_LSL, 2));
FixupBranch notEverything = B(CC_LT);
CMP(W0, 0x12);
FixupBranch irq = B(CC_EQ);
CMP(W0, 0x13);
FixupBranch svc = B(CC_EQ);
CMP(W0, 0x17);
FixupBranch abt = B(CC_EQ);
CMP(W0, 0x1B);
FixupBranch und = B(CC_EQ);
SetJumpTarget(notEverything);
RET();
SetJumpTarget(fiq);
LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_FIQ));
RET();
SetJumpTarget(irq);
LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_IRQ));
RET();
SetJumpTarget(svc);
LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_SVC));
RET();
SetJumpTarget(abt);
LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_ABT));
RET();
SetJumpTarget(und);
LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_UND));
RET();
}
{
WriteBanked = GetRXPtr();
ADD(X2, RCPU, X1, ArithOption(X1, ST_LSL, 2));
CMP(W0, 0x11);
FixupBranch fiq = B(CC_EQ);
SUBS(W1, W1, 13 - 8);
ADD(X2, RCPU, X1, ArithOption(X1, ST_LSL, 2));
FixupBranch notEverything = B(CC_LT);
CMP(W0, 0x12);
FixupBranch irq = B(CC_EQ);
CMP(W0, 0x13);
FixupBranch svc = B(CC_EQ);
CMP(W0, 0x17);
FixupBranch abt = B(CC_EQ);
CMP(W0, 0x1B);
FixupBranch und = B(CC_EQ);
SetJumpTarget(notEverything);
MOVI2R(W4, 0);
RET();
SetJumpTarget(fiq);
STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_FIQ));
MOVI2R(W4, 1);
RET();
SetJumpTarget(irq);
STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_IRQ));
MOVI2R(W4, 1);
RET();
SetJumpTarget(svc);
STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_SVC));
MOVI2R(W4, 1);
RET();
SetJumpTarget(abt);
STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_ABT));
MOVI2R(W4, 1);
RET();
SetJumpTarget(und);
STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_UND));
MOVI2R(W4, 1);
RET();
}
//FlushIcache();
JitMemUseableSize -= GetCodeOffset();
SetCodeBase((u8*)GetRWPtr(), (u8*)GetRXPtr());
}
Compiler::~Compiler()
{
#ifdef __SWITCH__
if (JitRWStart != NULL)
{
bool succeded = R_SUCCEEDED(svcUnmapProcessMemory(JitRWStart, envGetOwnProcessHandle(), (u64)JitRXStart, JitMemSize));
assert(succeded);
virtmemFree(JitRWStart, JitMemSize);
succeded = R_SUCCEEDED(svcUnmapProcessCodeMemory(envGetOwnProcessHandle(), (u64)JitRXStart, (u64)JitRWBase, JitMemSize));
assert(succeded);
free(JitRWBase);
}
#endif
}
void Compiler::LoadReg(int reg, ARM64Reg nativeReg)
{
if (reg == 15)
MOVI2R(nativeReg, R15);
else
LDR(INDEX_UNSIGNED, nativeReg, RCPU, offsetof(ARM, R[reg]));
}
void Compiler::SaveReg(int reg, ARM64Reg nativeReg)
{
STR(INDEX_UNSIGNED, nativeReg, RCPU, offsetof(ARM, R[reg]));
}
void Compiler::LoadCPSR()
{
assert(!CPSRDirty);
LDR(INDEX_UNSIGNED, RCPSR, RCPU, offsetof(ARM, CPSR));
}
void Compiler::SaveCPSR(bool markClean)
{
if (CPSRDirty)
{
STR(INDEX_UNSIGNED, RCPSR, RCPU, offsetof(ARM, CPSR));
CPSRDirty = CPSRDirty && !markClean;
}
}
FixupBranch Compiler::CheckCondition(u32 cond)
{
if (cond >= 0x8)
{
LSR(W1, RCPSR, 28);
MOVI2R(W2, 1);
LSLV(W2, W2, W1);
ANDI2R(W2, W2, ARM::ConditionTable[cond], W3);
return CBZ(W2);
}
else
{
u8 bit = (28 + ((~(cond >> 1) & 1) << 1 | (cond >> 2 & 1) ^ (cond >> 1 & 1)));
if (cond & 1)
return TBNZ(RCPSR, bit);
else
return TBZ(RCPSR, bit);
}
}
#define F(x) &Compiler::A_Comp_##x
const Compiler::CompileFunc A_Comp[ARMInstrInfo::ak_Count] =
{
// AND
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// EOR
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// SUB
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// RSB
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// ADD
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// ADC
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// SBC
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// RSC
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// ORR
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// MOV
F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp),
F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp),
// BIC
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// MVN
F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp),
F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp),
// TST
F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp),
// TEQ
F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp),
// CMP
F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp),
// CMN
F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp),
// Mul
F(Mul), F(Mul), F(Mul_Long), F(Mul_Long), F(Mul_Long), F(Mul_Long), NULL, NULL, NULL, NULL, NULL,
// ARMv5 exclusives
F(Clz), NULL, NULL, NULL, NULL,
// STR
F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB),
// STRB
F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB),
// LDR
F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB),
// LDRB
F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB),
// STRH
F(MemHD), F(MemHD), F(MemHD), F(MemHD),
// LDRD
NULL, NULL, NULL, NULL,
// STRD
NULL, NULL, NULL, NULL,
// LDRH
F(MemHD), F(MemHD), F(MemHD), F(MemHD),
// LDRSB
F(MemHD), F(MemHD), F(MemHD), F(MemHD),
// LDRSH
F(MemHD), F(MemHD), F(MemHD), F(MemHD),
// Swap
NULL, NULL,
// LDM, STM
F(LDM_STM), F(LDM_STM),
// Branch
F(BranchImm), F(BranchImm), F(BranchImm), F(BranchXchangeReg), F(BranchXchangeReg),
// Special
NULL, NULL, NULL, NULL, NULL, NULL, NULL
};
#undef F
#define F(x) &Compiler::T_Comp_##x
const Compiler::CompileFunc T_Comp[ARMInstrInfo::tk_Count] =
{
// Shift imm
F(ShiftImm), F(ShiftImm), F(ShiftImm),
// Add/sub tri operand
F(AddSub_), F(AddSub_), F(AddSub_), F(AddSub_),
// 8 bit imm
F(ALUImm8), F(ALUImm8), F(ALUImm8), F(ALUImm8),
// ALU
F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU),
F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU),
// ALU hi reg
F(ALU_HiReg), F(ALU_HiReg), F(ALU_HiReg),
// PC/SP relative ops
F(RelAddr), F(RelAddr), F(AddSP),
// LDR PC rel
F(LoadPCRel),
// LDR/STR reg offset
F(MemReg), F(MemReg), F(MemReg), F(MemReg),
// LDR/STR sign extended, half
F(MemRegHalf), F(MemRegHalf), F(MemRegHalf), F(MemRegHalf),
// LDR/STR imm offset
F(MemImm), F(MemImm), F(MemImm), F(MemImm),
// LDR/STR half imm offset
F(MemImmHalf), F(MemImmHalf),
// LDR/STR sp rel
F(MemSPRel), F(MemSPRel),
// PUSH/POP
F(PUSH_POP), F(PUSH_POP),
// LDMIA, STMIA
F(LDMIA_STMIA), F(LDMIA_STMIA),
// Branch
F(BCOND), F(BranchXchangeReg), F(BranchXchangeReg), F(B), F(BL_LONG_1), F(BL_LONG_2),
// Unk, SVC
NULL, NULL,
F(BL_Merged)
};
bool Compiler::CanCompile(bool thumb, u16 kind)
{
return (thumb ? T_Comp[kind] : A_Comp[kind]) != NULL;
}
void Compiler::Comp_BranchSpecialBehaviour()
{
if (CurInstr.BranchFlags & branch_IdleBranch)
{
MOVI2R(W0, 1);
STRB(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, IdleLoop));
}
if (CurInstr.BranchFlags & branch_FollowCondNotTaken)
{
SaveCPSR(false);
RegCache.PrepareExit();
ADD(W0, RCycles, ConstantCycles);
ABI_PopRegisters(SavedRegs);
RET();
}
}
JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
{
if (JitMemUseableSize - GetCodeOffset() < 1024 * 16)
{
printf("JIT memory full, resetting...\n");
ResetBlockCache();
}
JitBlockEntry res = (JitBlockEntry)GetRXPtr();
Thumb = thumb;
Num = cpu->Num;
CurCPU = cpu;
ConstantCycles = 0;
RegCache = RegisterCache<Compiler, ARM64Reg>(this, instrs, instrsCount, true);
//printf("compiling block at %x\n", R15 - (Thumb ? 2 : 4));
const u32 ALL_CALLEE_SAVED = 0x7FF80000;
SavedRegs = BitSet32((RegCache.GetPushRegs() | BitSet32(0x78000000)) & BitSet32(ALL_CALLEE_SAVED));
//if (Num == 1)
{
ABI_PushRegisters(SavedRegs);
MOVP2R(RCPU, CurCPU);
MOVI2R(RCycles, 0);
LoadCPSR();
}
for (int i = 0; i < instrsCount; i++)
{
CurInstr = instrs[i];
R15 = CurInstr.Addr + (Thumb ? 4 : 8);
CodeRegion = R15 >> 24;
CompileFunc comp = Thumb
? T_Comp[CurInstr.Info.Kind]
: A_Comp[CurInstr.Info.Kind];
Exit = i == (instrsCount - 1) || (CurInstr.BranchFlags & branch_FollowCondNotTaken);
//printf("%x instr %x regs: r%x w%x n%x flags: %x %x %x\n", R15, CurInstr.Instr, CurInstr.Info.SrcRegs, CurInstr.Info.DstRegs, CurInstr.Info.ReadFlags, CurInstr.Info.NotStrictlyNeeded, CurInstr.Info.WriteFlags, CurInstr.SetFlags);
bool isConditional = Thumb ? CurInstr.Info.Kind == ARMInstrInfo::tk_BCOND : CurInstr.Cond() < 0xE;
if (comp == NULL || (CurInstr.BranchFlags & branch_FollowCondTaken) || (i == instrsCount - 1 && (!CurInstr.Info.Branches() || isConditional)))
{
MOVI2R(W0, R15);
STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, R[15]));
if (comp == NULL)
{
MOVI2R(W0, CurInstr.Instr);
STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CurInstr));
}
if (Num == 0)
{
MOVI2R(W0, (s32)CurInstr.CodeCycles);
STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CodeCycles));
}
}
if (comp == NULL)
{
SaveCPSR();
RegCache.Flush();
}
else
RegCache.Prepare(Thumb, i);
if (Thumb)
{
if (comp == NULL)
{
MOV(X0, RCPU);
QuickCallFunction(X1, InterpretTHUMB[CurInstr.Info.Kind]);
}
else
(this->*comp)();
}
else
{
u32 cond = CurInstr.Cond();
if (CurInstr.Info.Kind == ARMInstrInfo::ak_BLX_IMM)
{
if (comp)
(this->*comp)();
else
{
MOV(X0, RCPU);
QuickCallFunction(X1, ARMInterpreter::A_BLX_IMM);
}
}
else if (cond == 0xF)
Comp_AddCycles_C();
else
{
IrregularCycles = false;
FixupBranch skipExecute;
if (cond < 0xE)
skipExecute = CheckCondition(cond);
if (comp == NULL)
{
MOV(X0, RCPU);
QuickCallFunction(X1, InterpretARM[CurInstr.Info.Kind]);
}
else
{
(this->*comp)();
}
Comp_BranchSpecialBehaviour();
if (cond < 0xE)
{
if (IrregularCycles)
{
FixupBranch skipNop = B();
SetJumpTarget(skipExecute);
Comp_AddCycles_C();
if (CurInstr.BranchFlags & branch_FollowCondTaken)
{
SaveCPSR(false);
RegCache.PrepareExit();
ADD(W0, RCycles, ConstantCycles);
ABI_PopRegisters(SavedRegs);
RET();
}
SetJumpTarget(skipNop);
}
else
SetJumpTarget(skipExecute);
}
}
}
if (comp == NULL)
LoadCPSR();
}
RegCache.Flush();
//if (Num == 1)
{
SaveCPSR();
ADD(W0, RCycles, ConstantCycles);
ABI_PopRegisters(SavedRegs);
}
//else
// ADD(RCycles, RCycles, ConstantCycles);
RET();
FlushIcache();
//printf("finished\n");
return res;
}
void Compiler::Reset()
{
SetCodePtr(0);
const u32 brk_0 = 0xD4200000;
for (int i = 0; i < JitMemUseableSize / 4; i++)
*(((u32*)GetRWPtr()) + i) = brk_0;
}
void Compiler::Comp_AddCycles_C(bool nonConst)
{
s32 cycles = Num ?
NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 1 : 3]
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles);
if (!nonConst && !CurInstr.Info.Branches())
ConstantCycles += cycles;
else
ADD(RCycles, RCycles, cycles);
}
void Compiler::Comp_AddCycles_CI(u32 numI)
{
s32 cycles = (Num ?
NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + numI;
if (Thumb || CurInstr.Cond() >= 0xE)
ConstantCycles += cycles;
else
ADD(RCycles, RCycles, cycles);
}
void Compiler::Comp_AddCycles_CI(u32 c, ARM64Reg numI, ArithOption shift)
{
s32 cycles = (Num ?
NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + c;
ADD(RCycles, RCycles, numI, shift);
if (Thumb || CurInstr.Cond() >= 0xE)
ConstantCycles += c;
else
ADD(RCycles, RCycles, cycles);
}
void Compiler::Comp_AddCycles_CDI()
{
if (Num == 0)
Comp_AddCycles_CD();
else
{
IrregularCycles = true;
s32 cycles;
s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
s32 numD = CurInstr.DataCycles;
if (CurInstr.DataRegion == 0x02) // mainRAM
{
if (CodeRegion == 0x02)
cycles = numC + numD;
else
{
numC++;
cycles = std::max(numC + numD - 3, std::max(numC, numD));
}
}
else if (CodeRegion == 0x02)
{
numD++;
cycles = std::max(numC + numD - 3, std::max(numC, numD));
}
else
{
cycles = numC + numD + 1;
}
if (!Thumb && CurInstr.Cond() < 0xE)
ADD(RCycles, RCycles, cycles);
else
ConstantCycles += cycles;
}
}
void Compiler::Comp_AddCycles_CD()
{
u32 cycles = 0;
if (Num == 0)
{
s32 numC = (R15 & 0x2) ? 0 : CurInstr.CodeCycles;
s32 numD = CurInstr.DataCycles;
//if (DataRegion != CodeRegion)
cycles = std::max(numC + numD - 6, std::max(numC, numD));
IrregularCycles = cycles != numC;
}
else
{
s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
s32 numD = CurInstr.DataCycles;
if (CurInstr.DataRegion == 0x02)
{
if (CodeRegion == 0x02)
cycles += numC + numD;
else
cycles += std::max(numC + numD - 3, std::max(numC, numD));
}
else if (CodeRegion == 0x02)
{
cycles += std::max(numC + numD - 3, std::max(numC, numD));
}
else
{
cycles += numC + numD;
}
IrregularCycles = true;
}
if ((!Thumb && CurInstr.Cond() < 0xE) && IrregularCycles)
ADD(RCycles, RCycles, cycles);
else
ConstantCycles += cycles;
}
}

View File

@ -0,0 +1,234 @@
#ifndef ARMJIT_COMPILER_H
#define ARMJIT_COMPILER_H
#include "../ARM.h"
#include "../ARMJIT.h"
#include "../dolphin/Arm64Emitter.h"
#include "../ARMJIT_Internal.h"
#include "../ARMJIT_RegisterCache.h"
namespace ARMJIT
{
const Arm64Gen::ARM64Reg RCPSR = Arm64Gen::W27;
const Arm64Gen::ARM64Reg RCycles = Arm64Gen::W28;
const Arm64Gen::ARM64Reg RCPU = Arm64Gen::X29;
struct Op2
{
Op2()
{}
Op2(Arm64Gen::ARM64Reg rm) : IsImm(false)
{
Reg.Rm = rm;
Reg.ShiftType = Arm64Gen::ST_LSL;
Reg.ShiftAmount = 0;
}
Op2(u32 imm) : IsImm(true), Imm(imm)
{}
Op2(Arm64Gen::ARM64Reg rm, Arm64Gen::ShiftType st, int amount) : IsImm(false)
{
Reg.Rm = rm;
Reg.ShiftType = st;
Reg.ShiftAmount = amount;
}
Arm64Gen::ArithOption ToArithOption()
{
assert(!IsImm);
return Arm64Gen::ArithOption(Reg.Rm, Reg.ShiftType, Reg.ShiftAmount);
}
bool IsSimpleReg()
{ return !IsImm && !Reg.ShiftAmount && Reg.ShiftType == Arm64Gen::ST_LSL; }
bool ImmFits12Bit()
{ return IsImm && (Imm & 0xFFF == Imm); }
bool IsZero()
{ return IsImm && !Imm; }
bool IsImm;
union
{
struct
{
Arm64Gen::ARM64Reg Rm;
Arm64Gen::ShiftType ShiftType;
int ShiftAmount;
} Reg;
u32 Imm;
};
};
class Compiler : Arm64Gen::ARM64XEmitter
{
public:
typedef void (Compiler::*CompileFunc)();
Compiler();
~Compiler();
Arm64Gen::ARM64Reg MapReg(int reg)
{
assert(RegCache.Mapping[reg] != Arm64Gen::INVALID_REG);
return RegCache.Mapping[reg];
}
JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount);
bool CanCompile(bool thumb, u16 kind);
bool FlagsNZNeeded()
{
return CurInstr.SetFlags & 0xC;
}
void Reset();
void Comp_AddCycles_C(bool forceNonConst = false);
void Comp_AddCycles_CI(u32 numI);
void Comp_AddCycles_CI(u32 c, Arm64Gen::ARM64Reg numI, Arm64Gen::ArithOption shift);
void Comp_AddCycles_CD();
void Comp_AddCycles_CDI();
void MovePC();
void LoadReg(int reg, Arm64Gen::ARM64Reg nativeReg);
void SaveReg(int reg, Arm64Gen::ARM64Reg nativeReg);
void LoadCPSR();
void SaveCPSR(bool markClean = true);
void A_Comp_ALUTriOp();
void A_Comp_ALUMovOp();
void A_Comp_ALUCmpOp();
void A_Comp_Mul();
void A_Comp_Mul_Long();
void A_Comp_Clz();
void A_Comp_MemWB();
void A_Comp_MemHD();
void A_Comp_LDM_STM();
void A_Comp_BranchImm();
void A_Comp_BranchXchangeReg();
void T_Comp_ShiftImm();
void T_Comp_AddSub_();
void T_Comp_ALUImm8();
void T_Comp_ALU();
void T_Comp_ALU_HiReg();
void T_Comp_AddSP();
void T_Comp_RelAddr();
void T_Comp_MemReg();
void T_Comp_MemImm();
void T_Comp_MemRegHalf();
void T_Comp_MemImmHalf();
void T_Comp_LoadPCRel();
void T_Comp_MemSPRel();
void T_Comp_LDMIA_STMIA();
void T_Comp_PUSH_POP();
void T_Comp_BCOND();
void T_Comp_B();
void T_Comp_BranchXchangeReg();
void T_Comp_BL_LONG_1();
void T_Comp_BL_LONG_2();
void T_Comp_BL_Merged();
s32 Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode);
void Comp_Mul_Mla(bool S, bool mla, Arm64Gen::ARM64Reg rd, Arm64Gen::ARM64Reg rm, Arm64Gen::ARM64Reg rs, Arm64Gen::ARM64Reg rn);
void Comp_Compare(int op, Arm64Gen::ARM64Reg rn, Op2 op2);
void Comp_Logical(int op, bool S, Arm64Gen::ARM64Reg rd, Arm64Gen::ARM64Reg rn, Op2 op2);
void Comp_Arithmetic(int op, bool S, Arm64Gen::ARM64Reg rd, Arm64Gen::ARM64Reg rn, Op2 op2);
void Comp_RetriveFlags(bool retriveCV);
Arm64Gen::FixupBranch CheckCondition(u32 cond);
void Comp_JumpTo(Arm64Gen::ARM64Reg addr, bool switchThumb, bool restoreCPSR = false);
void Comp_JumpTo(u32 addr, bool forceNonConstantCycles = false);
void A_Comp_GetOp2(bool S, Op2& op2);
void Comp_RegShiftImm(int op, int amount, bool S, Op2& op2, Arm64Gen::ARM64Reg tmp = Arm64Gen::W0);
void Comp_RegShiftReg(int op, bool S, Op2& op2, Arm64Gen::ARM64Reg rs);
void Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr);
enum
{
memop_Writeback = 1 << 0,
memop_Post = 1 << 1,
memop_SignExtend = 1 << 2,
memop_Store = 1 << 3,
memop_SubtractOffset = 1 << 4
};
void Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags);
void* Gen_MemoryRoutine9(int size, bool store);
void* Gen_MemoryRoutine9Seq(bool store, bool preinc);
void* Gen_MemoryRoutine7Seq(bool store, bool preinc);
// 0 = switch mode, 1 = stay arm, 2 = stay thumb
void* Gen_JumpTo9(int kind);
void* Gen_JumpTo7(int kind);
void Comp_BranchSpecialBehaviour();
bool Exit;
FetchedInstr CurInstr;
bool Thumb;
u32 R15;
u32 Num;
ARM* CurCPU;
u32 ConstantCycles;
u32 CodeRegion;
BitSet32 SavedRegs;
u32 JitMemUseableSize;
void* ReadBanked, *WriteBanked;
// [size][store]
void* MemFunc9[3][2];
void* MemFunc7[3][2];
// [store][pre increment]
void* MemFuncsSeq9[2][2];
// "[code in main ram]
void* MemFuncsSeq7[2][2];
void* JumpToFuncs9[3];
void* JumpToFuncs7[3];
RegisterCache<Compiler, Arm64Gen::ARM64Reg> RegCache;
bool CPSRDirty = false;
bool IrregularCycles = false;
#ifdef __SWITCH__
void* JitRWBase;
void* JitRWStart;
void* JitRXStart;
#endif
};
}
#endif

View File

@ -0,0 +1,848 @@
#include "ARMJIT_Compiler.h"
#include "../Config.h"
using namespace Arm64Gen;
namespace ARMJIT
{
// W0 - address
// (if store) W1 - value to store
// W2 - code cycles
void* Compiler::Gen_MemoryRoutine9(int size, bool store)
{
AlignCode16();
void* res = GetRXPtr();
u32 addressMask;
switch (size)
{
case 32: addressMask = ~3; break;
case 16: addressMask = ~1; break;
case 8: addressMask = ~0; break;
}
LDR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARMv5, DTCMBase));
LDR(INDEX_UNSIGNED, W4, RCPU, offsetof(ARMv5, DTCMSize));
SUB(W3, W0, W3);
CMP(W3, W4);
FixupBranch insideDTCM = B(CC_LO);
UBFX(W4, W0, 24, 8);
CMP(W4, 0x02);
FixupBranch outsideMainRAM = B(CC_NEQ);
ANDI2R(W3, W0, addressMask & (MAIN_RAM_SIZE - 1));
MOVP2R(X4, NDS::MainRAM);
if (!store && size == 32)
{
LDR(W3, X3, X4);
ANDI2R(W0, W0, 3);
LSL(W0, W0, 3);
RORV(W0, W3, W0);
}
else if (store)
STRGeneric(size, W1, X3, X4);
else
LDRGeneric(size, false, W0, X3, X4);
RET();
SetJumpTarget(outsideMainRAM);
LDR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARMv5, ITCMSize));
CMP(W0, W3);
FixupBranch insideITCM = B(CC_LO);
if (store)
{
if (size > 8)
ANDI2R(W0, W0, addressMask);
switch (size)
{
case 32: QuickTailCall(X4, NDS::ARM9Write32); break;
case 16: QuickTailCall(X4, NDS::ARM9Write16); break;
case 8: QuickTailCall(X4, NDS::ARM9Write8); break;
}
}
else
{
if (size == 32)
ABI_PushRegisters({0, 30});
if (size > 8)
ANDI2R(W0, W0, addressMask);
switch (size)
{
case 32: QuickCallFunction(X4, NDS::ARM9Read32); break;
case 16: QuickTailCall (X4, NDS::ARM9Read16); break;
case 8: QuickTailCall (X4, NDS::ARM9Read8 ); break;
}
if (size == 32)
{
ABI_PopRegisters({1, 30});
ANDI2R(W1, W1, 3);
LSL(W1, W1, 3);
RORV(W0, W0, W1);
RET();
}
}
SetJumpTarget(insideDTCM);
ANDI2R(W3, W3, 0x3FFF & addressMask);
ADDI2R(W3, W3, offsetof(ARMv5, DTCM), W4);
if (!store && size == 32)
{
ANDI2R(W4, W0, 3);
LDR(W0, RCPU, W3);
LSL(W4, W4, 3);
RORV(W0, W0, W4);
}
else if (store)
STRGeneric(size, W1, RCPU, W3);
else
LDRGeneric(size, false, W0, RCPU, W3);
RET();
SetJumpTarget(insideITCM);
ANDI2R(W3, W0, 0x7FFF & addressMask);
if (store)
{
LSR(W0, W3, 8);
ADDI2R(W0, W0, ExeMemRegionOffsets[exeMem_ITCM], W4);
MOVP2R(X4, CodeRanges);
ADD(X4, X4, X0, ArithOption(X0, ST_LSL, 4));
static_assert(sizeof(AddressRange) == 16);
LDR(INDEX_UNSIGNED, W4, X4, offsetof(AddressRange, Blocks.Length));
FixupBranch null = CBZ(W4);
ABI_PushRegisters({1, 3, 30});
QuickCallFunction(X4, InvalidateByAddr);
ABI_PopRegisters({1, 3, 30});
SetJumpTarget(null);
}
ADDI2R(W3, W3, offsetof(ARMv5, ITCM), W4);
if (!store && size == 32)
{
ANDI2R(W4, W0, 3);
LDR(W0, RCPU, W3);
LSL(W4, W4, 3);
RORV(W0, W0, W4);
}
else if (store)
STRGeneric(size, W1, RCPU, W3);
else
LDRGeneric(size, false, W0, RCPU, W3);
RET();
return res;
}
/*
W0 - base address
X1 - stack space
W2 - values count
*/
void* Compiler::Gen_MemoryRoutine9Seq(bool store, bool preinc)
{
AlignCode16();
void* res = GetRXPtr();
void* loopStart = GetRXPtr();
SUB(W2, W2, 1);
if (preinc)
ADD(W0, W0, 4);
LDR(INDEX_UNSIGNED, W4, RCPU, offsetof(ARMv5, DTCMBase));
LDR(INDEX_UNSIGNED, W5, RCPU, offsetof(ARMv5, DTCMSize));
SUB(W4, W0, W4);
CMP(W4, W5);
FixupBranch insideDTCM = B(CC_LO);
LDR(INDEX_UNSIGNED, W4, RCPU, offsetof(ARMv5, ITCMSize));
CMP(W0, W4);
FixupBranch insideITCM = B(CC_LO);
ABI_PushRegisters({0, 1, 2, 30}); // TODO: move SP only once
if (store)
{
LDR(X1, X1, ArithOption(X2, true));
QuickCallFunction(X4, NDS::ARM9Write32);
ABI_PopRegisters({0, 1, 2, 30});
}
else
{
QuickCallFunction(X4, NDS::ARM9Read32);
MOV(W4, W0);
ABI_PopRegisters({0, 1, 2, 30});
STR(X4, X1, ArithOption(X2, true));
}
if (!preinc)
ADD(W0, W0, 4);
CBNZ(W2, loopStart);
RET();
SetJumpTarget(insideDTCM);
ANDI2R(W4, W4, ~3 & 0x3FFF);
ADDI2R(X4, X4, offsetof(ARMv5, DTCM));
if (store)
{
LDR(X5, X1, ArithOption(X2, true));
STR(W5, RCPU, X4);
}
else
{
LDR(W5, RCPU, X4);
STR(X5, X1, ArithOption(X2, true));
}
if (!preinc)
ADD(W0, W0, 4);
CBNZ(W2, loopStart);
RET();
SetJumpTarget(insideITCM);
ANDI2R(W4, W0, ~3 & 0x7FFF);
if (store)
{
LSR(W6, W4, 8);
ADDI2R(W6, W6, ExeMemRegionOffsets[exeMem_ITCM], W5);
MOVP2R(X5, CodeRanges);
ADD(X5, X5, X6, ArithOption(X6, ST_LSL, 4));
static_assert(sizeof(AddressRange) == 16);
LDR(INDEX_UNSIGNED, W5, X5, offsetof(AddressRange, Blocks.Length));
FixupBranch null = CBZ(W5);
ABI_PushRegisters({0, 1, 2, 4, 30});
MOV(W0, W6);
QuickCallFunction(X5, InvalidateByAddr);
ABI_PopRegisters({0, 1, 2, 4, 30});
SetJumpTarget(null);
}
ADDI2R(W4, W4, offsetof(ARMv5, ITCM), W5);
if (store)
{
LDR(X5, X1, ArithOption(X2, true));
STR(W5, RCPU, X4);
}
else
{
LDR(W5, RCPU, X4);
STR(X5, X1, ArithOption(X2, true));
}
if (!preinc)
ADD(W0, W0, 4);
CBNZ(W2, loopStart);
RET();
return res;
}
void* Compiler::Gen_MemoryRoutine7Seq(bool store, bool preinc)
{
AlignCode16();
void* res = GetRXPtr();
void* loopStart = GetRXPtr();
SUB(W2, W2, 1);
if (preinc)
ADD(W0, W0, 4);
ABI_PushRegisters({0, 1, 2, 30});
if (store)
{
LDR(X1, X1, ArithOption(X2, true));
QuickCallFunction(X4, NDS::ARM7Write32);
ABI_PopRegisters({0, 1, 2, 30});
}
else
{
QuickCallFunction(X4, NDS::ARM7Read32);
MOV(W4, W0);
ABI_PopRegisters({0, 1, 2, 30});
STR(X4, X1, ArithOption(X2, true));
}
if (!preinc)
ADD(W0, W0, 4);
CBNZ(W2, loopStart);
RET();
return res;
}
void Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr)
{
u32 val;
// make sure arm7 bios is accessible
u32 tmpR15 = CurCPU->R[15];
CurCPU->R[15] = R15;
if (size == 32)
{
CurCPU->DataRead32(addr & ~0x3, &val);
val = ROR(val, (addr & 0x3) << 3);
}
else if (size == 16)
{
CurCPU->DataRead16(addr & ~0x1, &val);
if (signExtend)
val = ((s32)val << 16) >> 16;
}
else
{
CurCPU->DataRead8(addr, &val);
if (signExtend)
val = ((s32)val << 24) >> 24;
}
CurCPU->R[15] = tmpR15;
MOVI2R(MapReg(rd), val);
if (Thumb || CurInstr.Cond() == 0xE)
RegCache.PutLiteral(rd, val);
}
void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
{
u32 addressMask = ~0;
if (size == 32)
addressMask = ~3;
if (size == 16)
addressMask = ~1;
if (flags & memop_Store)
Comp_AddCycles_CD();
else
Comp_AddCycles_CDI();
if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && offset.IsImm && !(flags & (memop_Post|memop_Store|memop_Writeback)))
{
u32 addr = R15 + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
u32 translatedAddr = Num == 0 ? TranslateAddr<0>(addr) : TranslateAddr<1>(addr);
if (!(CodeRanges[translatedAddr / 512].InvalidLiterals & (1 << ((translatedAddr & 0x1FF) / 16))))
{
Comp_MemLoadLiteral(size, flags & memop_SignExtend, rd, addr);
return;
}
}
{
ARM64Reg rdMapped = MapReg(rd);
ARM64Reg rnMapped = MapReg(rn);
bool inlinePreparation = Num == 1;
u32 constLocalROR32 = 4;
void* memFunc = Num == 0
? MemFunc9[size >> 4][!!(flags & memop_Store)]
: MemFunc7[size >> 4][!!((flags & memop_Store))];
if (Config::JIT_LiteralOptimisations && (rd != 15 || (flags & memop_Store)) && offset.IsImm && RegCache.IsLiteral(rn))
{
u32 addr = RegCache.LiteralValues[rn] + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
NDS::MemRegion region;
region.Mem = NULL;
if (Num == 0)
{
ARMv5* cpu5 = (ARMv5*)CurCPU;
// stupid dtcm...
if (addr >= cpu5->DTCMBase && addr < (cpu5->DTCMBase + cpu5->DTCMSize))
{
region.Mem = cpu5->DTCM;
region.Mask = 0x3FFF;
}
else
{
NDS::ARM9GetMemRegion(addr, flags & memop_Store, &region);
}
}
else
NDS::ARM7GetMemRegion(addr, flags & memop_Store, &region);
if (region.Mem != NULL)
{
void* ptr = &region.Mem[addr & addressMask & region.Mask];
MOVP2R(X0, ptr);
if (flags & memop_Store)
STRGeneric(size, INDEX_UNSIGNED, rdMapped, X0, 0);
else
{
LDRGeneric(size, flags & memop_SignExtend, INDEX_UNSIGNED, rdMapped, X0, 0);
if (size == 32 && addr & ~0x3)
ROR_(rdMapped, rdMapped, (addr & 0x3) << 3);
}
return;
}
void* specialFunc = GetFuncForAddr(CurCPU, addr, flags & memop_Store, size);
if (specialFunc)
{
memFunc = specialFunc;
inlinePreparation = true;
constLocalROR32 = addr & 0x3;
}
}
ARM64Reg finalAddr = W0;
if (flags & memop_Post)
{
finalAddr = rnMapped;
MOV(W0, rnMapped);
}
if (flags & memop_Store)
MOV(W1, rdMapped);
if (!offset.IsImm)
Comp_RegShiftImm(offset.Reg.ShiftType, offset.Reg.ShiftAmount, false, offset, W2);
// offset might become an immediate
if (offset.IsImm)
{
if (flags & memop_SubtractOffset)
SUB(finalAddr, rnMapped, offset.Imm);
else
ADD(finalAddr, rnMapped, offset.Imm);
}
else
{
if (offset.Reg.ShiftType == ST_ROR)
{
ROR_(W0, offset.Reg.Rm, offset.Reg.ShiftAmount);
offset = Op2(W0);
}
if (flags & memop_SubtractOffset)
SUB(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption());
else
ADD(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption());
}
if (!(flags & memop_Post) && (flags & memop_Writeback))
MOV(rnMapped, W0);
if (inlinePreparation)
{
if (size == 32 && !(flags & memop_Store) && constLocalROR32 == 4)
ANDI2R(rdMapped, W0, 3);
if (size > 8)
ANDI2R(W0, W0, addressMask);
}
QuickCallFunction(X2, memFunc);
if (!(flags & memop_Store))
{
if (inlinePreparation && !(flags & memop_Store) && size == 32)
{
if (constLocalROR32 == 4)
{
LSL(rdMapped, rdMapped, 3);
RORV(rdMapped, W0, rdMapped);
}
else if (constLocalROR32 > 0)
ROR_(rdMapped, W0, constLocalROR32 << 3);
else
MOV(rdMapped, W0);
}
else if (flags & memop_SignExtend)
{
if (size == 16)
SXTH(rdMapped, W0);
else if (size == 8)
SXTB(rdMapped, W0);
else
assert("What's wrong with you?");
}
else
MOV(rdMapped, W0);
if (CurInstr.Info.Branches())
{
if (size < 32)
printf("LDR size < 32 branching?\n");
Comp_JumpTo(rdMapped, Num == 0, false);
}
}
}
}
void Compiler::A_Comp_MemWB()
{
Op2 offset;
if (CurInstr.Instr & (1 << 25))
offset = Op2(MapReg(CurInstr.A_Reg(0)), (ShiftType)((CurInstr.Instr >> 5) & 0x3), (CurInstr.Instr >> 7) & 0x1F);
else
offset = Op2(CurInstr.Instr & 0xFFF);
bool load = CurInstr.Instr & (1 << 20);
bool byte = CurInstr.Instr & (1 << 22);
int flags = 0;
if (!load)
flags |= memop_Store;
if (!(CurInstr.Instr & (1 << 24)))
flags |= memop_Post;
if (CurInstr.Instr & (1 << 21))
flags |= memop_Writeback;
if (!(CurInstr.Instr & (1 << 23)))
flags |= memop_SubtractOffset;
Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, byte ? 8 : 32, flags);
}
void Compiler::A_Comp_MemHD()
{
bool load = CurInstr.Instr & (1 << 20);
bool signExtend;
int op = (CurInstr.Instr >> 5) & 0x3;
int size;
if (load)
{
signExtend = op >= 2;
size = op == 2 ? 8 : 16;
}
else
{
size = 16;
signExtend = false;
}
Op2 offset;
if (CurInstr.Instr & (1 << 22))
offset = Op2((CurInstr.Instr & 0xF) | ((CurInstr.Instr >> 4) & 0xF0));
else
offset = Op2(MapReg(CurInstr.A_Reg(0)));
int flags = 0;
if (signExtend)
flags |= memop_SignExtend;
if (!load)
flags |= memop_Store;
if (!(CurInstr.Instr & (1 << 24)))
flags |= memop_Post;
if (!(CurInstr.Instr & (1 << 23)))
flags |= memop_SubtractOffset;
if (CurInstr.Instr & (1 << 21))
flags |= memop_Writeback;
Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, size, flags);
}
void Compiler::T_Comp_MemReg()
{
int op = (CurInstr.Instr >> 10) & 0x3;
bool load = op & 0x2;
bool byte = op & 0x1;
Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3),
Op2(MapReg(CurInstr.T_Reg(6))), byte ? 8 : 32, load ? 0 : memop_Store);
}
void Compiler::T_Comp_MemImm()
{
int op = (CurInstr.Instr >> 11) & 0x3;
bool load = op & 0x1;
bool byte = op & 0x2;
u32 offset = ((CurInstr.Instr >> 6) & 0x1F) * (byte ? 1 : 4);
Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(offset),
byte ? 8 : 32, load ? 0 : memop_Store);
}
void Compiler::T_Comp_MemRegHalf()
{
int op = (CurInstr.Instr >> 10) & 0x3;
bool load = op != 0;
int size = op != 1 ? 16 : 8;
bool signExtend = op & 1;
int flags = 0;
if (signExtend)
flags |= memop_SignExtend;
if (!load)
flags |= memop_Store;
Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(MapReg(CurInstr.T_Reg(6))),
size, flags);
}
void Compiler::T_Comp_MemImmHalf()
{
u32 offset = (CurInstr.Instr >> 5) & 0x3E;
bool load = CurInstr.Instr & (1 << 11);
Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(offset), 16,
load ? 0 : memop_Store);
}
void Compiler::T_Comp_LoadPCRel()
{
u32 addr = (R15 & ~0x2) + ((CurInstr.Instr & 0xFF) << 2);
if (Config::JIT_LiteralOptimisations)
{
Comp_MemLoadLiteral(32, false, CurInstr.T_Reg(8), addr);
Comp_AddCycles_CDI();
}
else
{
bool negative = addr < R15;
u32 abs = negative ? R15 - addr : addr - R15;
Comp_MemAccess(CurInstr.T_Reg(8), 15, Op2(abs), 32, negative ? memop_SubtractOffset : 0);
}
}
void Compiler::T_Comp_MemSPRel()
{
u32 offset = (CurInstr.Instr & 0xFF) * 4;
bool load = CurInstr.Instr & (1 << 11);
Comp_MemAccess(CurInstr.T_Reg(8), 13, Op2(offset), 32, load ? 0 : memop_Store);
}
s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode)
{
IrregularCycles = true;
int regsCount = regs.Count();
if (regsCount == 0)
return 0; // actually not the right behaviour TODO: fix me
SUB(SP, SP, ((regsCount + 1) & ~1) * 8);
if (store)
{
Comp_AddCycles_CD();
if (usermode && (regs & BitSet16(0x7f00)))
UBFX(W0, RCPSR, 0, 5);
int i = regsCount - 1;
BitSet16::Iterator it = regs.begin();
while (it != regs.end())
{
BitSet16::Iterator nextReg = it;
nextReg++;
int reg = *it;
if (usermode && reg >= 8 && reg < 15)
{
if (RegCache.Mapping[reg] != INVALID_REG)
MOV(W3, MapReg(reg));
else
LoadReg(reg, W3);
MOVI2R(W1, reg - 8);
BL(ReadBanked);
STR(INDEX_UNSIGNED, W3, SP, i * 8);
}
else if (!usermode && nextReg != regs.end())
{
ARM64Reg first = W3;
ARM64Reg second = W4;
if (RegCache.Mapping[reg] != INVALID_REG)
first = MapReg(reg);
else
LoadReg(reg, W3);
if (RegCache.Mapping[*nextReg] != INVALID_REG)
second = MapReg(*nextReg);
else
LoadReg(*nextReg, W4);
STP(INDEX_SIGNED, EncodeRegTo64(second), EncodeRegTo64(first), SP, i * 8 - 8);
i--;
it++;
}
else if (RegCache.Mapping[reg] != INVALID_REG)
STR(INDEX_UNSIGNED, MapReg(reg), SP, i * 8);
else
{
LoadReg(reg, W3);
STR(INDEX_UNSIGNED, W3, SP, i * 8);
}
i--;
it++;
}
}
if (decrement)
{
SUB(W0, MapReg(rn), regsCount * 4);
preinc ^= true;
}
else
MOV(W0, MapReg(rn));
ADD(X1, SP, 0);
MOVI2R(W2, regsCount);
BL(Num ? MemFuncsSeq7[store][preinc] : MemFuncsSeq9[store][preinc]);
if (!store)
{
Comp_AddCycles_CDI();
if (usermode && (regs & BitSet16(0x7f00)))
UBFX(W0, RCPSR, 0, 5);
int i = regsCount - 1;
BitSet16::Iterator it = regs.begin();
while (it != regs.end())
{
BitSet16::Iterator nextReg = it;
nextReg++;
int reg = *it;
if (usermode && reg >= 8 && reg < 15)
{
LDR(INDEX_UNSIGNED, W3, SP, i * 8);
MOVI2R(W1, reg - 8);
BL(WriteBanked);
FixupBranch alreadyWritten = CBNZ(W4);
if (RegCache.Mapping[reg] != INVALID_REG)
{
MOV(MapReg(reg), W3);
RegCache.DirtyRegs |= 1 << reg;
}
else
SaveReg(reg, W3);
SetJumpTarget(alreadyWritten);
}
else if (!usermode && nextReg != regs.end())
{
ARM64Reg first = W3, second = W4;
if (RegCache.Mapping[reg] != INVALID_REG)
{
first = MapReg(reg);
if (reg != 15)
RegCache.DirtyRegs |= 1 << reg;
}
if (RegCache.Mapping[*nextReg] != INVALID_REG)
{
second = MapReg(*nextReg);
if (*nextReg != 15)
RegCache.DirtyRegs |= 1 << *nextReg;
}
LDP(INDEX_SIGNED, EncodeRegTo64(second), EncodeRegTo64(first), SP, i * 8 - 8);
if (first == W3)
SaveReg(reg, W3);
if (second == W4)
SaveReg(*nextReg, W4);
it++;
i--;
}
else if (RegCache.Mapping[reg] != INVALID_REG)
{
ARM64Reg mapped = MapReg(reg);
LDR(INDEX_UNSIGNED, mapped, SP, i * 8);
if (reg != 15)
RegCache.DirtyRegs |= 1 << reg;
}
else
{
LDR(INDEX_UNSIGNED, W3, SP, i * 8);
SaveReg(reg, W3);
}
it++;
i--;
}
}
ADD(SP, SP, ((regsCount + 1) & ~1) * 8);
if (!store && regs[15])
{
ARM64Reg mapped = MapReg(15);
Comp_JumpTo(mapped, Num == 0, usermode);
}
return regsCount * 4 * (decrement ? -1 : 1);
}
void Compiler::A_Comp_LDM_STM()
{
BitSet16 regs(CurInstr.Instr & 0xFFFF);
bool load = CurInstr.Instr & (1 << 20);
bool pre = CurInstr.Instr & (1 << 24);
bool add = CurInstr.Instr & (1 << 23);
bool writeback = CurInstr.Instr & (1 << 21);
bool usermode = CurInstr.Instr & (1 << 22);
ARM64Reg rn = MapReg(CurInstr.A_Reg(16));
s32 offset = Comp_MemAccessBlock(CurInstr.A_Reg(16), regs, !load, pre, !add, usermode);
if (load && writeback && regs[CurInstr.A_Reg(16)])
writeback = Num == 0
? (!(regs & ~BitSet16(1 << CurInstr.A_Reg(16)))) || (regs & ~BitSet16((2 << CurInstr.A_Reg(16)) - 1))
: false;
if (writeback)
{
if (offset > 0)
ADD(rn, rn, offset);
else
SUB(rn, rn, -offset);
}
}
void Compiler::T_Comp_PUSH_POP()
{
bool load = CurInstr.Instr & (1 << 11);
BitSet16 regs(CurInstr.Instr & 0xFF);
if (CurInstr.Instr & (1 << 8))
{
if (load)
regs[15] = true;
else
regs[14] = true;
}
ARM64Reg sp = MapReg(13);
s32 offset = Comp_MemAccessBlock(13, regs, !load, !load, !load, false);
if (offset > 0)
ADD(sp, sp, offset);
else
SUB(sp, sp, -offset);
}
void Compiler::T_Comp_LDMIA_STMIA()
{
BitSet16 regs(CurInstr.Instr & 0xFF);
ARM64Reg rb = MapReg(CurInstr.T_Reg(8));
bool load = CurInstr.Instr & (1 << 11);
u32 regsCount = regs.Count();
s32 offset = Comp_MemAccessBlock(CurInstr.T_Reg(8), regs, !load, false, false, false);
if (!load || !regs[CurInstr.T_Reg(8)])
{
if (offset > 0)
ADD(rb, rb, offset);
else
SUB(rb, rb, -offset);
}
}
}

View File

@ -2,6 +2,8 @@
#include <stdio.h>
#include "Config.h"
namespace ARMInstrInfo
{
@ -363,7 +365,11 @@ Info Decode(bool thumb, u32 num, u32 instr)
res.SpecialKind = special_WriteMem;
if (res.Kind == ARMInstrInfo::tk_LDR_PCREL)
{
if (!Config::JIT_LiteralOptimisations)
res.SrcRegs |= 1 << 15;
res.SpecialKind = special_LoadLiteral;
}
if (res.Kind == tk_LDMIA || res.Kind == tk_POP)
{
@ -417,7 +423,6 @@ Info Decode(bool thumb, u32 num, u32 instr)
u32 cp = ((instr >> 8) & 0xF);
if ((num == 0 && cp != 15) || (num == 1 && cp != 14))
{
printf("happens\n");
data = A_UNK;
res.Kind = ak_UNK;
}

View File

@ -60,10 +60,31 @@ if (ENABLE_JIT)
ARMJIT_x64/ARMJIT_Branch.cpp
dolphin/CommonFuncs.cpp
dolphin/x64ABI.cpp
dolphin/x64CPUDetect.cpp
dolphin/x64Emitter.cpp
)
if (ARCHITECTURE STREQUAL x86_64)
target_sources(core PRIVATE
dolphin/x64ABI.cpp
dolphin/x64CPUDetect.cpp
dolphin/x64Emitter.cpp
ARMJIT_x64/ARMJIT_Compiler.cpp
ARMJIT_x64/ARMJIT_ALU.cpp
ARMJIT_x64/ARMJIT_LoadStore.cpp
ARMJIT_x64/ARMJIT_Branch.cpp
)
endif()
if (ARCHITECTURE STREQUAL ARM64)
target_sources(core PRIVATE
dolphin/Arm64Emitter.cpp
dolphin/MathUtil.cpp
ARMJIT_A64/ARMJIT_Compiler.cpp
ARMJIT_A64/ARMJIT_ALU.cpp
ARMJIT_A64/ARMJIT_LoadStore.cpp
ARMJIT_A64/ARMJIT_Branch.cpp
)
endif()
endif()
if (WIN32)

24
src/dolphin/Align.h Normal file
View File

@ -0,0 +1,24 @@
// This file is under the public domain.
#pragma once
#include <cstddef>
#include <type_traits>
namespace Common
{
template <typename T>
constexpr T AlignUp(T value, size_t size)
{
static_assert(std::is_unsigned<T>(), "T must be an unsigned value.");
return static_cast<T>(value + (size - value % size) % size);
}
template <typename T>
constexpr T AlignDown(T value, size_t size)
{
static_assert(std::is_unsigned<T>(), "T must be an unsigned value.");
return static_cast<T>(value - value % size);
}
} // namespace Common

4466
src/dolphin/Arm64Emitter.cpp Normal file

File diff suppressed because it is too large Load Diff

1152
src/dolphin/Arm64Emitter.h Normal file

File diff suppressed because it is too large Load Diff

27
src/dolphin/ArmCommon.h Normal file
View File

@ -0,0 +1,27 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "../types.h"
enum CCFlags
{
CC_EQ = 0, // Equal
CC_NEQ, // Not equal
CC_CS, // Carry Set
CC_CC, // Carry Clear
CC_MI, // Minus (Negative)
CC_PL, // Plus
CC_VS, // Overflow
CC_VC, // No Overflow
CC_HI, // Unsigned higher
CC_LS, // Unsigned lower or same
CC_GE, // Signed greater than or equal
CC_LT, // Signed less than
CC_GT, // Signed greater than
CC_LE, // Signed less than or equal
CC_AL, // Always (unconditional) 14
CC_HS = CC_CS, // Alias of CC_CS Unsigned higher or same
CC_LO = CC_CC, // Alias of CC_CC Unsigned lower
};
const u32 NO_COND = 0xE0000000;

254
src/dolphin/BitUtils.h Normal file
View File

@ -0,0 +1,254 @@
// Copyright 2017 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <climits>
#include <cstddef>
#include <cstring>
#include <type_traits>
namespace Common
{
///
/// Retrieves the size of a type in bits.
///
/// @tparam T Type to get the size of.
///
/// @return the size of the type in bits.
///
template <typename T>
constexpr size_t BitSize() noexcept
{
return sizeof(T) * CHAR_BIT;
}
///
/// Extracts a bit from a value.
///
/// @param src The value to extract a bit from.
/// @param bit The bit to extract.
///
/// @tparam T The type of the value.
///
/// @return The extracted bit.
///
template <typename T>
constexpr T ExtractBit(const T src, const size_t bit) noexcept
{
return (src >> bit) & static_cast<T>(1);
}
///
/// Extracts a bit from a value.
///
/// @param src The value to extract a bit from.
///
/// @tparam bit The bit to extract.
/// @tparam T The type of the value.
///
/// @return The extracted bit.
///
template <size_t bit, typename T>
constexpr T ExtractBit(const T src) noexcept
{
static_assert(bit < BitSize<T>(), "Specified bit must be within T's bit width.");
return ExtractBit(src, bit);
}
///
/// Extracts a range of bits from a value.
///
/// @param src The value to extract the bits from.
/// @param begin The beginning of the bit range. This is inclusive.
/// @param end The ending of the bit range. This is inclusive.
///
/// @tparam T The type of the value.
/// @tparam Result The returned result type. This is the unsigned analog
/// of a signed type if a signed type is passed as T.
///
/// @return The extracted bits.
///
template <typename T, typename Result = std::make_unsigned_t<T>>
constexpr Result ExtractBits(const T src, const size_t begin, const size_t end) noexcept
{
return static_cast<Result>(((static_cast<Result>(src) << ((BitSize<T>() - 1) - end)) >>
(BitSize<T>() - end + begin - 1)));
}
///
/// Extracts a range of bits from a value.
///
/// @param src The value to extract the bits from.
///
/// @tparam begin The beginning of the bit range. This is inclusive.
/// @tparam end The ending of the bit range. This is inclusive.
/// @tparam T The type of the value.
/// @tparam Result The returned result type. This is the unsigned analog
/// of a signed type if a signed type is passed as T.
///
/// @return The extracted bits.
///
template <size_t begin, size_t end, typename T, typename Result = std::make_unsigned_t<T>>
constexpr Result ExtractBits(const T src) noexcept
{
static_assert(begin < end, "Beginning bit must be less than the ending bit.");
static_assert(begin < BitSize<T>(), "Beginning bit is larger than T's bit width.");
static_assert(end < BitSize<T>(), "Ending bit is larger than T's bit width.");
return ExtractBits<T, Result>(src, begin, end);
}
///
/// Rotates a value left (ROL).
///
/// @param value The value to rotate.
/// @param amount The number of bits to rotate the value.
/// @tparam T An unsigned type.
///
/// @return The rotated value.
///
template <typename T>
constexpr T RotateLeft(const T value, size_t amount) noexcept
{
static_assert(std::is_unsigned<T>(), "Can only rotate unsigned types left.");
amount %= BitSize<T>();
if (amount == 0)
return value;
return static_cast<T>((value << amount) | (value >> (BitSize<T>() - amount)));
}
///
/// Rotates a value right (ROR).
///
/// @param value The value to rotate.
/// @param amount The number of bits to rotate the value.
/// @tparam T An unsigned type.
///
/// @return The rotated value.
///
template <typename T>
constexpr T RotateRight(const T value, size_t amount) noexcept
{
static_assert(std::is_unsigned<T>(), "Can only rotate unsigned types right.");
amount %= BitSize<T>();
if (amount == 0)
return value;
return static_cast<T>((value >> amount) | (value << (BitSize<T>() - amount)));
}
///
/// Verifies whether the supplied value is a valid bit mask of the form 0b00...0011...11.
/// Both edge cases of all zeros and all ones are considered valid masks, too.
///
/// @param mask The mask value to test for validity.
///
/// @tparam T The type of the value.
///
/// @return A bool indicating whether the mask is valid.
///
template <typename T>
constexpr bool IsValidLowMask(const T mask) noexcept
{
static_assert(std::is_integral<T>::value, "Mask must be an integral type.");
static_assert(std::is_unsigned<T>::value, "Signed masks can introduce hard to find bugs.");
// Can be efficiently determined without looping or bit counting. It's the counterpart
// to https://graphics.stanford.edu/~seander/bithacks.html#DetermineIfPowerOf2
// and doesn't require special casing either edge case.
return (mask & (mask + 1)) == 0;
}
///
/// Reinterpret objects of one type as another by bit-casting between object representations.
///
/// @remark This is the example implementation of std::bit_cast which is to be included
/// in C++2a. See http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0476r2.html
/// for more details. The only difference is this variant is not constexpr,
/// as the mechanism for bit_cast requires a compiler built-in to have that quality.
///
/// @param source The source object to convert to another representation.
///
/// @tparam To The type to reinterpret source as.
/// @tparam From The initial type representation of source.
///
/// @return The representation of type From as type To.
///
/// @pre Both To and From types must be the same size
/// @pre Both To and From types must satisfy the TriviallyCopyable concept.
///
template <typename To, typename From>
inline To BitCast(const From& source) noexcept
{
static_assert(sizeof(From) == sizeof(To),
"BitCast source and destination types must be equal in size.");
static_assert(std::is_trivially_copyable<From>(),
"BitCast source type must be trivially copyable.");
static_assert(std::is_trivially_copyable<To>(),
"BitCast destination type must be trivially copyable.");
std::aligned_storage_t<sizeof(To), alignof(To)> storage;
std::memcpy(&storage, &source, sizeof(storage));
return reinterpret_cast<To&>(storage);
}
template <typename T, typename PtrType>
class BitCastPtrType
{
public:
static_assert(std::is_trivially_copyable<PtrType>(),
"BitCastPtr source type must be trivially copyable.");
static_assert(std::is_trivially_copyable<T>(),
"BitCastPtr destination type must be trivially copyable.");
explicit BitCastPtrType(PtrType* ptr) : m_ptr(ptr) {}
// Enable operator= only for pointers to non-const data
template <typename S>
inline typename std::enable_if<std::is_same<S, T>() && !std::is_const<PtrType>()>::type
operator=(const S& source)
{
std::memcpy(m_ptr, &source, sizeof(source));
}
inline operator T() const
{
T result;
std::memcpy(&result, m_ptr, sizeof(result));
return result;
}
private:
PtrType* m_ptr;
};
// Provides an aliasing-safe alternative to reinterpret_cast'ing pointers to structs
// Conversion constructor and operator= provided for a convenient syntax.
// Usage: MyStruct s = BitCastPtr<MyStruct>(some_ptr);
// BitCastPtr<MyStruct>(some_ptr) = s;
template <typename T, typename PtrType>
inline auto BitCastPtr(PtrType* ptr) noexcept -> BitCastPtrType<T, PtrType>
{
return BitCastPtrType<T, PtrType>{ptr};
}
template <typename T>
void SetBit(T& value, size_t bit_number, bool bit_value)
{
static_assert(std::is_unsigned<T>(), "SetBit is only sane on unsigned types.");
if (bit_value)
value |= (T{1} << bit_number);
else
value &= ~(T{1} << bit_number);
}
} // namespace Common

View File

@ -61,3 +61,15 @@
{ \
printf(fmt "\n", ## __VA_ARGS__); \
} while (false)
#if __cplusplus < 201703L
// cheat
namespace std
{
template <typename T>
T clamp(const T& v, const T& lo, const T& hi)
{
return v < lo ? lo : (v > hi ? hi : v);
}
}
#endif

13
src/dolphin/MathUtil.cpp Normal file
View File

@ -0,0 +1,13 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "MathUtil.h"
#include <numeric>
// Calculate sum of a float list
float MathFloatVectorSum(const std::vector<float>& Vec)
{
return std::accumulate(Vec.begin(), Vec.end(), 0.0f);
}

121
src/dolphin/MathUtil.h Normal file
View File

@ -0,0 +1,121 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <algorithm>
#include <vector>
#include "Compat.h"
#include "../types.h"
#ifdef _MSC_VER
#include <intrin.h>
#endif
namespace MathUtil
{
constexpr double TAU = 6.2831853071795865;
constexpr double PI = TAU / 2;
template <typename T>
constexpr auto Sign(const T& val) -> decltype((T{} < val) - (val < T{}))
{
return (T{} < val) - (val < T{});
}
template <typename T, typename F>
constexpr auto Lerp(const T& x, const T& y, const F& a) -> decltype(x + (y - x) * a)
{
return x + (y - x) * a;
}
template <typename T>
constexpr bool IsPow2(T imm)
{
return imm > 0 && (imm & (imm - 1)) == 0;
}
constexpr u32 NextPowerOf2(u32 value)
{
--value;
value |= value >> 1;
value |= value >> 2;
value |= value >> 4;
value |= value >> 8;
value |= value >> 16;
++value;
return value;
}
template <class T>
struct Rectangle
{
T left{};
T top{};
T right{};
T bottom{};
constexpr Rectangle() = default;
constexpr Rectangle(T theLeft, T theTop, T theRight, T theBottom)
: left(theLeft), top(theTop), right(theRight), bottom(theBottom)
{
}
constexpr bool operator==(const Rectangle& r) const
{
return left == r.left && top == r.top && right == r.right && bottom == r.bottom;
}
T GetWidth() const { return abs(right - left); }
T GetHeight() const { return abs(bottom - top); }
// If the rectangle is in a coordinate system with a lower-left origin, use
// this Clamp.
void ClampLL(T x1, T y1, T x2, T y2)
{
left = std::clamp(left, x1, x2);
right = std::clamp(right, x1, x2);
top = std::clamp(top, y2, y1);
bottom = std::clamp(bottom, y2, y1);
}
// If the rectangle is in a coordinate system with an upper-left origin,
// use this Clamp.
void ClampUL(T x1, T y1, T x2, T y2)
{
left = std::clamp(left, x1, x2);
right = std::clamp(right, x1, x2);
top = std::clamp(top, y1, y2);
bottom = std::clamp(bottom, y1, y2);
}
};
} // namespace MathUtil
float MathFloatVectorSum(const std::vector<float>&);
// Rounds down. 0 -> undefined
inline int IntLog2(u64 val)
{
#if defined(__GNUC__)
return 63 - __builtin_clzll(val);
#elif defined(_MSC_VER)
unsigned long result = ULONG_MAX;
_BitScanReverse64(&result, val);
return result;
#else
int result = -1;
while (val != 0)
{
val >>= 1;
++result;
}
return result;
#endif
}