JIT: implemented most ALU instructions

This commit is contained in:
RSDuck 2019-06-25 17:09:27 +02:00
parent c692287eba
commit 2f6b46fd4f
8 changed files with 881 additions and 166 deletions

View File

@ -560,10 +560,10 @@ void ARMv5::Execute()
AddCycles_C();
}*/
if (!ARMJIT::IsMapped(Num, R[15] - ((CPSR&0x20)?2:4)))
printf("aaarg ungempappter raum %x\n", R[15]);
/*if (!ARMJIT::IsMapped(0, R[15] - ((CPSR&0x20)?2:4)))
printf("aaarg ungempappter raum %x\n", R[15]);*/
ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock(Num, R[15] - ((CPSR&0x20)?2:4));
ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock(0, R[15] - ((CPSR&0x20)?2:4));
if (block == NULL)
block = ARMJIT::CompileBlock(this);
Cycles += block();
@ -615,7 +615,7 @@ void ARMv4::Execute()
while (NDS::ARM7Timestamp < NDS::ARM7Target)
{
if (CPSR & 0x20) // THUMB
/*if (CPSR & 0x20) // THUMB
{
// prefetch
R[15] += 2;
@ -643,7 +643,15 @@ void ARMv4::Execute()
}
else
AddCycles_C();
}
}*/
/*if (!ARMJIT::IsMapped(1, R[15] - ((CPSR&0x20)?2:4)))
printf("aaarg ungempappter raum %x\n", R[15]);*/
ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock(1, R[15] - ((CPSR&0x20)?2:4));
if (block == NULL)
block = ARMJIT::CompileBlock(this);
Cycles += block();
// TODO optimize this shit!!!
if (Halted)

View File

@ -1,5 +1,7 @@
#include "ARMJIT.h"
#include <string.h>
#include "ARMJIT_x64/ARMJIT_Compiler.h"
namespace ARMJIT
@ -8,7 +10,6 @@ namespace ARMJIT
Compiler* compiler;
BlockCache cache;
#define DUP2(x) x, x
static ptrdiff_t JIT_MEM[2][32] = {
@ -174,4 +175,17 @@ CompiledBlock CompileBlock(ARM* cpu)
return block;
}
void ResetBlocks()
{
memset(cache.MainRAM, 0, sizeof(cache.MainRAM));
memset(cache.SWRAM, 0, sizeof(cache.SWRAM));
memset(cache.ARM9_BIOS, 0, sizeof(cache.ARM9_BIOS));
memset(cache.ARM9_ITCM, 0, sizeof(cache.ARM9_ITCM));
memset(cache.ARM9_LCDC, 0, sizeof(cache.ARM9_LCDC));
memset(cache.ARM7_BIOS, 0, sizeof(cache.ARM7_BIOS));
memset(cache.ARM7_WIRAM, 0, sizeof(cache.ARM7_WIRAM));
memset(cache.ARM7_WRAM, 0, sizeof(cache.ARM7_WRAM));
memset(cache.ARM7_WVRAM, 0, sizeof(cache.ARM7_WVRAM));
}
}

View File

@ -3,8 +3,6 @@
#include "types.h"
#include <string.h>
#include "ARM.h"
#include "ARM_InstrInfo.h"
@ -13,14 +11,6 @@ namespace ARMJIT
typedef u32 (*CompiledBlock)();
class RegCache
{
static const int NativeRegAllocOrder[];
static const int NativeRegsCount;
};
struct FetchedInstr
{
u32 A_Reg(int pos) const
@ -117,24 +107,13 @@ inline void InsertBlock(u32 num, u32 addr, CompiledBlock func)
cache.AddrMapping[num][(addr & 0xFFFFFFF) >> 14][(addr & 0x3FFF) >> 1] = func;
}
inline void ResetBlocks()
{
memset(cache.MainRAM, 0, sizeof(cache.MainRAM));
memset(cache.SWRAM, 0, sizeof(cache.SWRAM));
memset(cache.ARM9_BIOS, 0, sizeof(cache.ARM9_BIOS));
memset(cache.ARM9_ITCM, 0, sizeof(cache.ARM9_ITCM));
memset(cache.ARM9_LCDC, 0, sizeof(cache.ARM9_LCDC));
memset(cache.ARM7_BIOS, 0, sizeof(cache.ARM7_BIOS));
memset(cache.ARM7_WIRAM, 0, sizeof(cache.ARM7_WIRAM));
memset(cache.ARM7_WRAM, 0, sizeof(cache.ARM7_WRAM));
memset(cache.ARM7_WVRAM, 0, sizeof(cache.ARM7_WVRAM));
}
void Init();
void DeInit();
CompiledBlock CompileBlock(ARM* cpu);
void ResetBlocks();
}
#endif

136
src/ARMJIT_RegCache.h Normal file
View File

@ -0,0 +1,136 @@
#ifndef ARMJIT_REGCACHE_H
#define ARMJIT_REGCACHE_H
#include "ARMJIT.h"
// TODO: replace this in the future
#include "dolphin/BitSet.h"
#include <assert.h>
namespace ARMJIT
{
template <typename T, typename Reg>
class RegCache
{
public:
RegCache()
{}
RegCache(T* compiler, FetchedInstr instrs[], int instrsCount)
: Compiler(compiler), Instrs(instrs), InstrsCount(instrsCount)
{
for (int i = 0; i < 16; i++)
Mapping[i] = (Reg)-1;
}
void UnloadRegister(int reg)
{
assert(Mapping[reg] != -1);
if (DirtyRegs & (1 << reg))
Compiler->UnloadReg(reg, Mapping[reg]);
DirtyRegs &= ~(1 << reg);
LoadedRegs &= ~(1 << reg);
NativeRegsUsed &= ~(1 << (int)Mapping[reg]);
Mapping[reg] = (Reg)-1;
}
void LoadRegister(int reg)
{
assert(Mapping[reg] == -1);
for (int i = 0; i < NativeRegsAvailable; i++)
{
Reg nativeReg = NativeRegAllocOrder[i];
if (!(NativeRegsUsed & (1 << nativeReg)))
{
Mapping[reg] = nativeReg;
NativeRegsUsed |= 1 << (int)nativeReg;
LoadedRegs |= 1 << reg;
Compiler->LoadReg(reg, nativeReg);
return;
}
}
assert("Welp!");
}
void Flush()
{
BitSet16 loadedSet(LoadedRegs);
for (int reg : loadedSet)
UnloadRegister(reg);
}
void Prepare(int i)
{
u16 futureNeeded = 0;
int ranking[16];
for (int j = 0; j < 16; j++)
ranking[j] = 0;
for (int j = i; j < InstrsCount; j++)
{
BitSet16 regsNeeded((Instrs[j].Info.SrcRegs & ~(1 << 15)) | Instrs[j].Info.DstRegs);
futureNeeded |= regsNeeded.m_val;
for (int reg : regsNeeded)
ranking[reg]++;
}
// we'll unload all registers which are never used again
BitSet16 neverNeededAgain(LoadedRegs & ~futureNeeded);
for (int reg : neverNeededAgain)
UnloadRegister(reg);
FetchedInstr Instr = Instrs[i];
u16 necessaryRegs = (Instr.Info.SrcRegs & ~(1 << 15)) | Instr.Info.DstRegs;
BitSet16 needToBeLoaded(necessaryRegs & ~LoadedRegs);
if (needToBeLoaded != BitSet16(0))
{
int neededCount = needToBeLoaded.Count();
BitSet16 loadedSet(LoadedRegs);
while (loadedSet.Count() + neededCount > NativeRegsAvailable)
{
int leastReg = -1;
int rank = 1000;
for (int reg : loadedSet)
{
if (!((1 << reg) & necessaryRegs) && ranking[reg] < rank)
{
leastReg = reg;
rank = ranking[reg];
}
}
assert(leastReg != -1);
UnloadRegister(leastReg);
loadedSet.m_val = LoadedRegs;
}
for (int reg : needToBeLoaded)
LoadRegister(reg);
}
DirtyRegs |= Instr.Info.DstRegs;
}
static const Reg NativeRegAllocOrder[];
static const int NativeRegsAvailable;
Reg Mapping[16];
u32 NativeRegsUsed = 0;
u16 LoadedRegs = 0;
u16 DirtyRegs = 0;
T* Compiler;
FetchedInstr* Instrs;
int InstrsCount;
};
}
#endif

View File

@ -0,0 +1,546 @@
#include "ARMJIT_Compiler.h"
using namespace Gen;
namespace ARMJIT
{
// uses RSCRATCH3
void Compiler::Comp_ArithTriOp(void (Compiler::*op)(int, const OpArg&, const OpArg&),
OpArg rd, OpArg rn, OpArg op2, bool carryUsed, int opFlags)
{
if (opFlags & opSyncCarry)
{
BT(32, R(RCPSR), Imm8(29));
if (opFlags & opInvertCarry)
CMC();
}
if (rd == rn && !(opFlags & opInvertOp2))
(this->*op)(32, rd, op2);
else if (opFlags & opSymmetric && op2 == R(RSCRATCH))
{
if (opFlags & opInvertOp2)
NOT(32, op2);
(this->*op)(32, op2, rn);
MOV(32, rd, op2);
}
else
{
if (opFlags & opInvertOp2)
{
if (op2 != R(RSCRATCH))
{
MOV(32, R(RSCRATCH), op2);
op2 = R(RSCRATCH);
}
NOT(32, op2);
}
MOV(32, R(RSCRATCH3), rn);
(this->*op)(32, R(RSCRATCH3), op2);
MOV(32, rd, R(RSCRATCH3));
}
if (opFlags & opSetsFlags)
Comp_RetriveFlags(opFlags & opInvertCarry, opFlags & opRetriveCV, carryUsed);
}
void Compiler::Comp_ArithTriOpReverse(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&),
Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags)
{
if (opFlags & opSyncCarry)
{
BT(32, R(RCPSR), Imm8(29));
if (opFlags & opInvertCarry)
CMC();
}
if (op2 != R(RSCRATCH))
{
MOV(32, R(RSCRATCH), op2);
op2 = R(RSCRATCH);
}
(this->*op)(32, op2, rn);
MOV(32, rd, op2);
if (opFlags & opSetsFlags)
Comp_RetriveFlags(opFlags & opInvertCarry, opFlags & opRetriveCV, carryUsed);
}
void Compiler::Comp_CmpOp(int op, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed)
{
switch (op)
{
case 0: // TST
if (rn.IsImm())
{
MOV(32, R(RSCRATCH3), rn);
rn = R(RSCRATCH3);
}
TEST(32, rn, op2);
break;
case 1: // TEQ
MOV(32, R(RSCRATCH3), rn);
XOR(32, R(RSCRATCH3), op2);
break;
case 2: // CMP
if (rn.IsImm())
{
MOV(32, R(RSCRATCH3), rn);
rn = R(RSCRATCH3);
}
CMP(32, rn, op2);
break;
case 3: // CMN
MOV(32, R(RSCRATCH3), rn);
ADD(32, R(RSCRATCH3), op2);
break;
}
Comp_RetriveFlags(op == 2, op >= 2, carryUsed);
}
// also calculates cycles
OpArg Compiler::A_Comp_GetALUOp2(bool S, bool& carryUsed)
{
if (CurrentInstr.Instr & (1 << 25))
{
Comp_AddCycles_C();
carryUsed = false;
return Imm32(ROR(CurrentInstr.Instr & 0xFF, (CurrentInstr.Instr >> 7) & 0x1E));
}
else
{
int op = (CurrentInstr.Instr >> 5) & 0x3;
if (CurrentInstr.Instr & (1 << 4))
{
Comp_AddCycles_CI(1);
OpArg rm = MapReg(CurrentInstr.A_Reg(0));
if (rm.IsImm() && CurrentInstr.A_Reg(0) == 15)
rm = Imm32(rm.Imm32() + 4);
return Comp_RegShiftReg(op, MapReg(CurrentInstr.A_Reg(8)), rm, S, carryUsed);
}
else
{
Comp_AddCycles_C();
return Comp_RegShiftImm(op, (CurrentInstr.Instr >> 7) & 0x1F,
MapReg(CurrentInstr.A_Reg(0)), S, carryUsed);
}
}
}
void Compiler::A_Comp_CmpOp()
{
u32 op = (CurrentInstr.Instr >> 21) & 0xF;
bool carryUsed;
OpArg rn = MapReg(CurrentInstr.A_Reg(16));
OpArg op2 = A_Comp_GetALUOp2((1 << op) & 0xF303, carryUsed);
Comp_CmpOp(op - 0x8, rn, op2, carryUsed);
}
void Compiler::A_Comp_Arith()
{
bool S = CurrentInstr.Instr & (1 << 20);
u32 op = (CurrentInstr.Instr >> 21) & 0xF;
bool carryUsed;
OpArg rn = MapReg(CurrentInstr.A_Reg(16));
OpArg rd = MapReg(CurrentInstr.A_Reg(12));
OpArg op2 = A_Comp_GetALUOp2(S && (1 << op) & 0xF303, carryUsed);
u32 sFlag = S ? opSetsFlags : 0;
switch (op)
{
case 0x0: // AND
Comp_ArithTriOp(AND, rd, rn, op2, carryUsed, opSymmetric|sFlag);
return;
case 0x1: // EOR
Comp_ArithTriOp(XOR, rd, rn, op2, carryUsed, opSymmetric|sFlag);
return;
case 0x2: // SUB
Comp_ArithTriOp(SUB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry);
return;
case 0x3: // RSB
if (op2.IsZero())
{
if (rd != rn)
MOV(32, rd, rn);
NEG(32, rd);
if (S)
Comp_RetriveFlags(true, true, false);
}
else
Comp_ArithTriOpReverse(SUB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry);
return;
case 0x4: // ADD
Comp_ArithTriOp(ADD, rd, rn, op2, carryUsed, opSymmetric|sFlag|opRetriveCV);
return;
case 0x5: // ADC
Comp_ArithTriOp(ADC, rd, rn, op2, carryUsed, opSymmetric|sFlag|opRetriveCV|opSyncCarry);
return;
case 0x6: // SBC
Comp_ArithTriOp(SBB, rd, rn, op2, carryUsed, opSymmetric|sFlag|opRetriveCV|opSyncCarry|opInvertCarry);
return;
case 0x7: // RSC
Comp_ArithTriOpReverse(SBB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry|opSyncCarry);
return;
case 0xC: // ORR
Comp_ArithTriOp(OR, rd, rn, op2, carryUsed, opSymmetric|sFlag);
return;
case 0xE: // BIC
Comp_ArithTriOp(AND, rd, rn, op2, carryUsed, sFlag|opSymmetric|opInvertOp2);
return;
default:
assert("unimplemented");
}
}
void Compiler::A_Comp_MovOp()
{
bool carryUsed;
bool S = CurrentInstr.Instr & (1 << 20);
OpArg op2 = A_Comp_GetALUOp2(S, carryUsed);
OpArg rd = MapReg(CurrentInstr.A_Reg(12));
if (rd != op2)
MOV(32, rd, op2);
if (((CurrentInstr.Instr >> 21) & 0xF) == 0xF)
NOT(32, rd);
if (S)
{
TEST(32, rd, rd);
Comp_RetriveFlags(false, false, carryUsed);
}
}
void Compiler::Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed)
{
CPSRDirty = true;
bool carryOnly = !retriveCV && carryUsed;
if (retriveCV)
{
SETcc(CC_O, R(RSCRATCH));
SETcc(sign ? CC_NC : CC_C, R(RSCRATCH3));
LEA(32, RSCRATCH2, MComplex(RSCRATCH, RSCRATCH3, SCALE_2, 0));
}
if (carryUsed == 983298)
printf("etwas ist faul im lande daenemark %x\n", CurrentInstr.Instr);
SETcc(CC_S, R(RSCRATCH));
SETcc(CC_Z, R(RSCRATCH3));
LEA(32, RSCRATCH, MComplex(RSCRATCH3, RSCRATCH, SCALE_2, 0));
int shiftAmount = 30;
if (retriveCV || carryUsed)
{
LEA(32, RSCRATCH, MComplex(RSCRATCH2, RSCRATCH, carryOnly ? SCALE_2 : SCALE_4, 0));
shiftAmount = carryOnly ? 29 : 28;
}
SHL(32, R(RSCRATCH), Imm8(shiftAmount));
AND(32, R(RCPSR), Imm32(0x3FFFFFFF & ~(carryUsed << 29) & ~((retriveCV ? 3 : 0) << 28)));
OR(32, R(RCPSR), R(RSCRATCH));
}
// always uses RSCRATCH, RSCRATCH2 only if S == true
OpArg Compiler::Comp_RegShiftReg(int op, Gen::OpArg rs, Gen::OpArg rm, bool S, bool& carryUsed)
{
carryUsed = S;
if (S)
{
XOR(32, R(RSCRATCH2), R(RSCRATCH2));
BT(32, R(RCPSR), Imm8(29));
SETcc(CC_C, R(RSCRATCH2));
}
MOV(32, R(RSCRATCH), rm);
static_assert(RSCRATCH3 == ECX);
MOV(32, R(ECX), rs);
AND(32, R(ECX), Imm32(0xFF));
FixupBranch zero = J_CC(CC_Z);
if (op < 3)
{
void (Compiler::*shiftOp)(int, const OpArg&, const OpArg&) = NULL;
if (op == 0)
shiftOp = SHL;
else if (op == 1)
shiftOp = SHR;
else if (op == 2)
shiftOp = SAR;
CMP(32, R(ECX), Imm8(32));
FixupBranch lt32 = J_CC(CC_L);
FixupBranch done1;
if (op < 2)
{
FixupBranch eq32 = J_CC(CC_E);
XOR(32, R(RSCRATCH), R(RSCRATCH));
if (S)
XOR(32, R(RSCRATCH2), R(RSCRATCH2));
done1 = J();
SetJumpTarget(eq32);
}
(this->*shiftOp)(32, R(RSCRATCH), Imm8(31));
(this->*shiftOp)(32, R(RSCRATCH), Imm8(1));
if (S)
SETcc(CC_C, R(RSCRATCH2));
FixupBranch done2 = J();
SetJumpTarget(lt32);
(this->*shiftOp)(32, R(RSCRATCH), R(ECX));
if (S)
SETcc(CC_C, R(RSCRATCH2));
if (op < 2)
SetJumpTarget(done1);
SetJumpTarget(done2);
}
else if (op == 3)
{
if (S)
BT(32, R(RSCRATCH), Imm8(31));
ROR_(32, R(RSCRATCH), R(ECX));
if (S)
SETcc(CC_C, R(RSCRATCH2));
}
SetJumpTarget(zero);
return R(RSCRATCH);
}
// may uses RSCRATCH for op2 and RSCRATCH2 for the carryValue
OpArg Compiler::Comp_RegShiftImm(int op, int amount, OpArg rm, bool S, bool& carryUsed)
{
carryUsed = true;
switch (op)
{
case 0: // LSL
if (amount > 0)
{
MOV(32, R(RSCRATCH), rm);
SHL(32, R(RSCRATCH), Imm8(amount));
if (S)
SETcc(CC_C, R(RSCRATCH2));
return R(RSCRATCH);
}
else
{
carryUsed = false;
return rm;
}
case 1: // LSR
if (amount > 0)
{
MOV(32, R(RSCRATCH), rm);
SHR(32, R(RSCRATCH), Imm8(amount));
if (S)
SETcc(CC_C, R(RSCRATCH2));
return R(RSCRATCH);
}
else
{
if (S)
{
MOV(32, R(RSCRATCH2), rm);
SHR(32, R(RSCRATCH2), Imm8(31));
}
return Imm32(0);
}
case 2: // ASR
MOV(32, R(RSCRATCH), rm);
SAR(32, R(RSCRATCH), Imm8(amount ? amount : 31));
if (S)
{
if (amount == 0)
BT(32, rm, Imm8(31));
SETcc(CC_C, R(RSCRATCH2));
}
return R(RSCRATCH);
case 3: // ROR
MOV(32, R(RSCRATCH), rm);
if (amount > 0)
ROR_(32, R(RSCRATCH), Imm8(amount));
else
{
BT(32, R(RCPSR), Imm8(29));
RCR(32, R(RSCRATCH), Imm8(1));
}
if (S)
SETcc(CC_C, R(RSCRATCH2));
return R(RSCRATCH);
}
assert(false);
}
void Compiler::T_Comp_ShiftImm()
{
OpArg rd = MapReg(CurrentInstr.T_Reg(0));
OpArg rs = MapReg(CurrentInstr.T_Reg(3));
int op = (CurrentInstr.Instr >> 11) & 0x3;
int amount = (CurrentInstr.Instr >> 6) & 0x1F;
Comp_AddCycles_C();
bool carryUsed;
OpArg shifted = Comp_RegShiftImm(op, amount, rs, true, carryUsed);
if (shifted != rd)
MOV(32, rd, shifted);
TEST(32, rd, rd);
Comp_RetriveFlags(false, false, carryUsed);
}
void Compiler::T_Comp_AddSub_()
{
OpArg rd = MapReg(CurrentInstr.T_Reg(0));
OpArg rs = MapReg(CurrentInstr.T_Reg(3));
int op = (CurrentInstr.Instr >> 9) & 0x3;
OpArg rn = op >= 2 ? Imm32((CurrentInstr.Instr >> 6) & 0x7) : MapReg(CurrentInstr.T_Reg(6));
Comp_AddCycles_C();
if (op & 1)
Comp_ArithTriOp(SUB, rd, rs, rn, false, opSetsFlags|opInvertCarry|opRetriveCV);
else
Comp_ArithTriOp(ADD, rd, rs, rn, false, opSetsFlags|opSymmetric|opRetriveCV);
}
void Compiler::T_Comp_ALU_Imm8()
{
OpArg rd = MapReg(CurrentInstr.T_Reg(8));
u32 op = (CurrentInstr.Instr >> 11) & 0x3;
OpArg imm = Imm32(CurrentInstr.Instr & 0xFF);
Comp_AddCycles_C();
switch (op)
{
case 0x0:
MOV(32, rd, imm);
TEST(32, rd, rd);
Comp_RetriveFlags(false, false, false);
return;
case 0x1:
Comp_CmpOp(2, rd, imm, false);
return;
case 0x2:
Comp_ArithTriOp(ADD, rd, rd, imm, false, opSetsFlags|opSymmetric|opRetriveCV);
return;
case 0x3:
Comp_ArithTriOp(SUB, rd, rd, imm, false, opSetsFlags|opInvertCarry|opRetriveCV);
return;
}
}
void Compiler::T_Comp_ALU()
{
OpArg rd = MapReg(CurrentInstr.T_Reg(0));
OpArg rs = MapReg(CurrentInstr.T_Reg(3));
u32 op = (CurrentInstr.Instr >> 6) & 0xF;
Comp_AddCycles_C();
switch (op)
{
case 0x0: // AND
Comp_ArithTriOp(AND, rd, rd, rs, false, opSetsFlags|opSymmetric);
return;
case 0x1: // EOR
Comp_ArithTriOp(XOR, rd, rd, rs, false, opSetsFlags|opSymmetric);
return;
case 0x2:
case 0x3:
case 0x4:
case 0x7:
{
int shiftOp = op == 7 ? 3 : op - 0x2;
bool carryUsed;
OpArg shifted = Comp_RegShiftReg(shiftOp, rs, rd, true, carryUsed);
TEST(32, shifted, shifted);
MOV(32, rd, shifted);
Comp_RetriveFlags(false, false, true);
}
return;
case 0x5: // ADC
Comp_ArithTriOp(ADC, rd, rd, rs, false, opSetsFlags|opSymmetric|opSyncCarry|opRetriveCV);
return;
case 0x6: // SBC
Comp_ArithTriOp(SBB, rd, rd, rs, false, opSetsFlags|opSyncCarry|opInvertCarry|opRetriveCV);
return;
case 0x8: // TST
Comp_CmpOp(0, rd, rs, false);
return;
case 0x9: // NEG
if (rd != rs)
MOV(32, rd, rs);
NEG(32, rd);
Comp_RetriveFlags(true, true, false);
return;
case 0xA: // CMP
Comp_CmpOp(2, rd, rs, false);
return;
case 0xB: // CMN
Comp_CmpOp(3, rd, rs, false);
return;
case 0xC: // ORR
Comp_ArithTriOp(OR, rd, rd, rs, false, opSetsFlags|opSymmetric);
return;
case 0xE: // BIC
Comp_ArithTriOp(AND, rd, rd, rs, false, opSetsFlags|opSymmetric|opInvertOp2);
return;
case 0xF: // MVN
if (rd != rs)
MOV(32, rd, rs);
NOT(32, rd);
Comp_RetriveFlags(false, false, false);
return;
default:
break;
}
}
void Compiler::T_Comp_ALU_HiReg()
{
OpArg rd = MapReg(((CurrentInstr.Instr & 0x7) | ((CurrentInstr.Instr >> 4) & 0x8)));
OpArg rs = MapReg((CurrentInstr.Instr >> 3) & 0xF);
u32 op = (CurrentInstr.Instr >> 8) & 0x3;
Comp_AddCycles_C();
switch (op)
{
case 0x0: // ADD
Comp_ArithTriOp(ADD, rd, rd, rs, false, opSymmetric|opRetriveCV);
return;
case 0x1: // CMP
Comp_CmpOp(2, rd, rs, false);
return;
case 0x2: // MOV
if (rd != rs)
MOV(32, rd, rs);
TEST(32, rd, rd);
Comp_RetriveFlags(false, false, false);
return;
}
}
}

View File

@ -8,18 +8,16 @@ using namespace Gen;
namespace ARMJIT
{
const int RegCache::NativeRegAllocOrder[] = {(int)RBX, (int)RSI, (int)RDI, (int)R12, (int)R13};
const int RegCache::NativeRegsCount = 5;
template <>
const X64Reg RegCache<Compiler, X64Reg>::NativeRegAllocOrder[] = {RBX, RSI, RDI, R12, R13};
template <>
const int RegCache<Compiler, X64Reg>::NativeRegsAvailable = 5;
Compiler::Compiler()
{
AllocCodeSpace(1024 * 1024 * 4);
}
typedef void (Compiler::*CompileFunc)();
typedef void (*InterpretFunc)(ARM*);
void Compiler::LoadCPSR()
{
assert(!CPSRDirty);
@ -36,6 +34,19 @@ void Compiler::SaveCPSR()
}
}
void Compiler::LoadReg(int reg, X64Reg nativeReg)
{
if (reg != 15)
MOV(32, R(nativeReg), MDisp(RCPU, offsetof(ARM, R[reg])));
else
MOV(32, R(nativeReg), Imm32(R15));
}
void Compiler::UnloadReg(int reg, X64Reg nativeReg)
{
MOV(32, MDisp(RCPU, offsetof(ARM, R[reg])), R(nativeReg));
}
CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrsCount)
{
if (IsAlmostFull())
@ -58,12 +69,18 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
LoadCPSR();
// TODO: this is ugly as a whole, do better
RegCache = ARMJIT::RegCache<Compiler, X64Reg>(this, instrs, instrsCount);
for (int i = 0; i < instrsCount; i++)
{
R15 += Thumb ? 2 : 4;
CurrentInstr = instrs[i];
CompileFunc comp = NULL;
CompileFunc comp = GetCompFunc(CurrentInstr.Info.Kind);
if (CurrentInstr.Info.Branches())
comp = NULL;
if (comp == NULL || i == instrsCount - 1)
{
@ -79,6 +96,11 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
SaveCPSR();
}
if (comp != NULL)
RegCache.Prepare(i);
else
RegCache.Flush();
if (Thumb)
{
if (comp == NULL)
@ -89,8 +111,7 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
ABI_CallFunction(ARMInterpreter::THUMBInstrTable[icode]);
}
else
{
}
(this->*comp)();
}
else
{
@ -101,7 +122,7 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
ABI_CallFunction(ARMInterpreter::A_BLX_IMM);
}
else if (cond == 0xF)
AddCycles_C();
Comp_AddCycles_C();
else
{
FixupBranch skipExecute;
@ -115,17 +136,17 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
MOV(32, R(RSCRATCH), Imm32(1));
SHL(32, R(RSCRATCH), R(RSCRATCH3));
TEST(32, R(RSCRATCH), Imm32(ARM::ConditionTable[cond]));
skipExecute = J_CC(CC_Z);
}
else
{
// could have used a LUT, but then where would be the fun?
BT(32, R(RCPSR), Imm8(28 + ((~(cond >> 1) & 1) << 1 | (cond >> 2 & 1) ^ (cond >> 1 & 1))));
skipExecute = J_CC(cond & 1 ? CC_C : CC_NC);
}
}
if (comp == NULL)
@ -136,8 +157,7 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
ABI_CallFunction(ARMInterpreter::ARMInstrTable[icode]);
}
else
{
}
(this->*comp)();
FixupBranch skipFailed;
if (CurrentInstr.Cond() < 0xE)
@ -145,7 +165,7 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
skipFailed = J();
SetJumpTarget(skipExecute);
AddCycles_C();
Comp_AddCycles_C();
SetJumpTarget(skipFailed);
}
@ -155,13 +175,14 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
/*
we don't need to collect the interpreted cycles,
since all functions only add to it, the dispatcher
can take care of it.
takes care of it.
*/
if (comp == NULL && i != instrsCount - 1)
LoadCPSR();
}
RegCache.Flush();
SaveCPSR();
LEA(32, RAX, MDisp(RCycles, ConstantCycles));
@ -172,42 +193,57 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
return res;
}
void Compiler::Compile(RegCache& regs, const FetchedInstr& instr)
CompileFunc Compiler::GetCompFunc(int kind)
{
// this might look like waste of space, so many repeatitions, but it's invaluable for debugging.
// see ARMInstrInfo.h for the order
const CompileFunc A_Comp[ARMInstrInfo::ak_Count] =
{
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
// AND
A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith,
A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith,
// EOR
A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith,
A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith,
// SUB
A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith,
A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith,
// RSB
A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith,
A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith,
// ADD
A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith,
A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith,
// ADC
A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith,
A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith,
// SBC
A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith,
A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith,
// RSC
A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith,
A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith,
// ORR
A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith,
A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith,
// MOV
A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp,
A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp,
// BIC
A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith,
A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith,
// MVN
A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp,
A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp,
// TST
A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp,
// TEQ
A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp,
// CMP
A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp,
// CMN
A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@ -227,21 +263,34 @@ void Compiler::Compile(RegCache& regs, const FetchedInstr& instr)
};
const CompileFunc T_Comp[ARMInstrInfo::tk_Count] = {
NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL,
// Shift imm
T_Comp_ShiftImm, T_Comp_ShiftImm, T_Comp_ShiftImm,
// Three operand ADD/SUB
T_Comp_AddSub_, T_Comp_AddSub_, T_Comp_AddSub_, T_Comp_AddSub_,
// 8 bit imm
T_Comp_ALU_Imm8, T_Comp_ALU_Imm8, T_Comp_ALU_Imm8, T_Comp_ALU_Imm8,
// general ALU
T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU,
T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU,
T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU,
T_Comp_ALU, NULL, T_Comp_ALU, T_Comp_ALU,
// hi reg
T_Comp_ALU_HiReg, T_Comp_ALU_HiReg, T_Comp_ALU_HiReg,
// pc/sp relative
NULL, NULL, NULL,
// mem...
NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL
};
return Thumb ? T_Comp[kind] : A_Comp[kind];
}
void Compiler::AddCycles_C()
void Compiler::Comp_AddCycles_C()
{
s32 cycles = Num ?
NDS::ARM7MemTimings[CurrentInstr.CodeCycles][Thumb ? 1 : 3]
@ -253,80 +302,16 @@ void Compiler::AddCycles_C()
ConstantCycles += cycles;
}
// may uses RSCRATCH for op2 and RSCRATCH2 for the carryValue
OpArg Compiler::Comp_ShiftRegImm(int op, int amount, Gen::X64Reg rm, bool S, bool& carryUsed)
{
carryUsed = true;
switch (op)
{
case 0: // LSL
if (amount > 0)
{
MOV(32, R(RSCRATCH), R(rm));
SHL(32, R(RSCRATCH), Imm8(amount));
if (S)
SETcc(CC_C, R(RSCRATCH2));
return R(RSCRATCH);
}
else
{
carryUsed = false;
return R(rm);
}
case 1: // LSR
if (amount > 0)
{
MOV(32, R(RSCRATCH), R(rm));
SHR(32, R(RSCRATCH), Imm8(amount));
if (S)
SETcc(CC_C, R(RSCRATCH2));
return R(RSCRATCH);
}
else
{
if (S)
{
MOV(32, R(RSCRATCH2), R(rm));
SHR(32, R(RSCRATCH2), Imm8(31));
}
return Imm32(0);
}
case 2: // ASR
MOV(32, R(RSCRATCH), R(rm));
SAR(32, R(RSCRATCH), Imm8(amount ? amount : 31));
if (S)
{
if (amount == 0)
{
MOV(32, R(RSCRATCH2), R(rm));
SHR(32, R(RSCRATCH2), Imm8(31));
}
else
SETcc(CC_C, R(RSCRATCH2));
}
return R(RSCRATCH);
case 3: // ROR
if (amount > 0)
{
MOV(32, R(RSCRATCH), R(rm));
ROR_(32, R(RSCRATCH), Imm8(amount));
}
else
{
BT(32, R(RCPSR), Imm8(29));
MOV(32, R(RSCRATCH), R(rm));
RCR(32, R(RSCRATCH), Imm8(1));
}
if (S)
SETcc(CC_C, R(RSCRATCH2));
return R(RSCRATCH);
}
}
void Compiler::A_Comp_ALU(const FetchedInstr& instr)
void Compiler::Comp_AddCycles_CI(u32 i)
{
s32 cycles = (Num ?
NDS::ARM7MemTimings[CurrentInstr.CodeCycles][Thumb ? 0 : 2]
: ((R15 & 0x2) ? 0 : CurrentInstr.CodeCycles)) + i;
if (CurrentInstr.Cond() < 0xE)
ADD(32, R(RCycles), Imm8(cycles));
else
ConstantCycles += cycles;
}
}

View File

@ -4,7 +4,7 @@
#include "../dolphin/x64Emitter.h"
#include "../ARMJIT.h"
#include "../ARMJIT_RegCache.h"
namespace ARMJIT
{
@ -17,6 +17,10 @@ const Gen::X64Reg RSCRATCH = Gen::EAX;
const Gen::X64Reg RSCRATCH2 = Gen::EDX;
const Gen::X64Reg RSCRATCH3 = Gen::ECX;
class Compiler;
typedef void (Compiler::*CompileFunc)();
class Compiler : public Gen::X64CodeBlock
{
public:
@ -24,24 +28,66 @@ public:
CompiledBlock CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrsCount);
void StartBlock(ARM* cpu);
CompiledBlock FinaliseBlock();
void LoadReg(int reg, Gen::X64Reg nativeReg);
void UnloadReg(int reg, Gen::X64Reg nativeReg);
void Compile(RegCache& regs, const FetchedInstr& instr);
private:
void AddCycles_C();
CompileFunc GetCompFunc(int kind);
Gen::OpArg Comp_ShiftRegImm(int op, int amount, Gen::X64Reg rm, bool S, bool& carryUsed);
void Comp_AddCycles_C();
void Comp_AddCycles_CI(u32 i);
void A_Comp_ALU(const FetchedInstr& instr);
enum
{
opSetsFlags = 1 << 0,
opSymmetric = 1 << 1,
opRetriveCV = 1 << 2,
opInvertCarry = 1 << 3,
opSyncCarry = 1 << 4,
opInvertOp2 = 1 << 5,
};
void A_Comp_Arith();
void A_Comp_MovOp();
void A_Comp_CmpOp();
void T_Comp_ShiftImm();
void T_Comp_AddSub_();
void T_Comp_ALU_Imm8();
void T_Comp_ALU();
void T_Comp_ALU_HiReg();
void Comp_ArithTriOp(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&),
Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags);
void Comp_ArithTriOpReverse(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&),
Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags);
void Comp_CmpOp(int op, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed);
void Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed);
Gen::OpArg Comp_RegShiftImm(int op, int amount, Gen::OpArg rm, bool S, bool& carryUsed);
Gen::OpArg Comp_RegShiftReg(int op, Gen::OpArg rs, Gen::OpArg rm, bool S, bool& carryUsed);
Gen::OpArg A_Comp_GetALUOp2(bool S, bool& carryUsed);
void LoadCPSR();
void SaveCPSR();
Gen::OpArg MapReg(int reg)
{
if (reg == 15 && RegCache.Mapping[reg] == Gen::INVALID_REG)
return Gen::Imm32(R15);
assert(RegCache.Mapping[reg] != Gen::INVALID_REG);
return Gen::R(RegCache.Mapping[reg]);
}
bool CPSRDirty = false;
FetchedInstr CurrentInstr;
RegCache<Compiler, Gen::X64Reg> RegCache;
bool Thumb;
u32 Num;
u32 R15;

View File

@ -52,6 +52,7 @@ add_library(core STATIC
ARMJIT.cpp
ARMJIT_x64/ARMJIT_Compiler.cpp
ARMJIT_x64/ARMJIT_ALU.cpp
dolphin/CommonFuncs.cpp
dolphin/x64ABI.cpp