JIT: most mem instructions working

+ branching
This commit is contained in:
RSDuck 2019-07-06 01:48:42 +02:00
parent 5f932cdf48
commit 2c44bf927c
10 changed files with 669 additions and 702 deletions

View File

@ -522,8 +522,9 @@ void ARMv5::Execute()
ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock(0, R[15] - ((CPSR&0x20)?2:4)); ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock(0, R[15] - ((CPSR&0x20)?2:4));
if (block == NULL) if (block == NULL)
block = ARMJIT::CompileBlock(this); ARMJIT::CompileBlock(this);
Cycles += block(); else
Cycles += block();
// TODO optimize this shit!!! // TODO optimize this shit!!!
if (Halted) if (Halted)
@ -607,8 +608,9 @@ void ARMv4::Execute()
ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock(1, R[15] - ((CPSR&0x20)?2:4)); ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock(1, R[15] - ((CPSR&0x20)?2:4));
if (block == NULL) if (block == NULL)
block = ARMJIT::CompileBlock(this); ARMJIT::CompileBlock(this);
Cycles += block(); else
Cycles += block();
// TODO optimize this shit!!! // TODO optimize this shit!!!
if (Halted) if (Halted)

View File

@ -121,12 +121,13 @@ void DeInit()
delete compiler; delete compiler;
} }
CompiledBlock CompileBlock(ARM* cpu) void CompileBlock(ARM* cpu)
{ {
bool thumb = cpu->CPSR & 0x20; bool thumb = cpu->CPSR & 0x20;
FetchedInstr instrs[12]; FetchedInstr instrs[12];
int i = 0; int i = 0;
u32 r15Initial = cpu->R[15];
u32 r15 = cpu->R[15]; u32 r15 = cpu->R[15];
u32 nextInstr[2] = {cpu->NextInstr[0], cpu->NextInstr[1]}; u32 nextInstr[2] = {cpu->NextInstr[0], cpu->NextInstr[1]};
//printf("block %x %d\n", r15, thumb); //printf("block %x %d\n", r15, thumb);
@ -169,9 +170,7 @@ CompiledBlock CompileBlock(ARM* cpu)
CompiledBlock block = compiler->CompileBlock(cpu, instrs, i); CompiledBlock block = compiler->CompileBlock(cpu, instrs, i);
InsertBlock(cpu->Num, cpu->R[15] - (thumb ? 2 : 4), block); InsertBlock(cpu->Num, r15Initial - (thumb ? 2 : 4), block);
return block;
} }
void ResetBlocks() void ResetBlocks()

View File

@ -109,7 +109,7 @@ inline void InsertBlock(u32 num, u32 addr, CompiledBlock func)
void Init(); void Init();
void DeInit(); void DeInit();
CompiledBlock CompileBlock(ARM* cpu); void CompileBlock(ARM* cpu);
void ResetBlocks(); void ResetBlocks();

View File

@ -114,7 +114,7 @@ public:
for (int reg : needToBeLoaded) for (int reg : needToBeLoaded)
LoadRegister(reg); LoadRegister(reg);
} }
DirtyRegs |= Instr.Info.DstRegs; DirtyRegs |= Instr.Info.DstRegs & ~(1 << 15);
} }
static const Reg NativeRegAllocOrder[]; static const Reg NativeRegAllocOrder[];

View File

@ -71,30 +71,30 @@ void Compiler::Comp_CmpOp(int op, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed)
{ {
switch (op) switch (op)
{ {
case 0: // TST case 0: // TST
if (rn.IsImm()) if (rn.IsImm())
{ {
MOV(32, R(RSCRATCH3), rn);
rn = R(RSCRATCH3);
}
TEST(32, rn, op2);
break;
case 1: // TEQ
MOV(32, R(RSCRATCH3), rn); MOV(32, R(RSCRATCH3), rn);
XOR(32, R(RSCRATCH3), op2); rn = R(RSCRATCH3);
break; }
case 2: // CMP TEST(32, rn, op2);
if (rn.IsImm()) break;
{ case 1: // TEQ
MOV(32, R(RSCRATCH3), rn); MOV(32, R(RSCRATCH3), rn);
rn = R(RSCRATCH3); XOR(32, R(RSCRATCH3), op2);
} break;
CMP(32, rn, op2); case 2: // CMP
break; if (rn.IsImm())
case 3: // CMN {
MOV(32, R(RSCRATCH3), rn); MOV(32, R(RSCRATCH3), rn);
ADD(32, R(RSCRATCH3), op2); rn = R(RSCRATCH3);
break; }
CMP(32, rn, op2);
break;
case 3: // CMN
MOV(32, R(RSCRATCH3), rn);
ADD(32, R(RSCRATCH3), op2);
break;
} }
Comp_RetriveFlags(op == 2, op >= 2, carryUsed); Comp_RetriveFlags(op == 2, op >= 2, carryUsed);
@ -103,38 +103,38 @@ void Compiler::Comp_CmpOp(int op, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed)
// also calculates cycles // also calculates cycles
OpArg Compiler::A_Comp_GetALUOp2(bool S, bool& carryUsed) OpArg Compiler::A_Comp_GetALUOp2(bool S, bool& carryUsed)
{ {
if (CurrentInstr.Instr & (1 << 25)) if (CurInstr.Instr & (1 << 25))
{ {
Comp_AddCycles_C(); Comp_AddCycles_C();
carryUsed = false; carryUsed = false;
return Imm32(ROR(CurrentInstr.Instr & 0xFF, (CurrentInstr.Instr >> 7) & 0x1E)); return Imm32(ROR(CurInstr.Instr & 0xFF, (CurInstr.Instr >> 7) & 0x1E));
} }
else else
{ {
int op = (CurrentInstr.Instr >> 5) & 0x3; int op = (CurInstr.Instr >> 5) & 0x3;
if (CurrentInstr.Instr & (1 << 4)) if (CurInstr.Instr & (1 << 4))
{ {
Comp_AddCycles_CI(1); Comp_AddCycles_CI(1);
OpArg rm = MapReg(CurrentInstr.A_Reg(0)); OpArg rm = MapReg(CurInstr.A_Reg(0));
if (rm.IsImm() && CurrentInstr.A_Reg(0) == 15) if (rm.IsImm() && CurInstr.A_Reg(0) == 15)
rm = Imm32(rm.Imm32() + 4); rm = Imm32(rm.Imm32() + 4);
return Comp_RegShiftReg(op, MapReg(CurrentInstr.A_Reg(8)), rm, S, carryUsed); return Comp_RegShiftReg(op, MapReg(CurInstr.A_Reg(8)), rm, S, carryUsed);
} }
else else
{ {
Comp_AddCycles_C(); Comp_AddCycles_C();
return Comp_RegShiftImm(op, (CurrentInstr.Instr >> 7) & 0x1F, return Comp_RegShiftImm(op, (CurInstr.Instr >> 7) & 0x1F,
MapReg(CurrentInstr.A_Reg(0)), S, carryUsed); MapReg(CurInstr.A_Reg(0)), S, carryUsed);
} }
} }
} }
void Compiler::A_Comp_CmpOp() void Compiler::A_Comp_CmpOp()
{ {
u32 op = (CurrentInstr.Instr >> 21) & 0xF; u32 op = (CurInstr.Instr >> 21) & 0xF;
bool carryUsed; bool carryUsed;
OpArg rn = MapReg(CurrentInstr.A_Reg(16)); OpArg rn = MapReg(CurInstr.A_Reg(16));
OpArg op2 = A_Comp_GetALUOp2((1 << op) & 0xF303, carryUsed); OpArg op2 = A_Comp_GetALUOp2((1 << op) & 0xF303, carryUsed);
Comp_CmpOp(op - 0x8, rn, op2, carryUsed); Comp_CmpOp(op - 0x8, rn, op2, carryUsed);
@ -142,12 +142,12 @@ void Compiler::A_Comp_CmpOp()
void Compiler::A_Comp_Arith() void Compiler::A_Comp_Arith()
{ {
bool S = CurrentInstr.Instr & (1 << 20); bool S = CurInstr.Instr & (1 << 20);
u32 op = (CurrentInstr.Instr >> 21) & 0xF; u32 op = (CurInstr.Instr >> 21) & 0xF;
bool carryUsed; bool carryUsed;
OpArg rn = MapReg(CurrentInstr.A_Reg(16)); OpArg rn = MapReg(CurInstr.A_Reg(16));
OpArg rd = MapReg(CurrentInstr.A_Reg(12)); OpArg rd = MapReg(CurInstr.A_Reg(12));
OpArg op2 = A_Comp_GetALUOp2(S && (1 << op) & 0xF303, carryUsed); OpArg op2 = A_Comp_GetALUOp2(S && (1 << op) & 0xF303, carryUsed);
u32 sFlag = S ? opSetsFlags : 0; u32 sFlag = S ? opSetsFlags : 0;
@ -155,13 +155,13 @@ void Compiler::A_Comp_Arith()
{ {
case 0x0: // AND case 0x0: // AND
Comp_ArithTriOp(AND, rd, rn, op2, carryUsed, opSymmetric|sFlag); Comp_ArithTriOp(AND, rd, rn, op2, carryUsed, opSymmetric|sFlag);
return; break;
case 0x1: // EOR case 0x1: // EOR
Comp_ArithTriOp(XOR, rd, rn, op2, carryUsed, opSymmetric|sFlag); Comp_ArithTriOp(XOR, rd, rn, op2, carryUsed, opSymmetric|sFlag);
return; break;
case 0x2: // SUB case 0x2: // SUB
Comp_ArithTriOp(SUB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry); Comp_ArithTriOp(SUB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry);
return; break;
case 0x3: // RSB case 0x3: // RSB
if (op2.IsZero()) if (op2.IsZero())
{ {
@ -173,41 +173,44 @@ void Compiler::A_Comp_Arith()
} }
else else
Comp_ArithTriOpReverse(SUB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry); Comp_ArithTriOpReverse(SUB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry);
return; break;
case 0x4: // ADD case 0x4: // ADD
Comp_ArithTriOp(ADD, rd, rn, op2, carryUsed, opSymmetric|sFlag|opRetriveCV); Comp_ArithTriOp(ADD, rd, rn, op2, carryUsed, opSymmetric|sFlag|opRetriveCV);
return; break;
case 0x5: // ADC case 0x5: // ADC
Comp_ArithTriOp(ADC, rd, rn, op2, carryUsed, opSymmetric|sFlag|opRetriveCV|opSyncCarry); Comp_ArithTriOp(ADC, rd, rn, op2, carryUsed, opSymmetric|sFlag|opRetriveCV|opSyncCarry);
return; break;
case 0x6: // SBC case 0x6: // SBC
Comp_ArithTriOp(SBB, rd, rn, op2, carryUsed, opSymmetric|sFlag|opRetriveCV|opSyncCarry|opInvertCarry); Comp_ArithTriOp(SBB, rd, rn, op2, carryUsed, opSymmetric|sFlag|opRetriveCV|opSyncCarry|opInvertCarry);
return; break;
case 0x7: // RSC case 0x7: // RSC
Comp_ArithTriOpReverse(SBB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry|opSyncCarry); Comp_ArithTriOpReverse(SBB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry|opSyncCarry);
return; break;
case 0xC: // ORR case 0xC: // ORR
Comp_ArithTriOp(OR, rd, rn, op2, carryUsed, opSymmetric|sFlag); Comp_ArithTriOp(OR, rd, rn, op2, carryUsed, opSymmetric|sFlag);
return; break;
case 0xE: // BIC case 0xE: // BIC
Comp_ArithTriOp(AND, rd, rn, op2, carryUsed, sFlag|opSymmetric|opInvertOp2); Comp_ArithTriOp(AND, rd, rn, op2, carryUsed, sFlag|opSymmetric|opInvertOp2);
return; break;
default: default:
assert("unimplemented"); assert("unimplemented");
} }
if (CurInstr.A_Reg(12) == 15)
Comp_JumpTo(rd.GetSimpleReg(), S);
} }
void Compiler::A_Comp_MovOp() void Compiler::A_Comp_MovOp()
{ {
bool carryUsed; bool carryUsed;
bool S = CurrentInstr.Instr & (1 << 20); bool S = CurInstr.Instr & (1 << 20);
OpArg op2 = A_Comp_GetALUOp2(S, carryUsed); OpArg op2 = A_Comp_GetALUOp2(S, carryUsed);
OpArg rd = MapReg(CurrentInstr.A_Reg(12)); OpArg rd = MapReg(CurInstr.A_Reg(12));
if (rd != op2) if (rd != op2)
MOV(32, rd, op2); MOV(32, rd, op2);
if (((CurrentInstr.Instr >> 21) & 0xF) == 0xF) if (((CurInstr.Instr >> 21) & 0xF) == 0xF)
NOT(32, rd); NOT(32, rd);
if (S) if (S)
@ -215,6 +218,9 @@ void Compiler::A_Comp_MovOp()
TEST(32, rd, rd); TEST(32, rd, rd);
Comp_RetriveFlags(false, false, carryUsed); Comp_RetriveFlags(false, false, carryUsed);
} }
if (CurInstr.A_Reg(12) == 15)
Comp_JumpTo(rd.GetSimpleReg(), S);
} }
void Compiler::Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed) void Compiler::Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed)
@ -230,7 +236,7 @@ void Compiler::Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed)
} }
if (carryUsed == 983298) if (carryUsed == 983298)
printf("etwas ist faul im lande daenemark %x\n", CurrentInstr.Instr); printf("etwas ist faul im lande daenemark %x\n", CurInstr.Instr);
SETcc(CC_S, R(RSCRATCH)); SETcc(CC_S, R(RSCRATCH));
SETcc(CC_Z, R(RSCRATCH3)); SETcc(CC_Z, R(RSCRATCH3));
@ -324,61 +330,61 @@ OpArg Compiler::Comp_RegShiftImm(int op, int amount, OpArg rm, bool S, bool& car
switch (op) switch (op)
{ {
case 0: // LSL case 0: // LSL
if (amount > 0) if (amount > 0)
{ {
MOV(32, R(RSCRATCH), rm); MOV(32, R(RSCRATCH), rm);
SHL(32, R(RSCRATCH), Imm8(amount)); SHL(32, R(RSCRATCH), Imm8(amount));
if (S) if (S)
SETcc(CC_C, R(RSCRATCH2)); SETcc(CC_C, R(RSCRATCH2));
return R(RSCRATCH);
}
else
{
carryUsed = false;
return rm;
}
case 1: // LSR
if (amount > 0)
{
MOV(32, R(RSCRATCH), rm);
SHR(32, R(RSCRATCH), Imm8(amount));
if (S)
SETcc(CC_C, R(RSCRATCH2));
return R(RSCRATCH);
}
else
{
if (S)
{
MOV(32, R(RSCRATCH2), rm);
SHR(32, R(RSCRATCH2), Imm8(31));
}
return Imm32(0);
}
case 2: // ASR
MOV(32, R(RSCRATCH), rm);
SAR(32, R(RSCRATCH), Imm8(amount ? amount : 31));
if (S)
{
if (amount == 0)
BT(32, rm, Imm8(31));
SETcc(CC_C, R(RSCRATCH2));
}
return R(RSCRATCH); return R(RSCRATCH);
case 3: // ROR }
else
{
carryUsed = false;
return rm;
}
case 1: // LSR
if (amount > 0)
{
MOV(32, R(RSCRATCH), rm); MOV(32, R(RSCRATCH), rm);
if (amount > 0) SHR(32, R(RSCRATCH), Imm8(amount));
ROR_(32, R(RSCRATCH), Imm8(amount));
else
{
BT(32, R(RCPSR), Imm8(29));
RCR(32, R(RSCRATCH), Imm8(1));
}
if (S) if (S)
SETcc(CC_C, R(RSCRATCH2)); SETcc(CC_C, R(RSCRATCH2));
return R(RSCRATCH); return R(RSCRATCH);
}
else
{
if (S)
{
MOV(32, R(RSCRATCH2), rm);
SHR(32, R(RSCRATCH2), Imm8(31));
}
return Imm32(0);
}
case 2: // ASR
MOV(32, R(RSCRATCH), rm);
SAR(32, R(RSCRATCH), Imm8(amount ? amount : 31));
if (S)
{
if (amount == 0)
BT(32, rm, Imm8(31));
SETcc(CC_C, R(RSCRATCH2));
}
return R(RSCRATCH);
case 3: // ROR
MOV(32, R(RSCRATCH), rm);
if (amount > 0)
ROR_(32, R(RSCRATCH), Imm8(amount));
else
{
BT(32, R(RCPSR), Imm8(29));
RCR(32, R(RSCRATCH), Imm8(1));
}
if (S)
SETcc(CC_C, R(RSCRATCH2));
return R(RSCRATCH);
} }
assert(false); assert(false);
@ -386,11 +392,11 @@ OpArg Compiler::Comp_RegShiftImm(int op, int amount, OpArg rm, bool S, bool& car
void Compiler::T_Comp_ShiftImm() void Compiler::T_Comp_ShiftImm()
{ {
OpArg rd = MapReg(CurrentInstr.T_Reg(0)); OpArg rd = MapReg(CurInstr.T_Reg(0));
OpArg rs = MapReg(CurrentInstr.T_Reg(3)); OpArg rs = MapReg(CurInstr.T_Reg(3));
int op = (CurrentInstr.Instr >> 11) & 0x3; int op = (CurInstr.Instr >> 11) & 0x3;
int amount = (CurrentInstr.Instr >> 6) & 0x1F; int amount = (CurInstr.Instr >> 6) & 0x1F;
Comp_AddCycles_C(); Comp_AddCycles_C();
@ -406,12 +412,12 @@ void Compiler::T_Comp_ShiftImm()
void Compiler::T_Comp_AddSub_() void Compiler::T_Comp_AddSub_()
{ {
OpArg rd = MapReg(CurrentInstr.T_Reg(0)); OpArg rd = MapReg(CurInstr.T_Reg(0));
OpArg rs = MapReg(CurrentInstr.T_Reg(3)); OpArg rs = MapReg(CurInstr.T_Reg(3));
int op = (CurrentInstr.Instr >> 9) & 0x3; int op = (CurInstr.Instr >> 9) & 0x3;
OpArg rn = op >= 2 ? Imm32((CurrentInstr.Instr >> 6) & 0x7) : MapReg(CurrentInstr.T_Reg(6)); OpArg rn = op >= 2 ? Imm32((CurInstr.Instr >> 6) & 0x7) : MapReg(CurInstr.T_Reg(6));
Comp_AddCycles_C(); Comp_AddCycles_C();
@ -423,38 +429,38 @@ void Compiler::T_Comp_AddSub_()
void Compiler::T_Comp_ALU_Imm8() void Compiler::T_Comp_ALU_Imm8()
{ {
OpArg rd = MapReg(CurrentInstr.T_Reg(8)); OpArg rd = MapReg(CurInstr.T_Reg(8));
u32 op = (CurrentInstr.Instr >> 11) & 0x3; u32 op = (CurInstr.Instr >> 11) & 0x3;
OpArg imm = Imm32(CurrentInstr.Instr & 0xFF); OpArg imm = Imm32(CurInstr.Instr & 0xFF);
Comp_AddCycles_C(); Comp_AddCycles_C();
switch (op) switch (op)
{ {
case 0x0: case 0x0:
MOV(32, rd, imm); MOV(32, rd, imm);
TEST(32, rd, rd); TEST(32, rd, rd);
Comp_RetriveFlags(false, false, false); Comp_RetriveFlags(false, false, false);
return; return;
case 0x1: case 0x1:
Comp_CmpOp(2, rd, imm, false); Comp_CmpOp(2, rd, imm, false);
return; return;
case 0x2: case 0x2:
Comp_ArithTriOp(ADD, rd, rd, imm, false, opSetsFlags|opSymmetric|opRetriveCV); Comp_ArithTriOp(ADD, rd, rd, imm, false, opSetsFlags|opSymmetric|opRetriveCV);
return; return;
case 0x3: case 0x3:
Comp_ArithTriOp(SUB, rd, rd, imm, false, opSetsFlags|opInvertCarry|opRetriveCV); Comp_ArithTriOp(SUB, rd, rd, imm, false, opSetsFlags|opInvertCarry|opRetriveCV);
return; return;
} }
} }
void Compiler::T_Comp_ALU() void Compiler::T_Comp_ALU()
{ {
OpArg rd = MapReg(CurrentInstr.T_Reg(0)); OpArg rd = MapReg(CurInstr.T_Reg(0));
OpArg rs = MapReg(CurrentInstr.T_Reg(3)); OpArg rs = MapReg(CurInstr.T_Reg(3));
u32 op = (CurrentInstr.Instr >> 6) & 0xF; u32 op = (CurInstr.Instr >> 6) & 0xF;
if ((op >= 0x2 && op < 0x4) || op == 0x7) if ((op >= 0x2 && op < 0x4) || op == 0x7)
Comp_AddCycles_CI(1); Comp_AddCycles_CI(1);
@ -522,28 +528,62 @@ void Compiler::T_Comp_ALU()
void Compiler::T_Comp_ALU_HiReg() void Compiler::T_Comp_ALU_HiReg()
{ {
OpArg rd = MapReg(((CurrentInstr.Instr & 0x7) | ((CurrentInstr.Instr >> 4) & 0x8))); u32 rd = ((CurInstr.Instr & 0x7) | ((CurInstr.Instr >> 4) & 0x8));
OpArg rs = MapReg((CurrentInstr.Instr >> 3) & 0xF); OpArg rdMapped = MapReg(rd);
OpArg rs = MapReg((CurInstr.Instr >> 3) & 0xF);
u32 op = (CurrentInstr.Instr >> 8) & 0x3; u32 op = (CurInstr.Instr >> 8) & 0x3;
Comp_AddCycles_C(); Comp_AddCycles_C();
switch (op) switch (op)
{ {
case 0x0: // ADD case 0x0: // ADD
Comp_ArithTriOp(ADD, rd, rd, rs, false, opSymmetric|opRetriveCV); Comp_ArithTriOp(ADD, rdMapped, rdMapped, rs, false, opSymmetric|opRetriveCV);
return; break;
case 0x1: // CMP case 0x1: // CMP
Comp_CmpOp(2, rd, rs, false); Comp_CmpOp(2, rdMapped, rs, false);
return; return; // this is on purpose
case 0x2: // MOV case 0x2: // MOV
if (rd != rs) if (rdMapped != rs)
MOV(32, rd, rs); MOV(32, rdMapped, rs);
TEST(32, rd, rd); TEST(32, rdMapped, rdMapped);
Comp_RetriveFlags(false, false, false); Comp_RetriveFlags(false, false, false);
return; break;
}
if (rd == 15)
{
OR(32, rdMapped, Imm8(1));
Comp_JumpTo(rdMapped.GetSimpleReg());
} }
} }
void Compiler::T_Comp_AddSP()
{
Comp_AddCycles_C();
OpArg sp = MapReg(13);
OpArg offset = Imm32((CurInstr.Instr & 0x7F) << 2);
if (CurInstr.Instr & (1 << 7))
SUB(32, sp, offset);
else
ADD(32, sp, offset);
}
void Compiler::T_Comp_RelAddr()
{
Comp_AddCycles_C();
OpArg rd = MapReg(CurInstr.T_Reg(8));
u32 offset = (CurInstr.Instr & 0xFF) << 2;
if (CurInstr.Instr & (1 << 11))
{
OpArg sp = MapReg(13);
LEA(32, rd.GetSimpleReg(), MDisp(sp.GetSimpleReg(), offset));
}
else
MOV(32, rd, Imm32((R15 & ~2) + offset));
}
} }

View File

@ -9,7 +9,7 @@ using namespace Gen;
namespace ARMJIT namespace ARMJIT
{ {
template <> template <>
const X64Reg RegCache<Compiler, X64Reg>::NativeRegAllocOrder[] = const X64Reg RegCache<Compiler, X64Reg>::NativeRegAllocOrder[] =
{ {
#ifdef _WIN32 #ifdef _WIN32
RBX, RSI, RDI, R12, R13 RBX, RSI, RDI, R12, R13
@ -18,7 +18,7 @@ const X64Reg RegCache<Compiler, X64Reg>::NativeRegAllocOrder[] =
#endif #endif
}; };
template <> template <>
const int RegCache<Compiler, X64Reg>::NativeRegsAvailable = const int RegCache<Compiler, X64Reg>::NativeRegsAvailable =
#ifdef _WIN32 #ifdef _WIN32
5 5
#else #else
@ -30,24 +30,33 @@ Compiler::Compiler()
{ {
AllocCodeSpace(1024 * 1024 * 16); AllocCodeSpace(1024 * 1024 * 16);
for (int i = 0; i < 15; i++) for (int i = 0; i < 3; i++)
{ {
ReadMemFuncs9[i] = Gen_MemoryRoutine9(false, 32, 0x1000000 * i);
WriteMemFuncs9[i] = Gen_MemoryRoutine9(true, 32, 0x1000000 * i);
for (int j = 0; j < 2; j++) for (int j = 0; j < 2; j++)
{ {
ReadMemFuncs7[j][i] = Gen_MemoryRoutine7(false, 32, j, 0x1000000 * i); MemoryFuncs9[i][j] = Gen_MemoryRoutine9(j, 8 << i);
WriteMemFuncs7[j][i] = Gen_MemoryRoutine7(true, 32, j, 0x1000000 * i); MemoryFuncs7[i][j][0] = Gen_MemoryRoutine7(j, false, 8 << i);
MemoryFuncs7[i][j][1] = Gen_MemoryRoutine7(j, true, 8 << i);
} }
} }
ReadMemFuncs9[15] = Gen_MemoryRoutine9(false, 32, 0xFF000000);
WriteMemFuncs9[15] = Gen_MemoryRoutine9(true, 32, 0xFF000000);
ReadMemFuncs7[15][0] = ReadMemFuncs7[15][1] = Gen_MemoryRoutine7(false, 32, false, 0xFF000000);
WriteMemFuncs7[15][0] = WriteMemFuncs7[15][1] = Gen_MemoryRoutine7(true, 32, false, 0xFF000000);
ResetStart = GetWritableCodePtr(); ResetStart = GetWritableCodePtr();
} }
// Classifies a data address into one of the coarse DataRegion categories.
//
// addr: the address to classify.
// Returns dataRegionDTCM when compiling for CPU 0 (Num == 0) and addr lies in
// [DTCMBase, DTCMBase + DTCMSize); otherwise the region selected by the top
// byte of addr, or dataRegionGeneric when no case matches.
DataRegion Compiler::ClassifyAddress(u32 addr)
{
    if (Num == 0)
    {
        ARMv5* cpu9 = (ARMv5*)CurCPU;
        // The previous check compared addr against DTCMBase for both bounds
        // and therefore could never succeed. Use the same unsigned
        // "addr - base < size" test that Gen_MemoryRoutine9 emits, which also
        // behaves correctly if base + size wraps around 32 bits.
        if ((u32)(addr - cpu9->DTCMBase) < (u32)cpu9->DTCMSize)
            return dataRegionDTCM;
    }

    switch (addr & 0xFF000000)
    {
    case 0x02000000:
        return dataRegionMainRAM;
    case 0x03000000:
        // Only CPU 1 with address bits 20-23 equal to 0x8 gets the exclusive
        // WRAM7 classification; everything else in this range is SWRAM.
        return (Num == 1 && (addr & 0xF00000) == 0x800000) ? dataRegionWRAM7 : dataRegionSWRAM;
    case 0x04000000:
        return dataRegionIO;
    case 0x06000000:
        return dataRegionVRAM;
    }

    return dataRegionGeneric;
}
void Compiler::LoadCPSR() void Compiler::LoadCPSR()
{ {
assert(!CPSRDirty); assert(!CPSRDirty);
@ -92,6 +101,7 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
Num = cpu->Num; Num = cpu->Num;
R15 = cpu->R[15]; R15 = cpu->R[15];
CodeRegion = cpu->CodeRegion; CodeRegion = cpu->CodeRegion;
CurCPU = cpu;
ABI_PushRegistersAndAdjustStack({ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS}, 8, 16); ABI_PushRegistersAndAdjustStack({ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS}, 8, 16);
@ -106,27 +116,32 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
for (int i = 0; i < instrsCount; i++) for (int i = 0; i < instrsCount; i++)
{ {
R15 += Thumb ? 2 : 4; R15 += Thumb ? 2 : 4;
CurrentInstr = instrs[i]; CurInstr = instrs[i];
CompileFunc comp = GetCompFunc(CurrentInstr.Info.Kind); CompileFunc comp = GetCompFunc(CurInstr.Info.Kind);
if (CurrentInstr.Info.Branches())
comp = NULL;
if (comp == NULL || i == instrsCount - 1) if (comp == NULL || i == instrsCount - 1)
{ {
MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(R15)); MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(R15));
MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(CurrentInstr.CodeCycles)); MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(CurInstr.CodeCycles));
MOV(32, MDisp(RCPU, offsetof(ARM, CurInstr)), Imm32(CurrentInstr.Instr)); MOV(32, MDisp(RCPU, offsetof(ARM, CurInstr)), Imm32(CurInstr.Instr));
if (i == instrsCount - 1) if (i == instrsCount - 1)
{ {
MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[0])), Imm32(CurrentInstr.NextInstr[0])); MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[0])), Imm32(CurInstr.NextInstr[0]));
MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[1])), Imm32(CurrentInstr.NextInstr[1])); MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[1])), Imm32(CurInstr.NextInstr[1]));
} }
SaveCPSR(); if (comp == NULL || CurInstr.Info.Branches())
SaveCPSR();
} }
// run interpreter
cpu->CodeCycles = CurInstr.CodeCycles;
cpu->R[15] = R15;
cpu->CurInstr = CurInstr.Instr;
cpu->NextInstr[0] = CurInstr.NextInstr[0];
cpu->NextInstr[1] = CurInstr.NextInstr[1];
if (comp != NULL) if (comp != NULL)
RegCache.Prepare(i); RegCache.Prepare(i);
else else
@ -134,26 +149,33 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
if (Thumb) if (Thumb)
{ {
u32 icode = (CurInstr.Instr >> 6) & 0x3FF;
if (comp == NULL) if (comp == NULL)
{ {
MOV(64, R(ABI_PARAM1), R(RCPU)); MOV(64, R(ABI_PARAM1), R(RCPU));
u32 icode = (CurrentInstr.Instr >> 6) & 0x3FF;
ABI_CallFunction(ARMInterpreter::THUMBInstrTable[icode]); ABI_CallFunction(ARMInterpreter::THUMBInstrTable[icode]);
} }
else else
(this->*comp)(); (this->*comp)();
ARMInterpreter::THUMBInstrTable[icode](cpu);
} }
else else
{ {
u32 cond = CurrentInstr.Cond(); u32 cond = CurInstr.Cond();
if (CurrentInstr.Info.Kind == ARMInstrInfo::ak_BLX_IMM) if (CurInstr.Info.Kind == ARMInstrInfo::ak_BLX_IMM)
{ {
MOV(64, R(ABI_PARAM1), R(RCPU)); MOV(64, R(ABI_PARAM1), R(RCPU));
ABI_CallFunction(ARMInterpreter::A_BLX_IMM); ABI_CallFunction(ARMInterpreter::A_BLX_IMM);
ARMInterpreter::A_BLX_IMM(cpu);
} }
else if (cond == 0xF) else if (cond == 0xF)
{
Comp_AddCycles_C(); Comp_AddCycles_C();
cpu->AddCycles_C();
}
else else
{ {
FixupBranch skipExecute; FixupBranch skipExecute;
@ -180,18 +202,18 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
} }
u32 icode = ((CurInstr.Instr >> 4) & 0xF) | ((CurInstr.Instr >> 16) & 0xFF0);
if (comp == NULL) if (comp == NULL)
{ {
MOV(64, R(ABI_PARAM1), R(RCPU)); MOV(64, R(ABI_PARAM1), R(RCPU));
u32 icode = ((CurrentInstr.Instr >> 4) & 0xF) | ((CurrentInstr.Instr >> 16) & 0xFF0);
ABI_CallFunction(ARMInterpreter::ARMInstrTable[icode]); ABI_CallFunction(ARMInterpreter::ARMInstrTable[icode]);
} }
else else
(this->*comp)(); (this->*comp)();
FixupBranch skipFailed; FixupBranch skipFailed;
if (CurrentInstr.Cond() < 0xE) if (CurInstr.Cond() < 0xE)
{ {
skipFailed = J(); skipFailed = J();
SetJumpTarget(skipExecute); SetJumpTarget(skipExecute);
@ -200,13 +222,17 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
SetJumpTarget(skipFailed); SetJumpTarget(skipFailed);
} }
if (cpu->CheckCondition(cond))
ARMInterpreter::ARMInstrTable[icode](cpu);
else
cpu->AddCycles_C();
} }
} }
/* /*
we don't need to collect the interpreted cycles, we don't need to collect the interpreted cycles,
since all functions only add to it, the dispatcher since cpu->Cycles is taken into account by the dispatcher.
takes care of it.
*/ */
if (comp == NULL && i != instrsCount - 1) if (comp == NULL && i != instrsCount - 1)
@ -277,29 +303,29 @@ CompileFunc Compiler::GetCompFunc(int kind)
// Mul // Mul
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
// ARMv5 stuff // ARMv5 stuff
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
// STR // STR
A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB,
// STRB // STRB
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB,
// LDR // LDR
A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB,
// LDRB // LDRB
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB,
// STRH // STRH
NULL, NULL, NULL, NULL, A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf,
// LDRD // LDRD
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
// STRD // STRD
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
// LDRH // LDRH
NULL, NULL, NULL, NULL, A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf,
// LDRSB // LDRSB
NULL, NULL, NULL, NULL, A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf,
// LDRSH // LDRSH
NULL, NULL, NULL, NULL, A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf,
// swap // swap
NULL, NULL, NULL, NULL,
// LDM/STM // LDM/STM
NULL, NULL, NULL, NULL,
// Branch // Branch
@ -314,26 +340,26 @@ CompileFunc Compiler::GetCompFunc(int kind)
// Three operand ADD/SUB // Three operand ADD/SUB
T_Comp_AddSub_, T_Comp_AddSub_, T_Comp_AddSub_, T_Comp_AddSub_, T_Comp_AddSub_, T_Comp_AddSub_, T_Comp_AddSub_, T_Comp_AddSub_,
// 8 bit imm // 8 bit imm
T_Comp_ALU_Imm8, T_Comp_ALU_Imm8, T_Comp_ALU_Imm8, T_Comp_ALU_Imm8, T_Comp_ALU_Imm8, T_Comp_ALU_Imm8, T_Comp_ALU_Imm8, T_Comp_ALU_Imm8,
// general ALU // general ALU
T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU,
T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU,
T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU,
T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU,
T_Comp_ALU, NULL, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, NULL, T_Comp_ALU, T_Comp_ALU,
// hi reg // hi reg
T_Comp_ALU_HiReg, T_Comp_ALU_HiReg, T_Comp_ALU_HiReg, T_Comp_ALU_HiReg, T_Comp_ALU_HiReg, T_Comp_ALU_HiReg,
// pc/sp relative // pc/sp relative
NULL, NULL, NULL, T_Comp_RelAddr, T_Comp_RelAddr, T_Comp_AddSP,
// LDR pcrel // LDR pcrel
NULL, NULL,
// LDR/STR reg offset // LDR/STR reg offset
T_Comp_MemReg, NULL, T_Comp_MemReg, NULL, T_Comp_MemReg, T_Comp_MemReg, T_Comp_MemReg, T_Comp_MemReg,
// LDR/STR sign extended, half // LDR/STR sign extended, half
NULL, NULL, NULL, NULL, T_Comp_MemRegHalf, T_Comp_MemRegHalf, T_Comp_MemRegHalf, T_Comp_MemRegHalf,
// LDR/STR imm offset // LDR/STR imm offset
T_Comp_MemImm, T_Comp_MemImm, NULL, NULL, T_Comp_MemImm, T_Comp_MemImm, T_Comp_MemImm, T_Comp_MemImm,
// LDR/STR half imm offset // LDR/STR half imm offset
NULL, NULL, T_Comp_MemImmHalf, T_Comp_MemImmHalf,
// branch, etc. // branch, etc.
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@ -346,10 +372,10 @@ CompileFunc Compiler::GetCompFunc(int kind)
void Compiler::Comp_AddCycles_C() void Compiler::Comp_AddCycles_C()
{ {
s32 cycles = Num ? s32 cycles = Num ?
NDS::ARM7MemTimings[CurrentInstr.CodeCycles][Thumb ? 1 : 3] NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 1 : 3]
: ((R15 & 0x2) ? 0 : CurrentInstr.CodeCycles); : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles);
if (CurrentInstr.Cond() < 0xE) if (CurInstr.Cond() < 0xE)
ADD(32, R(RCycles), Imm8(cycles)); ADD(32, R(RCycles), Imm8(cycles));
else else
ConstantCycles += cycles; ConstantCycles += cycles;
@ -358,13 +384,26 @@ void Compiler::Comp_AddCycles_C()
void Compiler::Comp_AddCycles_CI(u32 i) void Compiler::Comp_AddCycles_CI(u32 i)
{ {
s32 cycles = (Num ? s32 cycles = (Num ?
NDS::ARM7MemTimings[CurrentInstr.CodeCycles][Thumb ? 0 : 2] NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
: ((R15 & 0x2) ? 0 : CurrentInstr.CodeCycles)) + i; : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + i;
if (CurrentInstr.Cond() < 0xE) if (CurInstr.Cond() < 0xE)
ADD(32, R(RCycles), Imm8(cycles)); ADD(32, R(RCycles), Imm8(cycles));
else else
ConstantCycles += cycles; ConstantCycles += cycles;
} }
// Emits a call to the JumpTo routine of the CPU being compiled for.
//
// addr:        host register holding the jump target.
// restoreCPSR: passed through as the third argument of JumpTo.
//
// The emitted code loads the first three ABI parameter registers with
// (RCPU, addr, restoreCPSR) and then calls ARMv5::JumpTo when Num == 0,
// ARMv4::JumpTo otherwise.
void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
{
// Presumably writes the cached CPSR back so JumpTo observes current flags -- confirm against SaveCPSR.
SaveCPSR();
// NOTE(review): ABI_PARAM1 is overwritten before addr is copied below; if a
// caller could ever pass addr == ABI_PARAM1 the target would be lost -- confirm.
MOV(64, R(ABI_PARAM1), R(RCPU));
MOV(32, R(ABI_PARAM2), R(addr));
MOV(32, R(ABI_PARAM3), Imm32(restoreCPSR));
// NOTE(review): casting a pointer-to-member-function to void* is not standard
// C++ and relies on compiler-specific behaviour -- verify on every supported toolchain.
if (Num == 0)
CALL((void*)&ARMv5::JumpTo);
else
CALL((void*)&ARMv4::JumpTo);
}
} }

View File

@ -6,6 +6,8 @@
#include "../ARMJIT.h" #include "../ARMJIT.h"
#include "../ARMJIT_RegCache.h" #include "../ARMJIT_RegCache.h"
#include <tuple>
namespace ARMJIT namespace ARMJIT
{ {
@ -21,6 +23,19 @@ class Compiler;
typedef void (Compiler::*CompileFunc)(); typedef void (Compiler::*CompileFunc)();
// Coarse classification of a data address, as returned by
// Compiler::ClassifyAddress. The first dataRegionsCount enumerators are
// numbered consecutively from 0; the two names declared after
// dataRegionsCount are aliases and do not add new values.
enum DataRegion
{
dataRegionGeneric, // fallback: address matched none of the specific regions
dataRegionMainRAM, // addresses 0x02xxxxxx
dataRegionSWRAM, // addresses 0x03xxxxxx not classified as WRAM7
dataRegionVRAM, // addresses 0x06xxxxxx
dataRegionIO, // addresses 0x04xxxxxx
dataRegionExclusive, // slot shared by the two per-CPU aliases below
dataRegionsCount, // number of distinct region values above
dataRegionDTCM = dataRegionExclusive, // alias, returned only when Num == 0
dataRegionWRAM7 = dataRegionExclusive, // alias, returned only when Num == 1
};
class Compiler : public Gen::X64CodeBlock class Compiler : public Gen::X64CodeBlock
{ {
public: public:
@ -34,6 +49,8 @@ public:
private: private:
CompileFunc GetCompFunc(int kind); CompileFunc GetCompFunc(int kind);
void Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR = false);
void Comp_AddCycles_C(); void Comp_AddCycles_C();
void Comp_AddCycles_CI(u32 i); void Comp_AddCycles_CI(u32 i);
@ -47,11 +64,14 @@ private:
opInvertOp2 = 1 << 5, opInvertOp2 = 1 << 5,
}; };
DataRegion ClassifyAddress(u32 addr);
void A_Comp_Arith(); void A_Comp_Arith();
void A_Comp_MovOp(); void A_Comp_MovOp();
void A_Comp_CmpOp(); void A_Comp_CmpOp();
void A_Comp_MemWB(); void A_Comp_MemWB();
void A_Comp_MemHalf();
void T_Comp_ShiftImm(); void T_Comp_ShiftImm();
void T_Comp_AddSub_(); void T_Comp_AddSub_();
@ -59,8 +79,15 @@ private:
void T_Comp_ALU(); void T_Comp_ALU();
void T_Comp_ALU_HiReg(); void T_Comp_ALU_HiReg();
void T_Comp_RelAddr();
void T_Comp_AddSP();
void T_Comp_MemReg(); void T_Comp_MemReg();
void T_Comp_MemImm(); void T_Comp_MemImm();
void T_Comp_MemRegHalf();
void T_Comp_MemImmHalf();
void Comp_MemAccess(Gen::OpArg rd, bool signExtend, bool store, int size);
void Comp_ArithTriOp(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&), void Comp_ArithTriOp(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&),
Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags); Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags);
@ -70,8 +97,8 @@ private:
void Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed); void Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed);
void* Gen_MemoryRoutine9(bool store, int size, u32 region); void* Gen_MemoryRoutine9(bool store, int size);
void* Gen_MemoryRoutine7(bool store, int size, bool mainRAMCode, u32 region); void* Gen_MemoryRoutine7(bool store, bool codeMainRAM, int size);
Gen::OpArg Comp_RegShiftImm(int op, int amount, Gen::OpArg rm, bool S, bool& carryUsed); Gen::OpArg Comp_RegShiftImm(int op, int amount, Gen::OpArg rm, bool S, bool& carryUsed);
Gen::OpArg Comp_RegShiftReg(int op, Gen::OpArg rs, Gen::OpArg rm, bool S, bool& carryUsed); Gen::OpArg Comp_RegShiftReg(int op, Gen::OpArg rs, Gen::OpArg rm, bool S, bool& carryUsed);
@ -92,10 +119,12 @@ private:
} }
void* ResetStart; void* ResetStart;
void* MemoryFuncs9[3][2];
void* MemoryFuncs7[3][2][2];
bool CPSRDirty = false; bool CPSRDirty = false;
FetchedInstr CurrentInstr; FetchedInstr CurInstr;
RegCache<Compiler, Gen::X64Reg> RegCache; RegCache<Compiler, Gen::X64Reg> RegCache;
@ -105,12 +134,9 @@ private:
u32 CodeRegion; u32 CodeRegion;
u32 ConstantCycles; u32 ConstantCycles;
};
extern void* ReadMemFuncs9[16]; ARM* CurCPU;
extern void* ReadMemFuncs7[2][16]; };
extern void* WriteMemFuncs9[16];
extern void* WriteMemFuncs7[2][16];
} }

View File

@ -5,7 +5,6 @@
namespace NDS namespace NDS
{ {
#define MAIN_RAM_SIZE 0x400000
extern u8* SWRAM_ARM9; extern u8* SWRAM_ARM9;
extern u32 SWRAM_ARM9Mask; extern u32 SWRAM_ARM9Mask;
extern u8* SWRAM_ARM7; extern u8* SWRAM_ARM7;
@ -19,11 +18,6 @@ using namespace Gen;
namespace ARMJIT namespace ARMJIT
{ {
void* ReadMemFuncs9[16];
void* ReadMemFuncs7[2][16];
void* WriteMemFuncs9[16];
void* WriteMemFuncs7[2][16];
template <typename T> template <typename T>
int squeezePointer(T* ptr) int squeezePointer(T* ptr)
{ {
@ -32,569 +26,434 @@ int squeezePointer(T* ptr)
return truncated; return truncated;
} }
u32 ReadVRAM9(u32 addr) /*
{ According to DeSmuME and my own research, approx. 99% (seriously, that's an empirical number)
switch (addr & 0x00E00000) of all memory load and store instructions always access addresses in the same region as
{ during their first execution.
case 0x00000000: return GPU::ReadVRAM_ABG<u32>(addr);
case 0x00200000: return GPU::ReadVRAM_BBG<u32>(addr);
case 0x00400000: return GPU::ReadVRAM_AOBJ<u32>(addr);
case 0x00600000: return GPU::ReadVRAM_BOBJ<u32>(addr);
default: return GPU::ReadVRAM_LCDC<u32>(addr);
}
}
void WriteVRAM9(u32 addr, u32 val) I tried multiple optimisations, which would benefit from this behaviour
{ (having fast paths for the first region), though none of them yielded a measurable
switch (addr & 0x00E00000) improvement.
{ */
case 0x00000000: GPU::WriteVRAM_ABG<u32>(addr, val); return;
case 0x00200000: GPU::WriteVRAM_BBG<u32>(addr, val); return;
case 0x00400000: GPU::WriteVRAM_AOBJ<u32>(addr, val); return;
case 0x00600000: GPU::WriteVRAM_BOBJ<u32>(addr, val); return;
default: GPU::WriteVRAM_LCDC<u32>(addr, val); return;
}
}
/* /*
R11 - data to write (store only) address - ABI_PARAM1 (a.k.a. ECX = RSCRATCH3 on Windows)
RSCRATCH2 - address store value - ABI_PARAM2 (a.k.a. RDX = RSCRATCH2 on Windows)
RSCRATCH3 - code cycles code cycles - ABI_PARAM3
*/ */
void* Compiler::Gen_MemoryRoutine9(bool store, int size, u32 region) void* Compiler::Gen_MemoryRoutine9(bool store, int size)
{ {
u32 addressMask = ~(size == 32 ? 3 : (size == 16 ? 1 : 0));
AlignCode4(); AlignCode4();
void* res = (void*)GetWritableCodePtr(); void* res = GetWritableCodePtr();
if (!store) MOV(32, R(RSCRATCH), R(ABI_PARAM1));
{ SUB(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARMv5, DTCMBase)));
MOV(32, R(RSCRATCH), R(RSCRATCH2)); CMP(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARMv5, DTCMSize)));
AND(32, R(RSCRATCH), Imm8(0x3)); FixupBranch insideDTCM = J_CC(CC_B);
SHL(32, R(RSCRATCH), Imm8(3));
// enter the shadow realm! CMP(32, R(ABI_PARAM1), MDisp(RCPU, offsetof(ARMv5, ITCMSize)));
MOV(32, MDisp(RSP, 8), R(RSCRATCH)); FixupBranch insideITCM = J_CC(CC_B);
}
// cycle counting! // cycle counting!
// this is AddCycles_CDI MOV(32, R(RSCRATCH), R(ABI_PARAM1));
MOV(32, R(R10), R(RSCRATCH2)); SHR(32, R(RSCRATCH), Imm8(12));
SHR(32, R(R10), Imm8(12)); MOVZX(32, 8, RSCRATCH, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, MemTimings) + (size == 32 ? 2 : 0)));
MOVZX(32, 8, R10, MComplex(RCPU, R10, SCALE_1, offsetof(ARMv5, MemTimings) + 2)); LEA(32, ABI_PARAM4, MComplex(RSCRATCH, ABI_PARAM3, SCALE_1, -6));
LEA(32, RSCRATCH, MComplex(RSCRATCH3, R10, SCALE_1, -6)); CMP(32, R(ABI_PARAM3), R(RSCRATCH));
CMP(32, R(R10), R(RSCRATCH3)); CMOVcc(32, RSCRATCH, R(ABI_PARAM3), CC_G);
CMOVcc(32, RSCRATCH3, R(R10), CC_G); CMP(32, R(ABI_PARAM4), R(RSCRATCH));
CMP(32, R(RSCRATCH), R(RSCRATCH3)); CMOVcc(32, RSCRATCH, R(ABI_PARAM4), CC_G);
CMOVcc(32, RSCRATCH3, R(RSCRATCH), CC_G); ADD(32, R(RCycles), R(RSCRATCH));
ADD(32, R(RCycles), R(RSCRATCH3));
if (!store)
XOR(32, R(RSCRATCH), R(RSCRATCH));
AND(32, R(RSCRATCH2), Imm32(~3));
if (store)
{ {
MOV(32, R(RSCRATCH3), R(RSCRATCH2)); if (size > 8)
SUB(32, R(RSCRATCH2), MDisp(RCPU, offsetof(ARMv5, DTCMBase))); AND(32, R(ABI_PARAM1), Imm32(addressMask));
CMP(32, R(RSCRATCH2), MDisp(RCPU, offsetof(ARMv5, DTCMSize))); switch (size)
FixupBranch outsideDTCM = J_CC(CC_AE);
AND(32, R(RSCRATCH2), Imm32(0x3FFF));
if (!store)
{ {
MOV(32, R(RSCRATCH), MComplex(RCPU, RSCRATCH2, SCALE_1, offsetof(ARMv5, DTCM))); case 32: JMP((u8*)NDS::ARM9Write32, true); break;
MOV(32, R(ECX), MDisp(RSP, 8)); case 16: JMP((u8*)NDS::ARM9Write16, true); break;
case 8: JMP((u8*)NDS::ARM9Write8, true); break;
}
}
else
{
if (size == 32)
{
ABI_PushRegistersAndAdjustStack({ABI_PARAM1}, 8);
AND(32, R(ABI_PARAM1), Imm32(addressMask));
// everything's already in the appropriate register
ABI_CallFunction(NDS::ARM9Read32);
ABI_PopRegistersAndAdjustStack({ECX}, 8);
AND(32, R(ECX), Imm8(3));
SHL(32, R(ECX), Imm8(3));
ROR_(32, R(RSCRATCH), R(ECX));
RET();
}
else if (size == 16)
{
AND(32, R(ABI_PARAM1), Imm32(addressMask));
JMP((u8*)NDS::ARM9Read16, true);
}
else
JMP((u8*)NDS::ARM9Read8, true);
}
SetJumpTarget(insideDTCM);
ADD(32, R(RCycles), R(ABI_PARAM3));
AND(32, R(RSCRATCH), Imm32(0x3FFF & addressMask));
if (store)
MOV(size, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM)), R(ABI_PARAM2));
else
{
MOVZX(32, size, RSCRATCH, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM)));
if (size == 32)
{
if (ABI_PARAM1 != ECX)
MOV(32, R(ECX), R(ABI_PARAM1));
AND(32, R(ECX), Imm8(3));
SHL(32, R(ECX), Imm8(3));
ROR_(32, R(RSCRATCH), R(ECX)); ROR_(32, R(RSCRATCH), R(ECX));
} }
else
MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_1, offsetof(ARMv5, DTCM)), R(R11));
RET();
SetJumpTarget(outsideDTCM);
MOV(32, R(RSCRATCH2), R(RSCRATCH3));
} }
switch (region)
{
case 0x00000000:
case 0x01000000:
{
CMP(32, R(RSCRATCH2), MDisp(RCPU, offsetof(ARMv5, ITCMSize)));
FixupBranch insideITCM = J_CC(CC_B);
RET();
SetJumpTarget(insideITCM);
AND(32, R(RSCRATCH2), Imm32(0x7FFF));
if (!store)
MOV(32, R(RSCRATCH), MComplex(RCPU, RSCRATCH2, SCALE_1, offsetof(ARMv5, ITCM)));
else
{
MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_1, offsetof(ARMv5, ITCM)), R(R11));
MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.ARM9_ITCM)), Imm32(0));
MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.ARM9_ITCM) + 8), Imm32(0));
}
}
break;
case 0x02000000:
AND(32, R(RSCRATCH2), Imm32(MAIN_RAM_SIZE - 1));
if (!store)
MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(NDS::MainRAM)));
else
{
MOV(32, MDisp(RSCRATCH2, squeezePointer(NDS::MainRAM)), R(R11));
MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.MainRAM)), Imm32(0));
MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.MainRAM) + 8), Imm32(0));
}
break;
case 0x03000000:
{
MOV(64, R(RSCRATCH3), M(&NDS::SWRAM_ARM9));
TEST(64, R(RSCRATCH3), R(RSCRATCH3));
FixupBranch notMapped = J_CC(CC_Z);
AND(32, R(RSCRATCH2), M(&NDS::SWRAM_ARM9Mask));
if (!store)
MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH3));
else
{
MOV(32, MRegSum(RSCRATCH2, RSCRATCH3), R(R11));
MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.SWRAM)), Imm32(0));
MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.SWRAM) + 8), Imm32(0));
}
SetJumpTarget(notMapped);
}
break;
case 0x04000000:
MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
if (!store)
{
ABI_PushRegistersAndAdjustStack({}, 8, 0);
ABI_CallFunction(NDS::ARM9IORead32);
ABI_PopRegistersAndAdjustStack({}, 8, 0);
}
else
{
MOV(32, R(ABI_PARAM2), R(R11));
JMP((u8*)NDS::ARM9IOWrite32, true);
}
break;
case 0x05000000:
{
MOV(32, R(RSCRATCH), Imm32(1<<1));
MOV(32, R(RSCRATCH3), Imm32(1<<9));
TEST(32, R(RSCRATCH2), Imm32(0x400));
CMOVcc(32, RSCRATCH, R(RSCRATCH3), CC_NZ);
TEST(16, R(RSCRATCH), M(&NDS::PowerControl9));
FixupBranch available = J_CC(CC_NZ);
RET();
SetJumpTarget(available);
AND(32, R(RSCRATCH2), Imm32(0x7FF));
if (!store)
MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(GPU::Palette)));
else
MOV(32, MDisp(RSCRATCH2, squeezePointer(GPU::Palette)), R(R11));
}
break;
case 0x06000000:
MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
if (!store)
{
ABI_PushRegistersAndAdjustStack({}, 8);
ABI_CallFunction(ReadVRAM9);
ABI_PopRegistersAndAdjustStack({}, 8);
}
else
{
MOV(32, R(ABI_PARAM2), R(R11));
JMP((u8*)WriteVRAM9, true);
}
break;
case 0x07000000:
{
MOV(32, R(RSCRATCH), Imm32(1<<1));
MOV(32, R(RSCRATCH3), Imm32(1<<9));
TEST(32, R(RSCRATCH2), Imm32(0x400));
CMOVcc(32, RSCRATCH, R(RSCRATCH3), CC_NZ);
TEST(16, R(RSCRATCH), M(&NDS::PowerControl9));
FixupBranch available = J_CC(CC_NZ);
RET();
SetJumpTarget(available);
AND(32, R(RSCRATCH2), Imm32(0x7FF));
if (!store)
MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(GPU::OAM)));
else
MOV(32, MDisp(RSCRATCH2, squeezePointer(GPU::OAM)), R(R11));
}
break;
case 0x08000000:
case 0x09000000:
case 0x0A000000:
if (!store)
MOV(32, R(RSCRATCH), Imm32(0xFFFFFFFF));
break;
case 0xFF000000:
if (!store)
{
AND(32, R(RSCRATCH2), Imm32(0xFFF));
MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(NDS::ARM9BIOS)));
}
break;
default:
MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
if (!store)
{
ABI_PushRegistersAndAdjustStack({}, 8, 0);
ABI_CallFunction(NDS::ARM9Read32);
ABI_PopRegistersAndAdjustStack({}, 8, 0);
}
else
{
MOV(32, R(ABI_PARAM2), R(R11));
JMP((u8*)NDS::ARM9Write32, true);
}
break;
}
if (!store)
{
MOV(32, R(ECX), MDisp(RSP, 8));
ROR_(32, R(RSCRATCH), R(ECX));
}
RET(); RET();
SetJumpTarget(insideITCM);
ADD(32, R(RCycles), R(ABI_PARAM3));
MOV(32, R(ABI_PARAM3), R(ABI_PARAM1)); // free up ECX
AND(32, R(ABI_PARAM3), Imm32(0x7FFF & addressMask));
if (store)
{
MOV(size, MComplex(RCPU, ABI_PARAM3, SCALE_1, offsetof(ARMv5, ITCM)), R(ABI_PARAM2));
XOR(32, R(RSCRATCH), R(RSCRATCH));
MOV(64, MScaled(ABI_PARAM3, SCALE_4, squeezePointer(cache.ARM9_ITCM)), R(RSCRATCH));
if (size == 32)
MOV(64, MScaled(ABI_PARAM3, SCALE_4, squeezePointer(cache.ARM9_ITCM) + 8), R(RSCRATCH));
}
else
{
MOVZX(32, size, RSCRATCH, MComplex(RCPU, ABI_PARAM3, SCALE_1, offsetof(ARMv5, ITCM)));
if (size == 32)
{
if (ABI_PARAM1 != ECX)
MOV(32, R(ECX), R(ABI_PARAM1));
AND(32, R(ECX), Imm8(3));
SHL(32, R(ECX), Imm8(3));
ROR_(32, R(RSCRATCH), R(ECX));
}
}
RET();
static_assert(RSCRATCH == EAX);
return res; return res;
} }
void* Compiler::Gen_MemoryRoutine7(bool store, int size, bool mainRAMCode, u32 region) void* Compiler::Gen_MemoryRoutine7(bool store, bool codeMainRAM, int size)
{ {
u32 addressMask = ~(size == 32 ? 3 : (size == 16 ? 1 : 0));
AlignCode4(); AlignCode4();
void* res = GetWritableCodePtr(); void* res = GetWritableCodePtr();
if (!store) MOV(32, R(RSCRATCH), R(ABI_PARAM1));
{
MOV(32, R(RSCRATCH), R(RSCRATCH2));
AND(32, R(RSCRATCH), Imm8(0x3));
SHL(32, R(RSCRATCH), Imm8(3));
// enter the shadow realm!
MOV(32, MDisp(RSP, 8), R(RSCRATCH));
}
// AddCycles_CDI
MOV(32, R(RSCRATCH), R(RSCRATCH2));
SHR(32, R(RSCRATCH), Imm8(15)); SHR(32, R(RSCRATCH), Imm8(15));
MOVZX(32, 8, RSCRATCH, MDisp(RSCRATCH, squeezePointer(NDS::ARM7MemTimings + 2))); MOVZX(32, 8, ABI_PARAM4, MDisp(RSCRATCH, (size == 32 ? 2 : 0) + squeezePointer(NDS::ARM7MemTimings)));
if ((region == 0x02000000 && mainRAMCode) || (region != 0x02000000 && !mainRAMCode))
MOV(32, R(RSCRATCH), R(ABI_PARAM1));
AND(32, R(RSCRATCH), Imm32(0xFF000000));
CMP(32, R(RSCRATCH), Imm32(0x02000000));
FixupBranch outsideMainRAM = J_CC(CC_NE);
if (codeMainRAM)
{ {
if (!store && region != 0x02000000) LEA(32, RSCRATCH, MRegSum(ABI_PARAM4, ABI_PARAM3));
LEA(32, RSCRATCH3, MComplex(RSCRATCH, RSCRATCH3, SCALE_1, 1)); ADD(32, R(RCycles), R(RSCRATCH));
ADD(32, R(RCycles), R(RSCRATCH3));
} }
else else
{ {
if (!store) if (!store)
ADD(32, R(region == 0x02000000 ? RSCRATCH2 : RSCRATCH), Imm8(1)); ADD(32, R(ABI_PARAM3), Imm8(1));
LEA(32, R10, MComplex(RSCRATCH, RSCRATCH3, SCALE_1, -3)); LEA(32, RSCRATCH, MComplex(ABI_PARAM4, ABI_PARAM3, SCALE_1, -3));
CMP(32, R(RSCRATCH3), R(RSCRATCH)); CMP(32, R(ABI_PARAM4), R(ABI_PARAM3));
CMOVcc(32, RSCRATCH, R(RSCRATCH3), CC_G); CMOVcc(32, ABI_PARAM3, R(ABI_PARAM4), CC_G);
CMP(32, R(R10), R(RSCRATCH)); CMP(32, R(ABI_PARAM3), R(RSCRATCH));
CMOVcc(32, RSCRATCH, R(R10), CC_G); CMOVcc(32, RSCRATCH, R(ABI_PARAM3), CC_G);
ADD(32, R(RCycles), R(RSCRATCH)); ADD(32, R(RCycles), R(RSCRATCH));
} }
MOV(32, R(ABI_PARAM3), R(ABI_PARAM1));
if (!store) AND(32, R(ABI_PARAM3), Imm32((MAIN_RAM_SIZE - 1) & addressMask));
if (store)
{
MOV(size, MDisp(ABI_PARAM3, squeezePointer(NDS::MainRAM)), R(ABI_PARAM2));
XOR(32, R(RSCRATCH), R(RSCRATCH)); XOR(32, R(RSCRATCH), R(RSCRATCH));
AND(32, R(RSCRATCH2), Imm32(~3)); MOV(64, MScaled(ABI_PARAM3, SCALE_4, squeezePointer(cache.MainRAM)), R(RSCRATCH));
if (size == 32)
switch (region) MOV(64, MScaled(ABI_PARAM3, SCALE_4, squeezePointer(cache.MainRAM) + 8), R(RSCRATCH));
{
case 0x00000000:
if (!store) {
CMP(32, R(RSCRATCH2), Imm32(0x4000));
FixupBranch outsideBIOS1 = J_CC(CC_AE);
MOV(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARM, R[15])));
CMP(32, R(RSCRATCH), Imm32(0x4000));
FixupBranch outsideBIOS2 = J_CC(CC_AE);
MOV(32, R(RSCRATCH3), M(&NDS::ARM7BIOSProt));
CMP(32, R(RSCRATCH2), R(RSCRATCH3));
FixupBranch notDenied1 = J_CC(CC_AE);
CMP(32, R(RSCRATCH), R(RSCRATCH3));
FixupBranch notDenied2 = J_CC(CC_B);
SetJumpTarget(outsideBIOS2);
MOV(32, R(RSCRATCH), Imm32(0xFFFFFFFF));
RET();
SetJumpTarget(notDenied1);
SetJumpTarget(notDenied2);
MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(NDS::ARM7BIOS)));
MOV(32, R(ECX), MDisp(RSP, 8));
ROR_(32, R(RSCRATCH), R(ECX));
RET();
SetJumpTarget(outsideBIOS1);
}
break;
case 0x02000000:
AND(32, R(RSCRATCH2), Imm32(MAIN_RAM_SIZE - 1));
if (!store)
MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(NDS::MainRAM)));
else
{
MOV(32, MDisp(RSCRATCH2, squeezePointer(NDS::MainRAM)), R(R11));
MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.MainRAM)), Imm32(0));
MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.MainRAM) + 8), Imm32(0));
}
break;
case 0x03000000:
{
TEST(32, R(RSCRATCH2), Imm32(0x800000));
FixupBranch region = J_CC(CC_NZ);
MOV(64, R(RSCRATCH), M(&NDS::SWRAM_ARM7));
TEST(64, R(RSCRATCH), R(RSCRATCH));
FixupBranch notMapped = J_CC(CC_Z);
AND(32, R(RSCRATCH2), M(&NDS::SWRAM_ARM7Mask));
if (!store)
{
MOV(32, R(RSCRATCH), MRegSum(RSCRATCH, RSCRATCH2));
MOV(32, R(ECX), MDisp(RSP, 8));
ROR_(32, R(RSCRATCH), R(ECX));
}
else
{
MOV(32, MRegSum(RSCRATCH, RSCRATCH2), R(R11));
MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.SWRAM)), Imm32(0));
MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.SWRAM) + 8), Imm32(0));
}
RET();
SetJumpTarget(region);
SetJumpTarget(notMapped);
AND(32, R(RSCRATCH2), Imm32(0xFFFF));
if (!store)
MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(NDS::ARM7WRAM)));
else
{
MOV(32, MDisp(RSCRATCH2, squeezePointer(NDS::ARM7WRAM)), R(R11));
MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.ARM7_WRAM)), Imm32(0));
MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.ARM7_WRAM) + 8), Imm32(0));
}
}
break;
case 0x04000000:
{
TEST(32, R(RSCRATCH2), Imm32(0x800000));
FixupBranch region = J_CC(CC_NZ);
MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
if (!store)
{
ABI_PushRegistersAndAdjustStack({}, 8);
ABI_CallFunction(NDS::ARM7IORead32);
ABI_PopRegistersAndAdjustStack({}, 8);
MOV(32, R(ECX), MDisp(RSP, 8));
ROR_(32, R(RSCRATCH), R(ECX));
RET();
}
else
{
MOV(32, R(ABI_PARAM2), R(R11));
JMP((u8*)NDS::ARM7IOWrite32, true);
}
SetJumpTarget(region);
if (!store)
{
ABI_PushRegistersAndAdjustStack({RSCRATCH2}, 8);
MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
ABI_CallFunction(Wifi::Read);
ABI_PopRegistersAndAdjustStack({RSCRATCH2}, 8);
ADD(32, R(RSCRATCH2), Imm8(2));
ABI_PushRegistersAndAdjustStack({EAX}, 8);
MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
ABI_CallFunction(Wifi::Read);
MOV(32, R(RSCRATCH2), R(EAX));
SHL(32, R(RSCRATCH2), Imm8(16));
ABI_PopRegistersAndAdjustStack({EAX}, 8);
OR(32, R(EAX), R(RSCRATCH2));
}
else
{
ABI_PushRegistersAndAdjustStack({RSCRATCH2, R11}, 8);
MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
MOVZX(32, 16, ABI_PARAM2, R(R11));
ABI_CallFunction(Wifi::Write);
ABI_PopRegistersAndAdjustStack({RSCRATCH2, R11}, 8);
SHR(32, R(R11), Imm8(16));
ADD(32, R(RSCRATCH2), Imm8(2));
ABI_PushRegistersAndAdjustStack({RSCRATCH2, R11}, 8);
MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
MOVZX(32, 16, ABI_PARAM2, R(R11));
ABI_CallFunction(Wifi::Write);
ABI_PopRegistersAndAdjustStack({RSCRATCH2, R11}, 8);
}
}
break;
case 0x06000000:
MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
if (!store)
{
ABI_PushRegistersAndAdjustStack({}, 8);
ABI_CallFunction(GPU::ReadVRAM_ARM7<u32>);
ABI_PopRegistersAndAdjustStack({}, 8);
}
else
{
AND(32, R(ABI_PARAM1), Imm32(0x40000 - 1));
MOV(64, MScaled(ABI_PARAM1, SCALE_4, squeezePointer(cache.ARM7_WVRAM)), Imm32(0));
MOV(64, MScaled(ABI_PARAM1, SCALE_4, squeezePointer(cache.ARM7_WVRAM) + 8), Imm32(0));
MOV(32, R(ABI_PARAM2), R(R11));
JMP((u8*)GPU::WriteVRAM_ARM7<u32>, true);
}
break;
case 0x08000000:
case 0x09000000:
case 0x0A000000:
if (!store)
MOV(32, R(RSCRATCH), Imm32(0xFFFFFFFF));
break;
/*default:
ABI_PushRegistersAndAdjustStack({}, 8, 0);
MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
ABI_CallFunction(NDS::ARM7Read32);
ABI_PopRegistersAndAdjustStack({}, 8, 0);
break;*/
} }
else
if (!store)
{ {
MOV(32, R(ECX), MDisp(RSP, 8)); MOVZX(32, size, RSCRATCH, MDisp(ABI_PARAM3, squeezePointer(NDS::MainRAM)));
ROR_(32, R(RSCRATCH), R(ECX)); if (size == 32)
{
if (ABI_PARAM1 != ECX)
MOV(32, R(ECX), R(ABI_PARAM1));
AND(32, R(ECX), Imm8(3));
SHL(32, R(ECX), Imm8(3));
ROR_(32, R(RSCRATCH), R(ECX));
}
} }
RET(); RET();
SetJumpTarget(outsideMainRAM);
if (codeMainRAM)
{
if (!store)
ADD(32, R(ABI_PARAM4), Imm8(1));
LEA(32, RSCRATCH, MComplex(ABI_PARAM4, ABI_PARAM3, SCALE_1, -3));
CMP(32, R(ABI_PARAM4), R(ABI_PARAM3));
CMOVcc(32, ABI_PARAM3, R(ABI_PARAM4), CC_G);
CMP(32, R(ABI_PARAM3), R(RSCRATCH));
CMOVcc(32, RSCRATCH, R(ABI_PARAM3), CC_G);
ADD(32, R(RCycles), R(RSCRATCH));
}
else
{
LEA(32, RSCRATCH, MComplex(ABI_PARAM4, ABI_PARAM3, SCALE_1, store ? 0 : 1));
ADD(32, R(RCycles), R(RSCRATCH));
}
if (store)
{
if (size > 8)
AND(32, R(ABI_PARAM1), Imm32(addressMask));
switch (size)
{
case 32: JMP((u8*)NDS::ARM7Write32, true); break;
case 16: JMP((u8*)NDS::ARM7Write16, true); break;
case 8: JMP((u8*)NDS::ARM7Write8, true); break;
}
}
else
{
if (size == 32)
{
ABI_PushRegistersAndAdjustStack({ABI_PARAM1}, 8);
AND(32, R(ABI_PARAM1), Imm32(addressMask));
ABI_CallFunction(NDS::ARM7Read32);
ABI_PopRegistersAndAdjustStack({ECX}, 8);
AND(32, R(ECX), Imm8(3));
SHL(32, R(ECX), Imm8(3));
ROR_(32, R(RSCRATCH), R(ECX));
RET();
}
else if (size == 16)
{
AND(32, R(ABI_PARAM1), Imm32(addressMask));
JMP((u8*)NDS::ARM7Read16, true);
}
else
JMP((u8*)NDS::ARM7Read8, true);
}
return res; return res;
} }
// Emits the call sequence for one load or store through the pre-generated
// memory routines.
//
// The caller is expected to have already emitted code that leaves the
// effective address in ABI_PARAM1. This function then emits, in order:
//   1. for stores, a move of rd into ABI_PARAM2;
//   2. a move of the instruction's code cycle count into ABI_PARAM3;
//   3. a call to the routine picked by CPU, access size and direction
//      (for the ARM7 additionally by whether CodeRegion is 0x02);
//   4. for loads, a sign- or zero-extending move of RSCRATCH into rd.
//
// size is the access width in bits; size >> 4 is used as the table index
// (8 -> 0, 16 -> 1, 32 -> 2).
void Compiler::Comp_MemAccess(Gen::OpArg rd, bool signExtend, bool store, int size)
{
    const bool isARM9 = Num == 0;

    if (store)
        MOV(32, R(ABI_PARAM2), rd);

    // Cycle count handed to the memory routine in ABI_PARAM3.
    u32 codeCycles;
    if (isARM9)
        codeCycles = (R15 & 0x2) ? 0 : CurInstr.CodeCycles;
    else
        codeCycles = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
    MOV(32, R(ABI_PARAM3), Imm32(codeCycles));

    const int sizeIndex = size >> 4;
    void* routine;
    if (isARM9)
        routine = MemoryFuncs9[sizeIndex][store];
    else
        routine = MemoryFuncs7[sizeIndex][store][CodeRegion == 0x02];
    CALL(routine);

    // Stores produce no result; nothing more to emit.
    if (store)
        return;

    // Loads: widen the value left in RSCRATCH into the destination register.
    if (signExtend)
        MOVSX(32, size, rd.GetSimpleReg(), R(RSCRATCH));
    else
        MOVZX(32, size, rd.GetSimpleReg(), R(RSCRATCH));
}
OpArg Compiler::A_Comp_GetMemWBOffset() OpArg Compiler::A_Comp_GetMemWBOffset()
{ {
if (!(CurrentInstr.Instr & (1 << 25))) if (!(CurInstr.Instr & (1 << 25)))
return Imm32(CurrentInstr.Instr & 0xFFF); {
u32 imm = CurInstr.Instr & 0xFFF;
return Imm32(imm);
}
else else
{ {
int op = (CurrentInstr.Instr >> 5) & 0x3; int op = (CurInstr.Instr >> 5) & 0x3;
int amount = (CurrentInstr.Instr >> 7) & 0x1F; int amount = (CurInstr.Instr >> 7) & 0x1F;
OpArg rm = MapReg(CurrentInstr.A_Reg(0)); OpArg rm = MapReg(CurInstr.A_Reg(0));
bool carryUsed; bool carryUsed;
return Comp_RegShiftImm(op, amount, rm, false, carryUsed); return Comp_RegShiftImm(op, amount, rm, false, carryUsed);
} }
} }
void Compiler::A_Comp_MemWB() void Compiler::A_Comp_MemWB()
{ {
OpArg rn = MapReg(CurrentInstr.A_Reg(16)); OpArg rn = MapReg(CurInstr.A_Reg(16));
OpArg rd = MapReg(CurrentInstr.A_Reg(12)); OpArg rd = MapReg(CurInstr.A_Reg(12));
bool load = CurrentInstr.Instr & (1 << 20); bool load = CurInstr.Instr & (1 << 20);
bool byte = CurInstr.Instr & (1 << 22);
int size = byte ? 8 : 32;
MOV(32, R(RSCRATCH2), rn); if (CurInstr.Instr & (1 << 24))
if (CurrentInstr.Instr & (1 << 24))
{ {
OpArg offset = A_Comp_GetMemWBOffset(); OpArg offset = A_Comp_GetMemWBOffset();
if (CurrentInstr.Instr & (1 << 23)) if (CurInstr.Instr & (1 << 23))
ADD(32, R(RSCRATCH2), offset); MOV_sum(32, ABI_PARAM1, rn, offset);
else else
SUB(32, R(RSCRATCH2), offset); {
MOV(32, R(ABI_PARAM1), rn);
SUB(32, R(ABI_PARAM1), offset);
}
if (CurrentInstr.Instr & (1 << 21)) if (CurInstr.Instr & (1 << 21))
MOV(32, rn, R(RSCRATCH2)); MOV(32, rn, R(ABI_PARAM1));
} }
u32 cycles = Num ? NDS::ARM7MemTimings[CurrentInstr.CodeCycles][2] : CurrentInstr.CodeCycles;
MOV(32, R(RSCRATCH3), Imm32(cycles));
MOV(32, R(RSCRATCH), R(RSCRATCH2));
SHR(32, R(RSCRATCH), Imm8(24));
AND(32, R(RSCRATCH), Imm8(0xF));
void** funcArray;
if (load)
funcArray = Num ? ReadMemFuncs7[CodeRegion == 0x02] : ReadMemFuncs9;
else else
{ MOV(32, R(ABI_PARAM1), rn);
funcArray = Num ? WriteMemFuncs7[CodeRegion == 0x02] : WriteMemFuncs9;
MOV(32, R(R11), rd);
}
CALLptr(MScaled(RSCRATCH, SCALE_8, squeezePointer(funcArray)));
if (load) if (!(CurInstr.Instr & (1 << 24)))
MOV(32, R(RSCRATCH2), R(RSCRATCH));
if (!(CurrentInstr.Instr & (1 << 24)))
{ {
OpArg offset = A_Comp_GetMemWBOffset(); OpArg offset = A_Comp_GetMemWBOffset();
if (CurrentInstr.Instr & (1 << 23)) if (CurInstr.Instr & (1 << 23))
ADD(32, rn, offset); ADD(32, rn, offset);
else else
SUB(32, rn, offset); SUB(32, rn, offset);
} }
if (load) Comp_MemAccess(rd, false, !load, byte ? 8 : 32);
MOV(32, rd, R(RSCRATCH2)); if (load && CurInstr.A_Reg(12) == 15)
{
if (byte)
printf("!!! LDRB PC %08X\n", R15);
else
{
if (Num == 1)
AND(32, rd, Imm8(0xFE)); // immediate is sign extended
Comp_JumpTo(rd.GetSimpleReg());
}
}
}
// Compiles an ARM halfword / signed-byte transfer. Bits 5-6 of the
// instruction select the transfer type and bit 20 selects load vs. store.
//
// Emits code that puts the effective address in ABI_PARAM1, applies the
// base-register update requested by bits 21/23/24, and then delegates the
// access itself to Comp_MemAccess.
void Compiler::A_Comp_MemHalf()
{
    OpArg rn = MapReg(CurInstr.A_Reg(16));
    OpArg rd = MapReg(CurInstr.A_Reg(12));

    // Bit 22 set: 8-bit immediate, low nibble in bits 0-3 and high nibble in
    // bits 8-11. Bit 22 clear: the offset is taken from a register.
    OpArg offset = CurInstr.Instr & (1 << 22)
        ? Imm32((CurInstr.Instr & 0xF) | ((CurInstr.Instr >> 4) & 0xF0))
        : MapReg(CurInstr.A_Reg(0));

    if (CurInstr.Instr & (1 << 24))
    {
        // Bit 24 set: the offset is applied before the access
        // (bit 23 chooses add vs. subtract).
        if (CurInstr.Instr & (1 << 23))
            MOV_sum(32, ABI_PARAM1, rn, offset);
        else
        {
            MOV(32, R(ABI_PARAM1), rn);
            SUB(32, R(ABI_PARAM1), offset);
        }

        // Bit 21 set: write the computed address back to the base register.
        if (CurInstr.Instr & (1 << 21))
            MOV(32, rn, R(ABI_PARAM1));
    }
    else
        MOV(32, R(ABI_PARAM1), rn);

    int op = (CurInstr.Instr >> 5) & 0x3;
    bool load = CurInstr.Instr & (1 << 20);

    bool signExtend = false;
    // Default to a halfword access so that size is never read uninitialized.
    // The only store handled here is op == 1 (halfword). Other store
    // encodings (presumably the ARMv5TE doubleword transfers -- TODO confirm
    // they are not routed to this function) previously left size
    // indeterminate, which then indexed the routine tables in Comp_MemAccess.
    int size = 16;
    if (load)
    {
        size = op == 2 ? 8 : 16;
        signExtend = op > 1;
    }

    if (!(CurInstr.Instr & (1 << 24)))
    {
        // Bit 24 clear: the access uses the unmodified base (already copied
        // to ABI_PARAM1 above) and the base register is updated afterwards.
        if (CurInstr.Instr & (1 << 23))
            ADD(32, rn, offset);
        else
            SUB(32, rn, offset);
    }

    Comp_MemAccess(rd, signExtend, !load, size);

    // Loading into r15 is not handled by this path; only report it.
    if (load && CurInstr.A_Reg(12) == 15)
        printf("!!! MemHalf op PC %08X\n", R15);
}
void Compiler::T_Comp_MemReg() void Compiler::T_Comp_MemReg()
{ {
OpArg rd = MapReg(CurrentInstr.T_Reg(0)); OpArg rd = MapReg(CurInstr.T_Reg(0));
OpArg rb = MapReg(CurrentInstr.T_Reg(3)); OpArg rb = MapReg(CurInstr.T_Reg(3));
OpArg ro = MapReg(CurrentInstr.T_Reg(6)); OpArg ro = MapReg(CurInstr.T_Reg(6));
int op = (CurrentInstr.Instr >> 10) & 0x3; int op = (CurInstr.Instr >> 10) & 0x3;
bool load = op & 0x2; bool load = op & 0x2;
bool byte = op & 0x1;
MOV(32, R(RSCRATCH2), rb);
ADD(32, R(RSCRATCH2), ro);
u32 cycles = Num ? NDS::ARM7MemTimings[CurrentInstr.CodeCycles][0] : (R15 & 0x2 ? 0 : CurrentInstr.CodeCycles); MOV_sum(32, ABI_PARAM1, rb, ro);
MOV(32, R(RSCRATCH3), Imm32(cycles));
MOV(32, R(RSCRATCH), R(RSCRATCH2));
SHR(32, R(RSCRATCH), Imm8(24));
AND(32, R(RSCRATCH), Imm8(0xF));
void** funcArray;
if (load)
funcArray = Num ? ReadMemFuncs7[CodeRegion == 0x02] : ReadMemFuncs9;
else
{
funcArray = Num ? WriteMemFuncs7[CodeRegion == 0x02] : WriteMemFuncs9;
MOV(32, R(R11), rd);
}
CALLptr(MScaled(RSCRATCH, SCALE_8, squeezePointer(funcArray)));
if (load) Comp_MemAccess(rd, false, !load, byte ? 8 : 32);
MOV(32, rd, R(RSCRATCH));
} }
void Compiler::T_Comp_MemImm() void Compiler::T_Comp_MemImm()
{ {
// TODO: aufräumen!!! OpArg rd = MapReg(CurInstr.T_Reg(0));
OpArg rd = MapReg(CurrentInstr.T_Reg(0)); OpArg rb = MapReg(CurInstr.T_Reg(3));
OpArg rb = MapReg(CurrentInstr.T_Reg(3));
int op = (CurInstr.Instr >> 11) & 0x3;
int op = (CurrentInstr.Instr >> 11) & 0x3;
u32 offset = ((CurrentInstr.Instr >> 6) & 0x1F) * 4;
bool load = op & 0x1; bool load = op & 0x1;
bool byte = op & 0x2;
u32 offset = ((CurInstr.Instr >> 6) & 0x1F) * (byte ? 1 : 4);
LEA(32, RSCRATCH2, MDisp(rb.GetSimpleReg(), offset)); LEA(32, ABI_PARAM1, MDisp(rb.GetSimpleReg(), offset));
u32 cycles = Num ? NDS::ARM7MemTimings[CurrentInstr.CodeCycles][0] : (R15 & 0x2 ? 0 : CurrentInstr.CodeCycles);
MOV(32, R(RSCRATCH3), Imm32(cycles));
MOV(32, R(RSCRATCH), R(RSCRATCH2));
SHR(32, R(RSCRATCH), Imm8(24));
AND(32, R(RSCRATCH), Imm8(0xF));
void** funcArray;
if (load)
funcArray = Num ? ReadMemFuncs7[CodeRegion == 0x02] : ReadMemFuncs9;
else
{
funcArray = Num ? WriteMemFuncs7[CodeRegion == 0x02] : WriteMemFuncs9;
MOV(32, R(R11), rd);
}
CALLptr(MScaled(RSCRATCH, SCALE_8, squeezePointer(funcArray)));
if (load) Comp_MemAccess(rd, false, !load, byte ? 8 : 32);
MOV(32, rd, R(RSCRATCH)); }
// Compiles a Thumb register-offset halfword / sign-extended transfer.
// The two-bit op field (instruction bits 10-11) decodes as:
//   0: halfword store
//   1: sign-extended byte load
//   2: halfword load (zero-extended)
//   3: sign-extended halfword load
// The address rb + ro is placed in ABI_PARAM1 and the access is delegated to
// Comp_MemAccess.
void Compiler::T_Comp_MemRegHalf()
{
    OpArg rd = MapReg(CurInstr.T_Reg(0));
    OpArg rb = MapReg(CurInstr.T_Reg(3));
    OpArg ro = MapReg(CurInstr.T_Reg(6));

    bool isStore = false;
    bool signExtend = false;
    int size = 16;
    switch ((CurInstr.Instr >> 10) & 0x3)
    {
    case 0:
        isStore = true;
        break;
    case 1:
        size = 8;
        signExtend = true;
        break;
    case 2:
        break;
    case 3:
        signExtend = true;
        break;
    }

    MOV_sum(32, ABI_PARAM1, rb, ro);

    Comp_MemAccess(rd, signExtend, isStore, size);
}
// Compiles a Thumb halfword load/store with an immediate offset.
// Emits the address computation (rb + offset) into ABI_PARAM1 and delegates
// the access to Comp_MemAccess as a 16-bit transfer without sign extension.
void Compiler::T_Comp_MemImmHalf()
{
OpArg rd = MapReg(CurInstr.T_Reg(0));
OpArg rb = MapReg(CurInstr.T_Reg(3));
// Instruction bits 6-10, pre-multiplied by two: shifting by 5 instead of 6
// and masking with 0x3E leaves the field value doubled (a byte offset that
// is always even).
u32 offset = (CurInstr.Instr >> 5) & 0x3E;
// Bit 11 set means load, clear means store.
bool load = CurInstr.Instr & (1 << 11);
LEA(32, ABI_PARAM1, MDisp(rb.GetSimpleReg(), offset));
Comp_MemAccess(rd, false, !load, 16);
} }
} }

View File

@ -317,7 +317,7 @@ Info Decode(bool thumb, u32 num, u32 instr)
else else
{ {
u32 data = ARMInstrTable[((instr >> 4) & 0xF) | ((instr >> 16) & 0xFF0)]; u32 data = ARMInstrTable[((instr >> 4) & 0xF) | ((instr >> 16) & 0xFF0)];
if ((instr & 0xFE000000) == 0xFA000000) if (num == 0 && (instr & 0xFE000000) == 0xFA000000)
data = A_BLX_IMM; data = A_BLX_IMM;
if (data & A_ARM9Only && num != 0) if (data & A_ARM9Only && num != 0)

View File

@ -524,6 +524,8 @@ void Reset()
KeyCnt = 0; KeyCnt = 0;
RCnt = 0; RCnt = 0;
ARMJIT::ResetBlocks();
NDSCart::Reset(); NDSCart::Reset();
GBACart::Reset(); GBACart::Reset();
GPU::Reset(); GPU::Reset();