Merge pull request #667 from Arisotura/generic_jit

merge jit
This commit is contained in:
Arisotura 2020-07-01 00:01:11 +02:00 committed by GitHub
commit 62c6e2f703
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
66 changed files with 27779 additions and 456 deletions

2
.gitignore vendored
View File

@ -9,3 +9,5 @@ melon_grc.h
cmake-build
cmake-build-debug
.idea
*.exe

View File

@ -14,6 +14,42 @@ if (NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
include(CheckSymbolExists)
# detect_architecture(<symbol> <arch>)
#
# Checks whether the compiler predefines the preprocessor macro <symbol> and,
# if it does, records <arch> as the target architecture. The function is a
# no-op once ARCHITECTURE is defined, so the first matching call wins.
#
# On a match, in the caller's scope:
#   ARCHITECTURE         is set to <arch>
#   ARCHITECTURE_<arch>  is set to 1
# and -DARCHITECTURE_<arch>=1 is added to the compile definitions.
function(detect_architecture symbol arch)
if (NOT DEFINED ARCHITECTURE)
# silence the probe's status output
set(CMAKE_REQUIRED_QUIET 1)
# no header list: the symbol is expected to be a compiler-predefined macro
check_symbol_exists("${symbol}" "" ARCHITECTURE_${arch})
unset(CMAKE_REQUIRED_QUIET)
# The output variable needs to be unique across invocations otherwise
# CMake's crazy scope rules will keep it defined
if (ARCHITECTURE_${arch})
set(ARCHITECTURE "${arch}" PARENT_SCOPE)
set(ARCHITECTURE_${arch} 1 PARENT_SCOPE)
add_definitions(-DARCHITECTURE_${arch}=1)
endif()
endif()
endfunction()
detect_architecture("__x86_64__" x86_64)
detect_architecture("__i386__" x86)
detect_architecture("__arm__" ARM)
detect_architecture("__aarch64__" ARM64)
# The JIT option is only offered when the detected architecture is x86_64 or
# ARM64; the help text therefore must not describe it as x64-only.
if (ARCHITECTURE STREQUAL x86_64 OR ARCHITECTURE STREQUAL ARM64)
option(ENABLE_JIT "Enable JIT recompiler" ON)
endif()
# JIT_ENABLED is the preprocessor symbol the sources test for.
if (ENABLE_JIT)
add_definitions(-DJIT_ENABLED)
endif()
if (CMAKE_BUILD_TYPE STREQUAL Release)
option(ENABLE_LTO "Enable link-time optimization" ON)
else()
option(ENABLE_LTO "Enable link-time optimization" OFF)
endif()
if (CMAKE_BUILD_TYPE STREQUAL Debug)
add_compile_options(-Og)
endif()

View File

@ -21,8 +21,15 @@
#include "DSi.h"
#include "ARM.h"
#include "ARMInterpreter.h"
#include "Config.h"
#include "AREngine.h"
#include "ARMJIT.h"
#include "Config.h"
#ifdef JIT_ENABLED
#include "ARMJIT.h"
#include "ARMJIT_Memory.h"
#endif
// instruction timing notes
//
@ -72,7 +79,9 @@ ARM::~ARM()
// Constructs the CPU with number 0.
ARMv5::ARMv5() : ARM(0)
{
#ifndef JIT_ENABLED
    // In non-JIT builds DTCM is a heap buffer owned by this object and
    // released in the destructor. It must be sized by the fixed physical
    // size: DTCMSize is a runtime member that has not been assigned yet when
    // the constructor runs, so using it here would read an indeterminate value.
    DTCM = new u8[DTCMPhysicalSize];
#endif
}
ARMv4::ARMv4() : ARM(1)
@ -80,6 +89,13 @@ ARMv4::ARMv4() : ARM(1)
//
}
// Releases the DTCM buffer allocated by the constructor. In JIT builds the
// constructor does not allocate it, so there is nothing to free here.
ARMv5::~ARMv5()
{
#ifndef JIT_ENABLED
delete[] DTCM;
#endif
}
void ARM::Reset()
{
Cycles = 0;
@ -96,6 +112,12 @@ void ARM::Reset()
CodeMem.Mem = NULL;
#ifdef JIT_ENABLED
FastBlockLookup = NULL;
FastBlockLookupStart = 0;
FastBlockLookupSize = 0;
#endif
// zorp
JumpTo(ExceptionBase);
}
@ -123,7 +145,6 @@ void ARMv5::Reset()
GetMemRegion = NDS::ARM9GetMemRegion;
}
CP15Reset();
ARM::Reset();
}
@ -158,7 +179,11 @@ void ARM::DoSavestate(Savestate* file)
file->Var32((u32*)&Cycles);
//file->Var32((u32*)&CyclesToRun);
file->Var32(&Halted);
// hack to make save states compatible
u32 halted = Halted;
file->Var32(&halted);
Halted = halted;
file->VarArray(R, 16*sizeof(u32));
file->Var32(&CPSR);
@ -168,6 +193,15 @@ void ARM::DoSavestate(Savestate* file)
file->VarArray(R_IRQ, 3*sizeof(u32));
file->VarArray(R_UND, 3*sizeof(u32));
file->Var32(&CurInstr);
#ifdef JIT_ENABLED
    if (file->Saving && Config::JIT_Enable)
    {
        // hack, the JIT doesn't really pipeline
        // but we still want JIT save states to be
        // loaded while running the interpreter.
        // NextInstr is serialized right after this, so it has to be
        // refreshed when *saving*; refreshing it while loading would be
        // pointless because the values are immediately overwritten from
        // the file.
        FillPipeline();
    }
#endif
file->VarArray(NextInstr, 2*sizeof(u32));
file->Var32(&ExceptionBase);
@ -240,15 +274,15 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr)
if (addr & 0x2)
{
NextInstr[0] = CodeRead32(addr-2, true) >> 16;
Cycles += CodeCycles;
Cycles -= CodeCycles;
NextInstr[1] = CodeRead32(addr+2, false);
Cycles += CodeCycles;
Cycles -= CodeCycles;
}
else
{
NextInstr[0] = CodeRead32(addr, true);
NextInstr[1] = NextInstr[0] >> 16;
Cycles += CodeCycles;
Cycles -= CodeCycles;
}
CPSR |= 0x20;
@ -261,9 +295,9 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr)
if (newregion != oldregion) SetupCodeMem(addr);
NextInstr[0] = CodeRead32(addr, true);
Cycles += CodeCycles;
Cycles -= CodeCycles;
NextInstr[1] = CodeRead32(addr+4, false);
Cycles += CodeCycles;
Cycles -= CodeCycles;
CPSR &= ~0x20;
}
@ -303,7 +337,7 @@ void ARMv4::JumpTo(u32 addr, bool restorecpsr)
NextInstr[0] = CodeRead16(addr);
NextInstr[1] = CodeRead16(addr+2);
Cycles += NDS::ARM7MemTimings[CodeCycles][0] + NDS::ARM7MemTimings[CodeCycles][1];
Cycles -= NDS::ARM7MemTimings[CodeCycles][0] + NDS::ARM7MemTimings[CodeCycles][1];
CPSR |= 0x20;
}
@ -316,7 +350,7 @@ void ARMv4::JumpTo(u32 addr, bool restorecpsr)
NextInstr[0] = CodeRead32(addr);
NextInstr[1] = CodeRead32(addr+4);
Cycles += NDS::ARM7MemTimings[CodeCycles][2] + NDS::ARM7MemTimings[CodeCycles][3];
Cycles -= NDS::ARM7MemTimings[CodeCycles][2] + NDS::ARM7MemTimings[CodeCycles][3];
CPSR &= ~0x20;
}
@ -558,7 +592,7 @@ void ARMv5::Execute()
else
AddCycles_C();
}
// TODO optimize this shit!!!
if (Halted)
{
@ -575,7 +609,7 @@ void ARMv5::Execute()
}*/
if (IRQ) TriggerIRQ();
NDS::ARM9Timestamp += Cycles;
NDS::ARM9Timestamp -= Cycles;
Cycles = 0;
}
@ -583,6 +617,75 @@ void ARMv5::Execute()
Halted = 0;
}
#ifdef JIT_ENABLED
void ARMv5::ExecuteJIT()
{
if (Halted)
{
if (Halted == 2)
{
Halted = 0;
}
else if (NDS::HaltInterrupted(0))
{
Halted = 0;
if (NDS::IME[0] & 0x1)
TriggerIRQ();
}
else
{
NDS::ARM9Timestamp = NDS::ARM9Target;
return;
}
}
while (NDS::ARM9Timestamp < NDS::ARM9Target)
{
u32 instrAddr = R[15] - ((CPSR&0x20)?2:4);
// hack so Cycles <= 0 becomes Cycles < 0
Cycles = NDS::ARM9Target - NDS::ARM9Timestamp - 1;
if ((instrAddr < FastBlockLookupStart || instrAddr >= (FastBlockLookupStart + FastBlockLookupSize))
&& !ARMJIT::SetupExecutableRegion(0, instrAddr, FastBlockLookup, FastBlockLookupStart, FastBlockLookupSize))
{
NDS::ARM9Timestamp = NDS::ARM9Target;
printf("ARMv5 PC in non executable region %08X\n", R[15]);
return;
}
ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlock(0, FastBlockLookup,
instrAddr - FastBlockLookupStart, instrAddr);
if (block)
ARM_Dispatch(this, block);
else
ARMJIT::CompileBlock(this);
NDS::ARM9Timestamp = NDS::ARM9Target - Cycles - 1;
if (StopExecution)
{
if (IRQ)
TriggerIRQ();
if (Halted || IdleLoop)
{
bool idleLoop = IdleLoop;
IdleLoop = 0;
if ((Halted == 1 || idleLoop) && NDS::ARM9Timestamp < NDS::ARM9Target)
{
NDS::ARM9Timestamp = NDS::ARM9Target;
}
break;
}
}
}
if (Halted == 2)
Halted = 0;
}
#endif
void ARMv4::Execute()
{
if (Halted)
@ -652,10 +755,131 @@ void ARMv4::Execute()
}*/
if (IRQ) TriggerIRQ();
NDS::ARM7Timestamp += Cycles;
NDS::ARM7Timestamp -= Cycles;
Cycles = 0;
}
if (Halted == 2)
Halted = 0;
if (Halted == 4)
{
DSi::SoftReset();
Halted = 2;
}
}
#ifdef JIT_ENABLED
void ARMv4::ExecuteJIT()
{
if (Halted)
{
if (Halted == 2)
{
Halted = 0;
}
else if (NDS::HaltInterrupted(1))
{
Halted = 0;
if (NDS::IME[1] & 0x1)
TriggerIRQ();
}
else
{
NDS::ARM7Timestamp = NDS::ARM7Target;
return;
}
}
while (NDS::ARM7Timestamp < NDS::ARM7Target)
{
u32 instrAddr = R[15] - ((CPSR&0x20)?2:4);
Cycles = NDS::ARM7Target - NDS::ARM7Timestamp - 1;
if ((instrAddr < FastBlockLookupStart || instrAddr >= (FastBlockLookupStart + FastBlockLookupSize))
&& !ARMJIT::SetupExecutableRegion(1, instrAddr, FastBlockLookup, FastBlockLookupStart, FastBlockLookupSize))
{
NDS::ARM7Timestamp = NDS::ARM7Target;
printf("ARMv4 PC in non executable region %08X\n", R[15]);
return;
}
ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlock(1, FastBlockLookup,
instrAddr - FastBlockLookupStart, instrAddr);
if (block)
ARM_Dispatch(this, block);
else
ARMJIT::CompileBlock(this);
NDS::ARM7Timestamp = NDS::ARM7Target - Cycles - 1;
// TODO optimize this shit!!!
if (StopExecution)
{
if (IRQ)
TriggerIRQ();
if (Halted || IdleLoop)
{
bool idleLoop = IdleLoop;
IdleLoop = 0;
if ((Halted == 1 || idleLoop) && NDS::ARM7Timestamp < NDS::ARM7Target)
{
NDS::ARM7Timestamp = NDS::ARM7Target;
}
break;
}
}
}
if (Halted == 2)
Halted = 0;
if (Halted == 4)
{
DSi::SoftReset();
Halted = 2;
}
}
#endif
// Reloads NextInstr[0..1] from code memory based on the current R[15] and
// the Thumb bit (CPSR bit 5). All fetches are 32-bit and non-branch.
void ARMv5::FillPipeline()
{
    SetupCodeMem(R[15]);

    const bool thumb = (CPSR & 0x20) != 0;
    if (!thumb)
    {
        NextInstr[0] = CodeRead32(R[15] - 4, false);
        NextInstr[1] = CodeRead32(R[15], false);
        return;
    }

    const bool upperHalfword = ((R[15] - 2) & 0x2) != 0;
    if (upperHalfword)
    {
        // first opcode is the top half of one word, the next word follows
        NextInstr[0] = CodeRead32(R[15] - 4, false) >> 16;
        NextInstr[1] = CodeRead32(R[15], false);
    }
    else
    {
        // both opcodes live in the same 32-bit word
        NextInstr[0] = CodeRead32(R[15] - 2, false);
        NextInstr[1] = NextInstr[0] >> 16;
    }
}
// Reloads NextInstr[0..1] from code memory based on the current R[15]:
// two 16-bit fetches in Thumb state (CPSR bit 5 set), two 32-bit fetches otherwise.
void ARMv4::FillPipeline()
{
    SetupCodeMem(R[15]);

    const bool thumb = (CPSR & 0x20) != 0;
    if (!thumb)
    {
        NextInstr[0] = CodeRead32(R[15] - 4);
        NextInstr[1] = CodeRead32(R[15]);
        return;
    }

    NextInstr[0] = CodeRead16(R[15] - 2);
    NextInstr[1] = CodeRead16(R[15]);
}

120
src/ARM.h
View File

@ -32,16 +32,21 @@ enum
RWFlags_ForceUser = (1<<21),
};
const u32 ITCMPhysicalSize = 0x8000;
const u32 DTCMPhysicalSize = 0x4000;
class ARM
{
public:
ARM(u32 num);
~ARM(); // destroy shit
virtual ~ARM(); // destroy shit
virtual void Reset();
virtual void DoSavestate(Savestate* file);
virtual void FillPipeline() = 0;
virtual void JumpTo(u32 addr, bool restorecpsr = false) = 0;
void RestoreCPSR();
@ -52,6 +57,9 @@ public:
}
virtual void Execute() = 0;
// JIT counterpart of Execute(). Guarded by JIT_ENABLED, which is the symbol
// the build defines and the one the ARMv5/ARMv4 overrides are guarded by
// (ENABLE_JIT is only the name of the CMake option, never a preprocessor macro).
#ifdef JIT_ENABLED
virtual void ExecuteJIT() = 0;
#endif
bool CheckCondition(u32 code)
{
@ -107,9 +115,16 @@ public:
u32 Num;
s32 Cycles;
u32 Halted;
u32 IRQ; // nonzero to trigger IRQ
union
{
struct
{
u8 Halted;
u8 IRQ; // nonzero to trigger IRQ
u8 IdleLoop;
};
u32 StopExecution;
};
u32 CodeRegion;
s32 CodeCycles;
@ -131,6 +146,11 @@ public:
NDS::MemRegion CodeMem;
#ifdef JIT_ENABLED
u32 FastBlockLookupStart, FastBlockLookupSize;
u64* FastBlockLookup;
#endif
static u32 ConditionTable[16];
protected:
@ -146,6 +166,7 @@ class ARMv5 : public ARM
{
public:
ARMv5();
~ARMv5();
void Reset();
@ -153,12 +174,17 @@ public:
void UpdateRegionTimings(u32 addrstart, u32 addrend);
void FillPipeline();
void JumpTo(u32 addr, bool restorecpsr = false);
void PrefetchAbort();
void DataAbort();
void Execute();
#ifdef JIT_ENABLED
void ExecuteJIT();
#endif
// all code accesses are forced nonseq 32bit
u32 CodeRead32(u32 addr, bool branch);
@ -176,14 +202,14 @@ public:
{
// code only. always nonseq 32-bit for ARM9.
s32 numC = (R[15] & 0x2) ? 0 : CodeCycles;
Cycles += numC;
Cycles -= numC;
}
void AddCycles_CI(s32 numI)
{
// code+internal
s32 numC = (R[15] & 0x2) ? 0 : CodeCycles;
Cycles += numC + numI;
Cycles -= numC + numI;
}
void AddCycles_CDI()
@ -194,9 +220,9 @@ public:
s32 numD = DataCycles;
//if (DataRegion != CodeRegion)
Cycles += std::max(numC + numD - 6, std::max(numC, numD));
Cycles -= std::max(numC + numD - 6, std::max(numC, numD));
//else
// Cycles += numC + numD;
// Cycles -= numC + numD;
}
void AddCycles_CD()
@ -206,9 +232,9 @@ public:
s32 numD = DataCycles;
//if (DataRegion != CodeRegion)
Cycles += std::max(numC + numD - 6, std::max(numC, numD));
Cycles -= std::max(numC + numD - 6, std::max(numC, numD));
//else
// Cycles += numC + numD;
// Cycles -= numC + numD;
}
void GetCodeMemRegion(u32 addr, NDS::MemRegion* region);
@ -237,10 +263,14 @@ public:
u32 DTCMSetting, ITCMSetting;
u8 ITCM[0x8000];
// for aarch64 JIT they need to go up here
// to be addressable by a 12-bit immediate
u32 ITCMSize;
u8 DTCM[0x4000];
u32 DTCMBase, DTCMSize;
s32 RegionCodeCycles;
u8 ITCM[ITCMPhysicalSize];
u8* DTCM;
u8 ICache[0x2000];
u32 ICacheTags[64*4];
@ -265,7 +295,6 @@ public:
// code/16N/32N/32S
u8 MemTimings[0x100000][4];
s32 RegionCodeCycles;
u8* CurICacheLine;
bool (*GetMemRegion)(u32 addr, bool write, NDS::MemRegion* region);
@ -278,9 +307,14 @@ public:
void Reset();
void FillPipeline();
void JumpTo(u32 addr, bool restorecpsr = false);
void Execute();
#ifdef JIT_ENABLED
void ExecuteJIT();
#endif
u16 CodeRead16(u32 addr)
{
@ -295,8 +329,8 @@ public:
void DataRead8(u32 addr, u32* val)
{
*val = BusRead8(addr);
DataRegion = addr >> 24;
DataCycles = NDS::ARM7MemTimings[DataRegion][0];
DataRegion = addr;
DataCycles = NDS::ARM7MemTimings[addr >> 15][0];
}
void DataRead16(u32 addr, u32* val)
@ -304,8 +338,8 @@ public:
addr &= ~1;
*val = BusRead16(addr);
DataRegion = addr >> 24;
DataCycles = NDS::ARM7MemTimings[DataRegion][0];
DataRegion = addr;
DataCycles = NDS::ARM7MemTimings[addr >> 15][0];
}
void DataRead32(u32 addr, u32* val)
@ -313,8 +347,8 @@ public:
addr &= ~3;
*val = BusRead32(addr);
DataRegion = addr >> 24;
DataCycles = NDS::ARM7MemTimings[DataRegion][2];
DataRegion = addr;
DataCycles = NDS::ARM7MemTimings[addr >> 15][2];
}
void DataRead32S(u32 addr, u32* val)
@ -322,14 +356,14 @@ public:
addr &= ~3;
*val = BusRead32(addr);
DataCycles += NDS::ARM7MemTimings[DataRegion][3];
DataCycles += NDS::ARM7MemTimings[addr >> 15][3];
}
void DataWrite8(u32 addr, u8 val)
{
BusWrite8(addr, val);
DataRegion = addr >> 24;
DataCycles = NDS::ARM7MemTimings[DataRegion][0];
DataRegion = addr;
DataCycles = NDS::ARM7MemTimings[addr >> 15][0];
}
void DataWrite16(u32 addr, u16 val)
@ -337,8 +371,8 @@ public:
addr &= ~1;
BusWrite16(addr, val);
DataRegion = addr >> 24;
DataCycles = NDS::ARM7MemTimings[DataRegion][0];
DataRegion = addr;
DataCycles = NDS::ARM7MemTimings[addr >> 15][0];
}
void DataWrite32(u32 addr, u32 val)
@ -346,8 +380,8 @@ public:
addr &= ~3;
BusWrite32(addr, val);
DataRegion = addr >> 24;
DataCycles = NDS::ARM7MemTimings[DataRegion][2];
DataRegion = addr;
DataCycles = NDS::ARM7MemTimings[addr >> 15][2];
}
void DataWrite32S(u32 addr, u32 val)
@ -355,20 +389,20 @@ public:
addr &= ~3;
BusWrite32(addr, val);
DataCycles += NDS::ARM7MemTimings[DataRegion][3];
DataCycles += NDS::ARM7MemTimings[addr >> 15][3];
}
void AddCycles_C()
{
// code only. this code fetch is sequential.
Cycles += NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?1:3];
Cycles -= NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?1:3];
}
void AddCycles_CI(s32 num)
{
// code+internal. results in a nonseq code fetch.
Cycles += NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?0:2] + num;
Cycles -= NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?0:2] + num;
}
void AddCycles_CDI()
@ -377,24 +411,24 @@ public:
s32 numC = NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?0:2];
s32 numD = DataCycles;
if (DataRegion == 0x02) // mainRAM
if ((DataRegion >> 24) == 0x02) // mainRAM
{
if (CodeRegion == 0x02)
Cycles += numC + numD;
Cycles -= numC + numD;
else
{
numC++;
Cycles += std::max(numC + numD - 3, std::max(numC, numD));
Cycles -= std::max(numC + numD - 3, std::max(numC, numD));
}
}
else if (CodeRegion == 0x02)
{
numD++;
Cycles += std::max(numC + numD - 3, std::max(numC, numD));
Cycles -= std::max(numC + numD - 3, std::max(numC, numD));
}
else
{
Cycles += numC + numD + 1;
Cycles -= numC + numD + 1;
}
}
@ -404,20 +438,20 @@ public:
s32 numC = NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?0:2];
s32 numD = DataCycles;
if (DataRegion == 0x02)
if ((DataRegion >> 24) == 0x02)
{
if (CodeRegion == 0x02)
Cycles += numC + numD;
Cycles -= numC + numD;
else
Cycles += std::max(numC + numD - 3, std::max(numC, numD));
Cycles -= std::max(numC + numD - 3, std::max(numC, numD));
}
else if (CodeRegion == 0x02)
{
Cycles += std::max(numC + numD - 3, std::max(numC, numD));
Cycles -= std::max(numC + numD - 3, std::max(numC, numD));
}
else
{
Cycles += numC + numD;
Cycles -= numC + numD;
}
}
};
@ -430,4 +464,12 @@ void T_UNK(ARM* cpu);
}
namespace NDS
{
extern ARMv5* ARM9;
extern ARMv4* ARM7;
}
#endif // ARM_H

View File

@ -28,6 +28,15 @@ namespace ARMInterpreter
extern void (*ARMInstrTable[4096])(ARM* cpu);
extern void (*THUMBInstrTable[1024])(ARM* cpu);
void A_MSR_IMM(ARM* cpu);
void A_MSR_REG(ARM* cpu);
void A_MRS(ARM* cpu);
void A_MCR(ARM* cpu);
void A_MRC(ARM* cpu);
void A_SVC(ARM* cpu);
void T_SVC(ARM* cpu);
void A_BLX_IMM(ARM* cpu); // I'm a special one look at me
}

1204
src/ARMJIT.cpp Normal file

File diff suppressed because it is too large Load Diff

37
src/ARMJIT.h Normal file
View File

@ -0,0 +1,37 @@
#ifndef ARMJIT_H
#define ARMJIT_H
#include "types.h"
#include "ARM.h"
#include "ARM_InstrInfo.h"
// Public interface of the ARM JIT recompiler.
namespace ARMJIT
{
// Entry point of a compiled block; executed via ARM_Dispatch().
typedef void (*JitBlockEntry)();
// Lifecycle hooks. NOTE(review): exact responsibilities are defined in
// ARMJIT.cpp, which is not visible here.
void Init();
void DeInit();
void Reset();
// Invalidation entry points, presumably called when code memory is written
// to -- TODO confirm against ARMJIT.cpp.
void CheckAndInvalidateITCM();
void InvalidateByAddr(u32 pseudoPhysical);
template <u32 num, int region>
void CheckAndInvalidate(u32 addr);
// Called by ExecuteJIT() when LookUpBlock() finds no block for the current PC.
void CompileBlock(ARM* cpu);
void ResetBlockCache();
// Looks up a compiled block. ExecuteJIT() passes the CPU number, the CPU's
// FastBlockLookup table, the instruction address relative to
// FastBlockLookupStart, and the instruction address itself. A null result
// means no block is available.
JitBlockEntry LookUpBlock(u32 num, u64* entries, u32 offset, u32 addr);
// Updates the caller's fast-lookup window (entry/start/size, all passed by
// reference) for blockAddr. ExecuteJIT() treats a false return as "PC in non
// executable region".
bool SetupExecutableRegion(u32 num, u32 blockAddr, u64*& entry, u32& start, u32& size);
}
// Transfers control to a compiled block on behalf of cpu. Declared with C
// linkage; the definition is not visible in this header.
extern "C" void ARM_Dispatch(ARM* cpu, ARMJIT::JitBlockEntry entry);
#endif

View File

@ -0,0 +1,930 @@
#include "ARMJIT_Compiler.h"
using namespace Arm64Gen;
namespace ARMJIT
{
// Emits code for a shift whose amount comes from a register.
//
//  op   shift kind as handled below: 0 = LSL, 1 = LSR, 2 = ASR, 3 = ROR
//  S    request carry-out update; dropped if the instruction's carry flag
//       (SetFlags bit 0x2) is not needed
//  op2  in: op2.Reg.Rm is the value to shift
//       out: replaced by "W0, LSL #0" (the shifted result is left in W0)
//  rs   register holding the shift amount; only its low 8 bits are used
//
// Uses W0, W1 and W2 as scratch and, when S is in effect, writes the
// carry-out into bit 29 of RCPSR.
void Compiler::Comp_RegShiftReg(int op, bool S, Op2& op2, ARM64Reg rs)
{
if (!(CurInstr.SetFlags & 0x2))
S = false;
CPSRDirty |= S;
// W1 = shift amount (low byte of rs)
UBFX(W1, rs, 0, 8);
if (!S)
{
// result only, no carry-out
if (op == 3)
RORV(W0, op2.Reg.Rm, W1);
else
{
// the flags of this compare select the ">= 32" handling below
CMP(W1, 32);
if (op == 2)
{
// ASR: amounts of 32 or more are clamped to 31
MOVI2R(W2, 31);
CSEL(W1, W2, W1, CC_GE);
ASRV(W0, op2.Reg.Rm, W1);
}
else
{
if (op == 0)
LSLV(W0, op2.Reg.Rm, W1);
else if (op == 1)
LSRV(W0, op2.Reg.Rm, W1);
// LSL/LSR: amounts of 32 or more yield zero
CSEL(W0, WZR, W0, CC_GE);
}
}
}
else
{
// Carry-out variant: shift by (amount - 1), capture the bit that the last
// shift step pushes out, then apply the final single-step shift.
MOV(W0, op2.Reg.Rm);
// amount == 0: value passes through unchanged and the carry is left alone
FixupBranch zero = CBZ(W1);
SUB(W1, W1, 1);
if (op == 3)
{
RORV(W0, op2.Reg.Rm, W1);
// bit 0 of the partially rotated value becomes the carry
BFI(RCPSR, W0, 29, 1);
}
else
{
// compares (amount - 1) against 31
CMP(W1, 31);
if (op == 2)
{
MOVI2R(W2, 31);
CSEL(W1, W2, W1, CC_GT);
ASRV(W0, op2.Reg.Rm, W1);
BFI(RCPSR, W0, 29, 1);
}
else
{
if (op == 0)
{
LSLV(W0, op2.Reg.Rm, W1);
// LSL: the carry candidate is the top bit of the partial result
UBFX(W1, W0, 31, 1);
}
else if (op == 1)
LSRV(W0, op2.Reg.Rm, W1);
// carry source: W1 for LSL, bit 0 of W0 for LSR; forced to zero when
// (amount - 1) > 31
CSEL(W1, WZR, op ? W0 : W1, CC_GT);
BFI(RCPSR, W1, 29, 1);
// result is zero when (amount - 1) >= 31
CSEL(W0, WZR, W0, CC_GE);
}
}
// final shift step by one
MOV(W0, W0, ArithOption(W0, (ShiftType)op, 1));
SetJumpTarget(zero);
}
op2 = Op2(W0, ST_LSL, 0);
}
// Folds an immediate-amount shift into op2 and, when S is in effect, emits
// code that copies the shifter carry-out into bit 29 of RCPSR.
//
//  op      0 = LSL, 1 = LSR, 2 = ASR, 3 = ROR (amount 0 is handled as a
//          rotate-through-carry by one bit)
//  amount  encoded shift amount
//  S       request carry-out update; dropped if SetFlags bit 0x2 is clear
//  op2     in: op2.Reg.Rm is the source register; out: the shifted operand
//  tmp     scratch register
void Compiler::Comp_RegShiftImm(int op, int amount, bool S, Op2& op2, ARM64Reg tmp)
{
    S = S && (CurInstr.SetFlags & 0x2);
    CPSRDirty |= S;

    // copies bit `bit` of the source register into the carry flag
    auto carryFromSourceBit = [&](int bit)
    {
        UBFX(tmp, op2.Reg.Rm, bit, 1);
        BFI(RCPSR, tmp, 29, 1);
    };

    if (op == 0) // LSL
    {
        if (S && amount)
            carryFromSourceBit(32 - amount);
        op2 = Op2(op2.Reg.Rm, ST_LSL, amount);
    }
    else if (op == 1) // LSR
    {
        // an encoded amount of 0 stands for 32
        if (S)
            carryFromSourceBit((amount ? amount : 32) - 1);
        if (amount == 0)
            op2 = Op2(0);
        else
            op2 = Op2(op2.Reg.Rm, ST_LSR, amount);
    }
    else if (op == 2) // ASR
    {
        // an encoded amount of 0 stands for 32, which shifts like 31
        if (S)
            carryFromSourceBit((amount ? amount : 32) - 1);
        op2 = Op2(op2.Reg.Rm, ST_ASR, amount ? amount : 31);
    }
    else if (op == 3) // ROR
    {
        if (amount != 0)
        {
            if (S)
                carryFromSourceBit(amount - 1);
            op2 = Op2(op2.Reg.Rm, ST_ROR, amount);
        }
        else
        {
            // old carry moves into bit 31, bit 0 of the source becomes the new carry
            UBFX(tmp, RCPSR, 29, 1);
            LSL(tmp, tmp, 31);
            if (S)
                BFI(RCPSR, op2.Reg.Rm, 29, 1);
            ORR(tmp, tmp, op2.Reg.Rm, ArithOption(tmp, ST_LSR, 1));
            op2 = Op2(tmp, ST_LSL, 0);
        }
    }
}
void Compiler::Comp_RetriveFlags(bool retriveCV)
{
if (CurInstr.SetFlags)
CPSRDirty = true;
if (CurInstr.SetFlags & 0x4)
{
CSET(W0, CC_EQ);
BFI(RCPSR, W0, 30, 1);
}
if (CurInstr.SetFlags & 0x8)
{
CSET(W0, CC_MI);
BFI(RCPSR, W0, 31, 1);
}
if (retriveCV)
{
if (CurInstr.SetFlags & 0x2)
{
CSET(W0, CC_CS);
BFI(RCPSR, W0, 29, 1);
}
if (CurInstr.SetFlags & 0x1)
{
CSET(W0, CC_VS);
BFI(RCPSR, W0, 28, 1);
}
}
}
// Emits a logical data-processing operation: rd = rn <op> op2.
//
//  op   0x0 = AND, 0x1 = EOR, 0xC = ORR, 0xE = BIC
//  S    update N/Z afterwards; ignored when the instruction needs no flags
//
// W0 is handed to the immediate helpers as scratch.
void Compiler::Comp_Logical(int op, bool S, ARM64Reg rd, ARM64Reg rn, Op2 op2)
{
    S = S && CurInstr.SetFlags;

    // EOR and ORR have no flag-setting form here, so they need an explicit test
    bool needsExplicitTest = false;

    switch (op)
    {
    case 0x0: // AND
        if (op2.IsImm)
        {
            if (S)
                ANDSI2R(rd, rn, op2.Imm, W0);
            else
                ANDI2R(rd, rn, op2.Imm, W0);
        }
        else if (S)
            ANDS(rd, rn, op2.Reg.Rm, op2.ToArithOption());
        else
            AND(rd, rn, op2.Reg.Rm, op2.ToArithOption());
        break;

    case 0x1: // EOR
        if (op2.IsImm)
            EORI2R(rd, rn, op2.Imm, W0);
        else
            EOR(rd, rn, op2.Reg.Rm, op2.ToArithOption());
        needsExplicitTest = true;
        break;

    case 0xC: // ORR
        if (op2.IsImm)
            ORRI2R(rd, rn, op2.Imm, W0);
        else
            ORR(rd, rn, op2.Reg.Rm, op2.ToArithOption());
        needsExplicitTest = true;
        break;

    case 0xE: // BIC
        if (op2.IsImm)
        {
            // BIC with an immediate is AND with the inverted immediate
            if (S)
                ANDSI2R(rd, rn, ~op2.Imm, W0);
            else
                ANDI2R(rd, rn, ~op2.Imm, W0);
        }
        else if (S)
            BICS(rd, rn, op2.Reg.Rm, op2.ToArithOption());
        else
            BIC(rd, rn, op2.Reg.Rm, op2.ToArithOption());
        break;
    }

    if (needsExplicitTest && S && FlagsNZNeeded())
        TST(rd, rd);

    if (S)
        Comp_RetriveFlags(false);
}
void Compiler::Comp_Arithmetic(int op, bool S, ARM64Reg rd, ARM64Reg rn, Op2 op2)
{
if (!op2.IsImm && op2.Reg.ShiftType == ST_ROR)
{
MOV(W0, op2.Reg.Rm, op2.ToArithOption());
op2 = Op2(W0, ST_LSL, 0);
}
if (S && !CurInstr.SetFlags)
S = false;
bool CVInGPR = false;
switch (op)
{
case 0x2: // SUB
if (S)
{
if (op2.IsImm)
SUBSI2R(rd, rn, op2.Imm, W0);
else
SUBS(rd, rn, op2.Reg.Rm, op2.ToArithOption());
}
else
{
if (op2.IsImm)
{
MOVI2R(W2, op2.Imm);
SUBI2R(rd, rn, op2.Imm, W0);
}
else
SUB(rd, rn, op2.Reg.Rm, op2.ToArithOption());
}
break;
case 0x3: // RSB
if (op2.IsZero())
{
op2 = Op2(WZR);
}
else if (op2.IsImm)
{
MOVI2R(W1, op2.Imm);
op2 = Op2(W1);
}
else if (op2.Reg.ShiftAmount != 0)
{
MOV(W1, op2.Reg.Rm, op2.ToArithOption());
op2 = Op2(W1);
}
if (S)
SUBS(rd, op2.Reg.Rm, rn);
else
SUB(rd, op2.Reg.Rm, rn);
break;
case 0x4: // ADD
if (S)
{
if (op2.IsImm)
ADDSI2R(rd, rn, op2.Imm, W0);
else
ADDS(rd, rn, op2.Reg.Rm, op2.ToArithOption());
}
else
{
if (op2.IsImm)
ADDI2R(rd, rn, op2.Imm, W0);
else
ADD(rd, rn, op2.Reg.Rm, op2.ToArithOption());
}
break;
case 0x5: // ADC
UBFX(W2, RCPSR, 29, 1);
if (S)
{
CVInGPR = true;
ADDS(W1, rn, W2);
CSET(W2, CC_CS);
CSET(W3, CC_VS);
if (op2.IsImm)
ADDSI2R(rd, W1, op2.Imm, W0);
else
ADDS(rd, W1, op2.Reg.Rm, op2.ToArithOption());
CSINC(W2, W2, WZR, CC_CC);
CSINC(W3, W3, WZR, CC_VC);
}
else
{
ADD(W1, rn, W2);
if (op2.IsImm)
ADDI2R(rd, W1, op2.Imm, W0);
else
ADD(rd, W1, op2.Reg.Rm, op2.ToArithOption());
}
break;
case 0x6: // SBC
UBFX(W2, RCPSR, 29, 1);
// W1 = -op2 - 1
if (op2.IsImm)
MOVI2R(W1, ~op2.Imm);
else
ORN(W1, WZR, op2.Reg.Rm, op2.ToArithOption());
if (S)
{
CVInGPR = true;
ADDS(W1, W2, W1);
CSET(W2, CC_CS);
CSET(W3, CC_VS);
ADDS(rd, rn, W1);
CSINC(W2, W2, WZR, CC_CC);
CSINC(W3, W3, WZR, CC_VC);
}
else
{
ADD(W1, W2, W1);
ADD(rd, rn, W1);
}
break;
case 0x7: // RSC
UBFX(W2, RCPSR, 29, 1);
// W1 = -rn - 1
MVN(W1, rn);
if (S)
{
CVInGPR = true;
ADDS(W1, W2, W1);
CSET(W2, CC_CS);
CSET(W3, CC_VS);
if (op2.IsImm)
ADDSI2R(rd, W1, op2.Imm);
else
ADDS(rd, W1, op2.Reg.Rm, op2.ToArithOption());
CSINC(W2, W2, WZR, CC_CC);
CSINC(W3, W3, WZR, CC_VC);
}
else
{
ADD(W1, W2, W1);
if (op2.IsImm)
ADDI2R(rd, W1, op2.Imm);
else
ADD(rd, W1, op2.Reg.Rm, op2.ToArithOption());
}
break;
}
if (S)
{
if (CVInGPR)
{
BFI(RCPSR, W2, 29, 1);
BFI(RCPSR, W3, 28, 1);
}
Comp_RetriveFlags(!CVInGPR);
}
}
// Emits a flag-only comparison of rn against op2 and stores the resulting
// flags in RCPSR.
//
//  op   0x8 = TST, 0x9 = TEQ, 0xA = CMP, 0xB = CMN
//
// C and V are taken from the host flags only for CMP and CMN. W0 is used as
// scratch.
void Compiler::Comp_Compare(int op, ARM64Reg rn, Op2 op2)
{
    // rotated register operands are materialized in W0 first
    const bool rotatedRegister = !op2.IsImm && op2.Reg.ShiftType == ST_ROR;
    if (rotatedRegister)
    {
        MOV(W0, op2.Reg.Rm, op2.ToArithOption());
        op2 = Op2(W0, ST_LSL, 0);
    }

    if (op == 0x8) // TST
    {
        if (op2.IsImm)
            TSTI2R(rn, op2.Imm, W0);
        else
            ANDS(WZR, rn, op2.Reg.Rm, op2.ToArithOption());
    }
    else if (op == 0x9) // TEQ
    {
        // exclusive-or into W0, then test the result
        if (op2.IsImm)
            EORI2R(W0, rn, op2.Imm, W0);
        else
            EOR(W0, rn, op2.Reg.Rm, op2.ToArithOption());
        TST(W0, W0);
    }
    else if (op == 0xA) // CMP
    {
        if (op2.IsImm)
            CMPI2R(rn, op2.Imm, W0);
        else
            CMP(rn, op2.Reg.Rm, op2.ToArithOption());
    }
    else if (op == 0xB) // CMN
    {
        if (op2.IsImm)
            ADDSI2R(WZR, rn, op2.Imm, W0);
        else
            CMN(rn, op2.Reg.Rm, op2.ToArithOption());
    }

    const bool arithmeticCompare = op >= 0xA;
    Comp_RetriveFlags(arithmeticCompare);
}
// also counts cycles!
void Compiler::A_Comp_GetOp2(bool S, Op2& op2)
{
if (CurInstr.Instr & (1 << 25))
{
Comp_AddCycles_C();
op2 = Op2(ROR(CurInstr.Instr & 0xFF, (CurInstr.Instr >> 7) & 0x1E));
}
else
{
int op = (CurInstr.Instr >> 5) & 0x3;
op2.Reg.Rm = MapReg(CurInstr.A_Reg(0));
if (CurInstr.Instr & (1 << 4))
{
Comp_AddCycles_CI(1);
ARM64Reg rs = MapReg(CurInstr.A_Reg(8));
if (CurInstr.A_Reg(0) == 15)
{
ADD(W0, op2.Reg.Rm, 4);
op2.Reg.Rm = W0;
}
Comp_RegShiftReg(op, S, op2, rs);
}
else
{
Comp_AddCycles_C();
int amount = (CurInstr.Instr >> 7) & 0x1F;
Comp_RegShiftImm(op, amount, S, op2);
}
}
}
void Compiler::A_Comp_ALUCmpOp()
{
u32 op = (CurInstr.Instr >> 21) & 0xF;
ARM64Reg rn = MapReg(CurInstr.A_Reg(16));
Op2 op2;
A_Comp_GetOp2(op <= 0x9, op2);
Comp_Compare(op, rn, op2);
}
void Compiler::A_Comp_ALUMovOp()
{
bool S = CurInstr.Instr & (1 << 20);
u32 op = (CurInstr.Instr >> 21) & 0xF;
ARM64Reg rd = MapReg(CurInstr.A_Reg(12));
Op2 op2;
A_Comp_GetOp2(S, op2);
if (op == 0xF) // MVN
{
if (op2.IsImm)
{
if (CurInstr.Cond() == 0xE)
RegCache.PutLiteral(CurInstr.A_Reg(12), ~op2.Imm);
MOVI2R(rd, ~op2.Imm);
}
else
ORN(rd, WZR, op2.Reg.Rm, op2.ToArithOption());
}
else // MOV
{
if (op2.IsImm)
{
if (CurInstr.Cond() == 0xE)
RegCache.PutLiteral(CurInstr.A_Reg(12), op2.Imm);
MOVI2R(rd, op2.Imm);
}
else
{
// ORR with shifted operand has cycles latency
if (op2.Reg.ShiftAmount > 0)
{
switch (op2.Reg.ShiftType)
{
case ST_LSL: LSL(rd, op2.Reg.Rm, op2.Reg.ShiftAmount); break;
case ST_LSR: LSR(rd, op2.Reg.Rm, op2.Reg.ShiftAmount); break;
case ST_ASR: ASR(rd, op2.Reg.Rm, op2.Reg.ShiftAmount); break;
case ST_ROR: ROR_(rd, op2.Reg.Rm, op2.Reg.ShiftAmount); break;
}
}
else
{
MOV(rd, op2.Reg.Rm, op2.ToArithOption());
}
}
}
if (S)
{
if (FlagsNZNeeded())
TST(rd, rd);
Comp_RetriveFlags(false);
}
if (CurInstr.Info.Branches())
Comp_JumpTo(rd, true, S);
}
void Compiler::A_Comp_ALUTriOp()
{
bool S = CurInstr.Instr & (1 << 20);
u32 op = (CurInstr.Instr >> 21) & 0xF;
bool logical = (1 << op) & 0xF303;
ARM64Reg rd = MapReg(CurInstr.A_Reg(12));
ARM64Reg rn = MapReg(CurInstr.A_Reg(16));
Op2 op2;
A_Comp_GetOp2(S && logical, op2);
if (op2.IsImm && op2.Imm == 0)
op2 = Op2(WZR, ST_LSL, 0);
if (logical)
Comp_Logical(op, S, rd, rn, op2);
else
Comp_Arithmetic(op, S, rd, rn, op2);
if (CurInstr.Info.Branches())
Comp_JumpTo(rd, true, S);
}
void Compiler::A_Comp_Clz()
{
Comp_AddCycles_C();
ARM64Reg rd = MapReg(CurInstr.A_Reg(12));
ARM64Reg rm = MapReg(CurInstr.A_Reg(0));
CLZ(rd, rm);
assert(Num == 0);
}
// Emits MUL (rd = rm * rs) or MLA (rd = rm * rs + rn) including cycle
// accounting and, if requested and needed, the N/Z flag update.
void Compiler::Comp_Mul_Mla(bool S, bool mla, ARM64Reg rd, ARM64Reg rm, ARM64Reg rs, ARM64Reg rn)
{
    if (Num != 0)
    {
        // CPU 1: the internal cycle count is derived at run time from the
        // number of leading sign bits of rs, divided by 8
        CLS(W0, rs);
        Comp_AddCycles_CI(mla ? 1 : 0, W0, ArithOption(W0, ST_LSR, 3));
    }
    else
    {
        // CPU 0: fixed internal cycle count
        Comp_AddCycles_CI(S ? 3 : 1);
    }

    if (!mla)
        MUL(rd, rm, rs);
    else
        MADD(rd, rm, rs, rn);

    if (S && FlagsNZNeeded())
    {
        TST(rd, rd);
        Comp_RetriveFlags(false);
    }
}
void Compiler::A_Comp_Mul_Long()
{
ARM64Reg rd = MapReg(CurInstr.A_Reg(16));
ARM64Reg rm = MapReg(CurInstr.A_Reg(0));
ARM64Reg rs = MapReg(CurInstr.A_Reg(8));
ARM64Reg rn = MapReg(CurInstr.A_Reg(12));
bool S = CurInstr.Instr & (1 << 20);
bool add = CurInstr.Instr & (1 << 21);
bool sign = CurInstr.Instr & (1 << 22);
if (Num == 0)
{
Comp_AddCycles_CI(S ? 3 : 1);
}
else
{
if (sign)
CLS(W0, rs);
else
CLZ(W0, rs);
Comp_AddCycles_CI(0, W0, ArithOption(W0, ST_LSR, 3));
}
if (add)
{
MOV(W0, rn);
BFI(X0, EncodeRegTo64(rd), 32, 32);
if (sign)
SMADDL(EncodeRegTo64(rn), rm, rs, X0);
else
UMADDL(EncodeRegTo64(rn), rm, rs, X0);
if (S && FlagsNZNeeded())
TST(EncodeRegTo64(rn), EncodeRegTo64(rn));
UBFX(EncodeRegTo64(rd), EncodeRegTo64(rn), 32, 32);
}
else
{
if (sign)
SMULL(EncodeRegTo64(rn), rm, rs);
else
UMULL(EncodeRegTo64(rn), rm, rs);
if (S && FlagsNZNeeded())
TST(EncodeRegTo64(rn), EncodeRegTo64(rn));
UBFX(EncodeRegTo64(rd), EncodeRegTo64(rn), 32, 32);
}
if (S)
Comp_RetriveFlags(false);
}
void Compiler::A_Comp_Mul_Short()
{
ARM64Reg rd = MapReg(CurInstr.A_Reg(16));
ARM64Reg rm = MapReg(CurInstr.A_Reg(0));
ARM64Reg rs = MapReg(CurInstr.A_Reg(8));
u32 op = (CurInstr.Instr >> 21) & 0xF;
bool x = CurInstr.Instr & (1 << 5);
bool y = CurInstr.Instr & (1 << 6);
SBFX(W1, rs, y ? 16 : 0, 16);
if (op == 0b1000)
{
// SMLAxy
SBFX(W0, rm, x ? 16 : 0, 16);
MUL(W0, W0, W1);
ORRI2R(W1, RCPSR, 0x08000000);
ARM64Reg rn = MapReg(CurInstr.A_Reg(12));
ADDS(rd, W0, rn);
CSEL(RCPSR, W1, RCPSR, CC_VS);
CPSRDirty = true;
Comp_AddCycles_C();
}
else if (op == 0b1011)
{
// SMULxy
SBFX(W0, rm, x ? 16 : 0, 16);
MUL(rd, W0, W1);
Comp_AddCycles_C();
}
else if (op == 0b1010)
{
// SMLALxy
ARM64Reg rn = MapReg(CurInstr.A_Reg(12));
MOV(W2, rn);
BFI(X2, rd, 32, 32);
SBFX(W0, rm, x ? 16 : 0, 16);
SMADDL(EncodeRegTo64(rn), W0, W1, X2);
UBFX(EncodeRegTo64(rd), EncodeRegTo64(rn), 32, 32);
Comp_AddCycles_CI(1);
}
else if (op == 0b1001)
{
// SMLAWy/SMULWy
SMULL(X0, rm, W1);
ASR(x ? EncodeRegTo64(rd) : X0, X0, 16);
if (!x)
{
ORRI2R(W1, RCPSR, 0x08000000);
ARM64Reg rn = MapReg(CurInstr.A_Reg(12));
ADDS(rd, W0, rn);
CSEL(RCPSR, W1, RCPSR, CC_VS);
CPSRDirty = true;
}
Comp_AddCycles_C();
}
}
// ARM MUL / MLA: decodes the operands and defers to Comp_Mul_Mla.
void Compiler::A_Comp_Mul()
{
    ARM64Reg rd = MapReg(CurInstr.A_Reg(16));
    ARM64Reg rm = MapReg(CurInstr.A_Reg(0));
    ARM64Reg rs = MapReg(CurInstr.A_Reg(8));

    const bool S = CurInstr.Instr & (1 << 20);
    const bool accumulate = CurInstr.Instr & (1 << 21);

    // the accumulator register is only mapped for MLA
    ARM64Reg rn = accumulate ? MapReg(CurInstr.A_Reg(12)) : INVALID_REG;

    Comp_Mul_Mla(S, accumulate, rd, rm, rs, rn);
}
void Compiler::T_Comp_ShiftImm()
{
Comp_AddCycles_C();
u32 op = (CurInstr.Instr >> 11) & 0x3;
int amount = (CurInstr.Instr >> 6) & 0x1F;
ARM64Reg rd = MapReg(CurInstr.T_Reg(0));
Op2 op2;
op2.Reg.Rm = MapReg(CurInstr.T_Reg(3));
Comp_RegShiftImm(op, amount, true, op2);
if (op2.IsImm)
MOVI2R(rd, op2.Imm);
else
MOV(rd, op2.Reg.Rm, op2.ToArithOption());
if (FlagsNZNeeded())
TST(rd, rd);
Comp_RetriveFlags(false);
}
void Compiler::T_Comp_AddSub_()
{
Comp_AddCycles_C();
Op2 op2;
if (CurInstr.Instr & (1 << 10))
op2 = Op2((CurInstr.Instr >> 6) & 0x7);
else
op2 = Op2(MapReg(CurInstr.T_Reg(6)));
Comp_Arithmetic(
CurInstr.Instr & (1 << 9) ? 0x2 : 0x4,
true,
MapReg(CurInstr.T_Reg(0)),
MapReg(CurInstr.T_Reg(3)),
op2);
}
void Compiler::T_Comp_ALUImm8()
{
Comp_AddCycles_C();
u32 imm = CurInstr.Instr & 0xFF;
int op = (CurInstr.Instr >> 11) & 0x3;
ARM64Reg rd = MapReg(CurInstr.T_Reg(8));
switch (op)
{
case 0:
MOVI2R(rd, imm);
if (FlagsNZNeeded())
TST(rd, rd);
Comp_RetriveFlags(false);
break;
case 1:
Comp_Compare(0xA, rd, Op2(imm));
break;
case 2:
case 3:
Comp_Arithmetic(op == 2 ? 0x4 : 0x2, true, rd, rd, Op2(imm));
break;
}
}
void Compiler::T_Comp_ALU()
{
int op = (CurInstr.Instr >> 6) & 0xF;
ARM64Reg rd = MapReg(CurInstr.T_Reg(0));
ARM64Reg rs = MapReg(CurInstr.T_Reg(3));
if ((op >= 0x2 && op <= 0x4) || op == 0x7)
Comp_AddCycles_CI(1);
else
Comp_AddCycles_C();
switch (op)
{
case 0x0:
Comp_Logical(0x0, true, rd, rd, Op2(rs));
break;
case 0x1:
Comp_Logical(0x1, true, rd, rd, Op2(rs));
break;
case 0x2:
case 0x3:
case 0x4:
case 0x7:
{
Op2 op2;
op2.Reg.Rm = rd;
Comp_RegShiftReg(op == 0x7 ? 3 : (op - 0x2), true, op2, rs);
MOV(rd, op2.Reg.Rm, op2.ToArithOption());
if (FlagsNZNeeded())
TST(rd, rd);
Comp_RetriveFlags(false);
}
break;
case 0x5:
Comp_Arithmetic(0x5, true, rd, rd, Op2(rs));
break;
case 0x6:
Comp_Arithmetic(0x6, true, rd, rd, Op2(rs));
break;
case 0x8:
Comp_Compare(0x8, rd, Op2(rs));
break;
case 0x9:
Comp_Arithmetic(0x3, true, rd, rs, Op2(0));
break;
case 0xA:
Comp_Compare(0xA, rd, Op2(rs));
break;
case 0xB:
Comp_Compare(0xB, rd, Op2(rs));
break;
case 0xC:
Comp_Logical(0xC, true, rd, rd, Op2(rs));
break;
case 0xD:
Comp_Mul_Mla(true, false, rd, rd, rs, INVALID_REG);
break;
case 0xE:
Comp_Logical(0xE, true, rd, rd, Op2(rs));
break;
case 0xF:
MVN(rd, rs);
if (FlagsNZNeeded())
TST(rd, rd);
Comp_RetriveFlags(false);
break;
}
}
void Compiler::T_Comp_ALU_HiReg()
{
u32 rd = ((CurInstr.Instr & 0x7) | ((CurInstr.Instr >> 4) & 0x8));
ARM64Reg rdMapped = MapReg(rd);
ARM64Reg rs = MapReg((CurInstr.Instr >> 3) & 0xF);
u32 op = (CurInstr.Instr >> 8) & 0x3;
Comp_AddCycles_C();
switch (op)
{
case 0:
Comp_Arithmetic(0x4, false, rdMapped, rdMapped, Op2(rs));
break;
case 1:
Comp_Compare(0xA, rdMapped, rs);
return;
case 2:
MOV(rdMapped, rs);
break;
}
if (rd == 15)
{
Comp_JumpTo(rdMapped, false, false);
}
}
void Compiler::T_Comp_AddSP()
{
Comp_AddCycles_C();
ARM64Reg sp = MapReg(13);
u32 offset = (CurInstr.Instr & 0x7F) << 2;
if (CurInstr.Instr & (1 << 7))
SUB(sp, sp, offset);
else
ADD(sp, sp, offset);
}
void Compiler::T_Comp_RelAddr()
{
Comp_AddCycles_C();
ARM64Reg rd = MapReg(CurInstr.T_Reg(8));
u32 offset = (CurInstr.Instr & 0xFF) << 2;
if (CurInstr.Instr & (1 << 11))
{
ARM64Reg sp = MapReg(13);
ADD(rd, sp, offset);
}
else
MOVI2R(rd, (R15 & ~2) + offset);
}
}

View File

@ -0,0 +1,421 @@
#include "ARMJIT_Compiler.h"
using namespace Arm64Gen;
// hack
const int kCodeCacheTiming = 3;
namespace ARMJIT
{
// Free-function wrapper that forwards to T::JumpTo(addr, changeCPSR).
// NOTE(review): presumably exists so generated code can reach the member
// function through a plain function address -- confirm against the callers.
template <typename T>
void jumpToTrampoline(T* cpu, u32 addr, bool changeCPSR)
{
cpu->JumpTo(addr, changeCPSR);
}
// Emits a branch to an address that is known while compiling.
//
//  addr                    branch target; bit 0 selects Thumb state
//  forceNonConstantCycles  always emit a run-time cycle subtraction instead
//                          of folding the cost into ConstantCycles
//
// The fetch cost of the target is computed at compile time. To do so the CPU
// object is temporarily switched to the target's code region; this state is
// restored to the values for the instruction being compiled (R15) before
// returning.
void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
{
    // we can simplify constant branches by a lot
    // it's not completely safe to assume stuff like, which instructions to preload
    // we'll see how it works out

    IrregularCycles = true;

    u32 newPC;
    u32 cycles = 0;
    bool setupRegion = false;

    // switch the Thumb bit (CPSR bit 5) if the target state differs
    if (addr & 0x1 && !Thumb)
    {
        CPSRDirty = true;
        ORRI2R(RCPSR, RCPSR, 0x20);
    }
    else if (!(addr & 0x1) && Thumb)
    {
        CPSRDirty = true;
        ANDI2R(RCPSR, RCPSR, ~0x20);
    }

    if (Num == 0)
    {
        ARMv5* cpu9 = (ARMv5*)CurCPU;

        u32 oldregion = R15 >> 24;
        u32 newregion = addr >> 24;

        u32 regionCodeCycles = cpu9->MemTimings[addr >> 12][0];
        u32 compileTimeCodeCycles = cpu9->RegionCodeCycles;
        cpu9->RegionCodeCycles = regionCodeCycles;

        // the run-time CPU state has to see the target's timing as well
        MOVI2R(W0, regionCodeCycles);
        STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARMv5, RegionCodeCycles));

        setupRegion = newregion != oldregion;
        if (setupRegion)
            cpu9->SetupCodeMem(addr);

        // The reads below are only performed for their effect on CodeCycles;
        // the fetched values themselves are not needed.
        if (addr & 0x1)
        {
            addr &= ~0x1;
            newPC = addr+2;

            // two-opcodes-at-once fetch
            // doesn't matter if we put garbage in the MSbs there
            if (addr & 0x2)
            {
                cpu9->CodeRead32(addr-2, true);
                cycles += cpu9->CodeCycles;
                cpu9->CodeRead32(addr+2, false);
                cycles += cpu9->CodeCycles;
            }
            else
            {
                cpu9->CodeRead32(addr, true);
                cycles += cpu9->CodeCycles;
            }
        }
        else
        {
            addr &= ~0x3;
            newPC = addr+4;

            cpu9->CodeRead32(addr, true);
            cycles += cpu9->CodeCycles;
            cpu9->CodeRead32(addr+4, false);
            cycles += cpu9->CodeCycles;
        }

        // restore the compile-time state of the instruction being compiled
        cpu9->RegionCodeCycles = compileTimeCodeCycles;
        if (setupRegion)
            cpu9->SetupCodeMem(R15);
    }
    else
    {
        ARMv4* cpu7 = (ARMv4*)CurCPU;

        u32 codeRegion = addr >> 24;
        u32 codeCycles = addr >> 15; // cheato

        cpu7->CodeRegion = codeRegion;
        cpu7->CodeCycles = codeCycles;

        MOVI2R(W0, codeRegion);
        STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CodeRegion));
        MOVI2R(W0, codeCycles);
        STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CodeCycles));

        if (addr & 0x1)
        {
            addr &= ~0x1;
            newPC = addr+2;

            // this is necessary because ARM7 bios protection
            u32 compileTimePC = CurCPU->R[15];
            CurCPU->R[15] = newPC;

            cycles += NDS::ARM7MemTimings[codeCycles][0] + NDS::ARM7MemTimings[codeCycles][1];

            CurCPU->R[15] = compileTimePC;
        }
        else
        {
            addr &= ~0x3;
            newPC = addr+4;

            u32 compileTimePC = CurCPU->R[15];
            CurCPU->R[15] = newPC;

            cycles += NDS::ARM7MemTimings[codeCycles][2] + NDS::ARM7MemTimings[codeCycles][3];

            CurCPU->R[15] = compileTimePC;
        }

        // restore the compile-time state of the instruction being compiled;
        // both values have to be derived from R15, not from the branch target
        cpu7->CodeRegion = R15 >> 24;
        cpu7->CodeCycles = R15 >> 15;
    }

    if (Exit)
    {
        MOVI2R(W0, newPC);
        STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, R[15]));
    }

    // instructions that always execute can have the cost folded into the
    // compile-time cycle total; otherwise it is subtracted at run time
    if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles)
        ConstantCycles += cycles;
    else
        SUB(RCycles, RCycles, cycles);
}
// Generates the ARM9 runtime helper used for branches to a register value.
//   kind - 0 = switch mode according to bit 0 of the target, 1 = stay ARM, 2 = stay Thumb
// The helper expects the target address in W0, stores the new R15 and
// RegionCodeCycles into the CPU object, subtracts the fetch cost from RCycles
// and returns. Returns the executable address of the generated helper.
void* Compiler::Gen_JumpTo9(int kind)
{
AlignCode16();
void* res = GetRXPtr();
// W1 = byte loaded from MemTimings at offset (target >> 12)
LSR(W1, W0, 12);
ADDI2R(W1, W1, offsetof(ARMv5, MemTimings), W2);
LDRB(W1, RCPU, W1);
LDR(INDEX_UNSIGNED, W2, RCPU, offsetof(ARMv5, ITCMSize));
STR(INDEX_UNSIGNED, W1, RCPU, offsetof(ARMv5, RegionCodeCycles));
// a timing of 0xFF is replaced by kCodeCacheTiming
CMP(W1, 0xFF);
MOVI2R(W3, kCodeCacheTiming);
CSEL(W1, W3, W1, CC_EQ);
// targets below ITCMSize cost a single cycle (WZR + 1)
CMP(W0, W2);
CSINC(W1, W1, WZR, CC_HS);
FixupBranch switchToThumb;
if (kind == 0)
switchToThumb = TBNZ(W0, 0);
if (kind == 0 || kind == 1)
{
// ARM
if (kind == 0)
ANDI2R(RCPSR, RCPSR, ~0x20);
ANDI2R(W0, W0, ~3);
ADD(W0, W0, 4);
STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARMv5, R[15]));
// two fetches
ADD(W1, W1, W1);
SUB(RCycles, RCycles, W1);
RET();
}
if (kind == 0 || kind == 2)
{
// Thumb
if (kind == 0)
{
SetJumpTarget(switchToThumb);
ORRI2R(RCPSR, RCPSR, 0x20);
}
ANDI2R(W0, W0, ~1);
ADD(W0, W0, 2);
STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARMv5, R[15]));
// charges W1 when bit 1 of the stored PC is clear, 2*W1 otherwise
// NOTE(review): Comp_JumpTo(u32) charges two fetches when the target itself has bit 1 set - confirm this selection matches
ADD(W2, W1, W1);
TSTI2R(W0, 0x2);
CSEL(W1, W1, W2, CC_EQ);
SUB(RCycles, RCycles, W1);
RET();
}
return res;
}
// Generates the ARM7 runtime helper used for branches to a register value.
//   kind - 0 = switch mode according to bit 0 of the target, 1 = stay ARM, 2 = stay Thumb
// The helper expects the target address in W0, stores CodeRegion, CodeCycles
// and the new R15 into the CPU object, subtracts the fetch cost from RCycles
// and returns. Returns the executable address of the generated helper.
void* Compiler::Gen_JumpTo7(int kind)
{
void* res = GetRXPtr();
LSR(W1, W0, 24);
STR(INDEX_UNSIGNED, W1, RCPU, offsetof(ARM, CodeRegion));
LSR(W1, W0, 15);
STR(INDEX_UNSIGNED, W1, RCPU, offsetof(ARM, CodeCycles));
// W3 = 32-bit word at NDS::ARM7MemTimings indexed by W1; the UBFX below split it into bytes
MOVP2R(X2, NDS::ARM7MemTimings);
LDR(W3, X2, ArithOption(W1, true));
FixupBranch switchToThumb;
if (kind == 0)
switchToThumb = TBNZ(W0, 0);
if (kind == 0 || kind == 1)
{
// ARM destination: cost = byte 0 + byte 1 of the timing word
// NOTE(review): Comp_JumpTo(u32) uses entries [2]+[3] for ARM targets and [0]+[1] for Thumb targets - confirm which is intended
UBFX(W2, W3, 0, 8);
UBFX(W3, W3, 8, 8);
ADD(W2, W3, W2);
SUB(RCycles, RCycles, W2);
ANDI2R(W0, W0, ~3);
if (kind == 0)
ANDI2R(RCPSR, RCPSR, ~0x20);
ADD(W3, W0, 4);
STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15]));
RET();
}
if (kind == 0 || kind == 2)
{
// Thumb destination: cost = byte 2 + byte 3 of the timing word
if (kind == 0)
{
SetJumpTarget(switchToThumb);
ORRI2R(RCPSR, RCPSR, 0x20);
}
UBFX(W2, W3, 16, 8);
UBFX(W3, W3, 24, 8);
ADD(W2, W3, W2);
SUB(RCycles, RCycles, W2);
ANDI2R(W0, W0, ~1);
ADD(W3, W0, 2);
STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15]));
RET();
}
return res;
}
// Emits a branch to an address held in a host register.
//   addr        - host register containing the destination
//   switchThumb - when true, bit 0 of the destination selects ARM/Thumb state
//   restoreCPSR - when true, the C++ JumpTo of the CPU object is called (with
//                 restoreCPSR passed as its last argument) instead of the
//                 generated JumpToFuncs helpers
void Compiler::Comp_JumpTo(Arm64Gen::ARM64Reg addr, bool switchThumb, bool restoreCPSR)
{
IrregularCycles = true;
if (!restoreCPSR)
{
// fast path: pre-generated helper, target passed in W0
if (switchThumb)
CPSRDirty = true;
MOV(W0, addr);
// helper index: 0 = switch mode, 1 = stay ARM, 2 = stay Thumb
BL((Num ? JumpToFuncs7 : JumpToFuncs9)[switchThumb ? 0 : (Thumb + 1)]);
}
else
{
bool cpsrDirty = CPSRDirty;
// write cached state back so the C++ side sees current values
SaveCPSR();
SaveCycles();
PushRegs(restoreCPSR);
if (switchThumb)
MOV(W1, addr);
else
{
// keep the current state by forcing bit 0 of the target
if (Thumb)
ORRI2R(W1, addr, 1);
else
ANDI2R(W1, addr, ~1);
}
MOV(X0, RCPU);
MOVI2R(W2, restoreCPSR);
if (Num == 0)
QuickCallFunction(X3, jumpToTrampoline<ARMv5>);
else
QuickCallFunction(X3, jumpToTrampoline<ARMv4>);
// reload the state the call may have changed
PopRegs(restoreCPSR);
LoadCycles();
LoadCPSR();
// for a conditional instruction the dirty flag is put back to its earlier value
if (CurInstr.Cond() < 0xE)
CPSRDirty = cpsrDirty;
}
}
void Compiler::A_Comp_BranchImm()
{
int op = (CurInstr.Instr >> 24) & 1;
s32 offset = (s32)(CurInstr.Instr << 8) >> 6;
u32 target = R15 + offset;
bool link = op;
if (CurInstr.Cond() == 0xF) // BLX_imm
{
target += (op << 1) + 1;
link = true;
}
if (link)
MOVI2R(MapReg(14), R15 - 4);
Comp_JumpTo(target);
}
// ARM BX / BLX with a register operand. The target is copied to W0 before
// r14 is overwritten for the linking form.
void Compiler::A_Comp_BranchXchangeReg()
{
    const bool isBlxReg = (CurInstr.Instr & 0xF0) == 0x30; // BLX_reg

    MOV(W0, MapReg(CurInstr.A_Reg(0)));
    if (isBlxReg)
        MOVI2R(MapReg(14), R15 - 4);

    Comp_JumpTo(W0, true);
}
// Thumb conditional branch. Emits a condition check followed by two paths:
// the taken path (constant jump) and the not-taken path (fetch cycles only).
void Compiler::T_Comp_BCOND()
{
u32 cond = (CurInstr.Instr >> 8) & 0xF;
// branch that is followed when the condition fails
FixupBranch skipExecute = CheckCondition(cond);
// sign-extended 8-bit offset in halfwords; +1 keeps the destination in Thumb state
s32 offset = (s32)(CurInstr.Instr << 24) >> 23;
Comp_JumpTo(R15 + offset + 1, true);
Comp_BranchSpecialBehaviour(true);
// taken path jumps over the not-taken code
FixupBranch skipFailed = B();
SetJumpTarget(skipExecute);
Comp_AddCycles_C(true);
Comp_BranchSpecialBehaviour(false);
SetJumpTarget(skipFailed);
}
void Compiler::T_Comp_B()
{
s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 20;
Comp_JumpTo(R15 + offset + 1);
}
void Compiler::T_Comp_BranchXchangeReg()
{
bool link = CurInstr.Instr & (1 << 7);
if (link)
{
if (Num == 1)
{
printf("BLX unsupported on ARM7!!!\n");
return;
}
MOV(W0, MapReg(CurInstr.A_Reg(3)));
MOVI2R(MapReg(14), R15 - 1);
Comp_JumpTo(W0, true);
}
else
{
ARM64Reg rn = MapReg(CurInstr.A_Reg(3));
Comp_JumpTo(rn, true);
}
}
void Compiler::T_Comp_BL_LONG_1()
{
s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 9;
MOVI2R(MapReg(14), R15 + offset);
Comp_AddCycles_C();
}
void Compiler::T_Comp_BL_LONG_2()
{
ARM64Reg lr = MapReg(14);
s32 offset = (CurInstr.Instr & 0x7FF) << 1;
ADD(W0, lr, offset);
MOVI2R(lr, (R15 - 2) | 1);
Comp_JumpTo(W0, Num == 0 && !(CurInstr.Instr & (1 << 12)));
}
void Compiler::T_Comp_BL_Merged()
{
Comp_AddCycles_C();
R15 += 2;
u32 upperPart = CurInstr.Instr >> 16;
u32 target = (R15 - 2) + ((s32)((CurInstr.Instr & 0x7FF) << 21) >> 9);
target += (upperPart & 0x7FF) << 1;
if (Num == 1 || upperPart & (1 << 12))
target |= 1;
MOVI2R(MapReg(14), (R15 - 2) | 1);
Comp_JumpTo(target);
}
}

View File

@ -0,0 +1,884 @@
#ifdef __SWITCH__
#include "../switch/compat_switch.h"
extern char __start__;
#else
#include <sys/mman.h>
#include <unistd.h>
#endif
#include "ARMJIT_Compiler.h"
#include "../ARMJIT_Internal.h"
#include "../ARMInterpreter.h"
#include "../Config.h"
#include <malloc.h>
using namespace Arm64Gen;
extern "C" void ARM_Ret();
namespace ARMJIT
{
/*
Recompiling classic ARM to ARMv8 code is at the same time
easier and trickier than compiling to a less related architecture
like x64. On the one hand, you can translate a lot of instructions directly.
On the other hand, there are a ton of exceptions; for
example, ADD and SUB can't have a RORed second operand on ARMv8.
When writing a JIT where one instruction is recompiled into multiple ones,
take care not to write back a result until you've read all the other operands!
*/
// Host registers listed for the register cache, W19 through W26
// (presumably the order in which guest registers get assigned - confirm in RegisterCache).
template <>
const ARM64Reg RegisterCache<Compiler, ARM64Reg>::NativeRegAllocOrder[] =
{W19, W20, W21, W22, W23, W24, W25, W26};
// Number of entries in NativeRegAllocOrder.
template <>
const int RegisterCache<Compiler, ARM64Reg>::NativeRegsAvailable = 8;
// Size of the JIT code buffer in bytes (16 MiB).
const int JitMemSize = 16 * 1024 * 1024;
#ifndef __SWITCH__
// Static code buffer; the Compiler constructor mprotect()s its page-aligned interior as RWX.
u8 JitMem[JitMemSize];
#endif
void Compiler::MovePC()
{
ADD(MapReg(15), MapReg(15), Thumb ? 2 : 4);
}
void Compiler::A_Comp_MRS()
{
Comp_AddCycles_C();
ARM64Reg rd = MapReg(CurInstr.A_Reg(12));
if (CurInstr.Instr & (1 << 22))
{
ANDI2R(W5, RCPSR, 0x1F);
MOVI2R(W3, 0);
MOVI2R(W1, 15 - 8);
BL(ReadBanked);
MOV(rd, W3);
}
else
MOV(rd, RCPSR);
}
// ARM MSR: writes selected byte fields of CPSR (or, when bit 22 is set, of the
// banked value reached through ReadBanked/WriteBanked) from an immediate or a register.
void Compiler::A_Comp_MSR()
{
Comp_AddCycles_C();
ARM64Reg val;
if (CurInstr.Instr & (1 << 25))
{
// immediate operand: 8-bit value rotated right by twice the 4-bit rotate field
val = W0;
MOVI2R(val, ROR((CurInstr.Instr & 0xFF), ((CurInstr.Instr >> 7) & 0x1E)));
}
else
{
val = MapReg(CurInstr.A_Reg(0));
}
// bits 16-19 each enable one byte of the destination
u32 mask = 0;
if (CurInstr.Instr & (1<<16)) mask |= 0x000000FF;
if (CurInstr.Instr & (1<<17)) mask |= 0x0000FF00;
if (CurInstr.Instr & (1<<18)) mask |= 0x00FF0000;
if (CurInstr.Instr & (1<<19)) mask |= 0xFF000000;
if (CurInstr.Instr & (1 << 22))
{
// banked path: read the current value into W3 (W5 = mode, W1 = slot)
ANDI2R(W5, RCPSR, 0x1F);
MOVI2R(W3, 0);
MOVI2R(W1, 15 - 8);
BL(ReadBanked);
// in mode 0x10 the low byte is excluded from the mask
MOVI2R(W1, mask);
MOVI2R(W2, mask & 0xFFFFFF00);
ANDI2R(W5, RCPSR, 0x1F);
CMP(W5, 0x10);
CSEL(W1, W2, W1, CC_EQ);
// W3 = (W3 & ~mask) | (val & mask)
BIC(W3, W3, W1);
AND(W0, val, W1);
ORR(W3, W3, W0);
MOVI2R(W1, 15 - 8);
BL(WriteBanked);
}
else
{
// the Thumb bit (0x20) is never written through MSR here
mask &= 0xFFFFFFDF;
CPSRDirty = true;
if ((mask & 0xFF) == 0)
{
// low byte untouched: a plain masked merge is enough
ANDI2R(RCPSR, RCPSR, ~mask);
ANDI2R(W0, val, mask);
ORR(RCPSR, RCPSR, W0);
}
else
{
MOVI2R(W2, mask);
MOVI2R(W3, mask & 0xFFFFFF00);
ANDI2R(W1, RCPSR, 0x1F);
// W1 = first argument
CMP(W1, 0x10);
CSEL(W2, W3, W2, CC_EQ);
BIC(RCPSR, RCPSR, W2);
AND(W0, val, W2);
ORR(RCPSR, RCPSR, W0);
// call ARM::UpdateMode with X0 = cpu, W1 = mode bits before the write, W2 = new CPSR
MOV(W2, RCPSR);
MOV(X0, RCPU);
PushRegs(true);
QuickCallFunction(X3, (void*)&ARM::UpdateMode);
PopRegs(true);
}
}
}
void Compiler::PushRegs(bool saveHiRegs)
{
if (saveHiRegs)
{
if (Thumb || CurInstr.Cond() == 0xE)
{
BitSet16 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00);
for (int reg : hiRegsLoaded)
RegCache.UnloadRegister(reg);
}
else
{
BitSet16 hiRegsDirty(RegCache.LoadedRegs & 0x7F00);
for (int reg : hiRegsDirty)
SaveReg(reg, RegCache.Mapping[reg]);
}
}
}
void Compiler::PopRegs(bool saveHiRegs)
{
if (saveHiRegs)
{
BitSet16 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00);
for (int reg : hiRegsLoaded)
LoadReg(reg, RegCache.Mapping[reg]);
}
}
// Sets up the executable code buffer and pre-generates the runtime helpers
// shared by all compiled blocks:
//   - JumpToFuncs9 / JumpToFuncs7 (register branches, three kinds each)
//   - ReadBanked / WriteBanked    (banked register access)
//   - PatchedStoreFuncs / PatchedLoadFuncs (slow-path memory access thunks)
// Afterwards the code base is moved past the helpers and the remaining space
// is split into a main region and a secondary region of 4 MiB.
Compiler::Compiler()
{
#ifdef __SWITCH__
    JitRWBase = memalign(0x1000, JitMemSize);

    JitRXStart = (u8*)&__start__ - JitMemSize - 0x1000;
    JitRWStart = virtmemReserve(JitMemSize);
    MemoryInfo info = {0};
    u32 pageInfo = {0};
    int i = 0;
    // walk downwards from the executable image until an unmapped range is found
    while (JitRXStart != NULL)
    {
        svcQueryMemory(&info, &pageInfo, (u64)JitRXStart);
        if (info.type != MemType_Unmapped)
            JitRXStart = (void*)((u8*)info.addr - JitMemSize - 0x1000);
        else
            break;
        if (i++ > 8)
        {
            printf("couldn't find unmapped place for jit memory\n");
            JitRXStart = NULL;
        }
    }

    assert(JitRXStart != NULL);

    bool succeded = R_SUCCEEDED(svcMapProcessCodeMemory(envGetOwnProcessHandle(), (u64)JitRXStart, (u64)JitRWBase, JitMemSize));
    assert(succeded);
    succeded = R_SUCCEEDED(svcSetProcessMemoryPermission(envGetOwnProcessHandle(), (u64)JitRXStart, JitMemSize, Perm_Rx));
    assert(succeded);
    succeded = R_SUCCEEDED(svcMapProcessMemory(JitRWStart, envGetOwnProcessHandle(), (u64)JitRXStart, JitMemSize));
    assert(succeded);

    SetCodeBase((u8*)JitRWStart, (u8*)JitRXStart);
    JitMemMainSize = JitMemSize;
#else
    // use the page-aligned interior of the static buffer
    u64 pageSize = sysconf(_SC_PAGE_SIZE);
    u8* pageAligned = (u8*)(((u64)JitMem & ~(pageSize - 1)) + pageSize);
    u64 alignedSize = (((u64)JitMem + sizeof(JitMem)) & ~(pageSize - 1)) - (u64)pageAligned;
    if (mprotect(pageAligned, alignedSize, PROT_EXEC | PROT_READ | PROT_WRITE) != 0)
        printf("failed to make JIT memory executable\n");

    SetCodeBase(pageAligned, pageAligned);
    // the size bookkeeping below (and CompileBlock/Reset) reads JitMemMainSize,
    // so it has to be initialised on this path as well
    JitMemMainSize = alignedSize;
#endif
    SetCodePtr(0);

    for (int i = 0; i < 3; i++)
    {
        JumpToFuncs9[i] = Gen_JumpTo9(i);
        JumpToFuncs7[i] = Gen_JumpTo7(i);
    }

    /*
        W5 - mode
        W1 - reg num
        W3 - in/out value of reg
    */
    {
        ReadBanked = GetRXPtr();

        ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2));
        CMP(W5, 0x11);
        FixupBranch fiq = B(CC_EQ);
        SUBS(W1, W1, 13 - 8);
        ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2));
        FixupBranch notEverything = B(CC_LT);
        CMP(W5, 0x12);
        FixupBranch irq = B(CC_EQ);
        CMP(W5, 0x13);
        FixupBranch svc = B(CC_EQ);
        CMP(W5, 0x17);
        FixupBranch abt = B(CC_EQ);
        CMP(W5, 0x1B);
        FixupBranch und = B(CC_EQ);
        // no banked copy for this mode/register: W3 is returned unchanged
        SetJumpTarget(notEverything);
        RET();

        SetJumpTarget(fiq);
        LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_FIQ));
        RET();
        SetJumpTarget(irq);
        LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_IRQ));
        RET();
        SetJumpTarget(svc);
        LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_SVC));
        RET();
        SetJumpTarget(abt);
        LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_ABT));
        RET();
        SetJumpTarget(und);
        LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_UND));
        RET();
    }
    {
        // same dispatch as ReadBanked; W4 reports whether a banked slot was written
        WriteBanked = GetRXPtr();

        ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2));
        CMP(W5, 0x11);
        FixupBranch fiq = B(CC_EQ);
        SUBS(W1, W1, 13 - 8);
        ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2));
        FixupBranch notEverything = B(CC_LT);
        CMP(W5, 0x12);
        FixupBranch irq = B(CC_EQ);
        CMP(W5, 0x13);
        FixupBranch svc = B(CC_EQ);
        CMP(W5, 0x17);
        FixupBranch abt = B(CC_EQ);
        CMP(W5, 0x1B);
        FixupBranch und = B(CC_EQ);
        SetJumpTarget(notEverything);
        MOVI2R(W4, 0);
        RET();

        SetJumpTarget(fiq);
        STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_FIQ));
        MOVI2R(W4, 1);
        RET();
        SetJumpTarget(irq);
        STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_IRQ));
        MOVI2R(W4, 1);
        RET();
        SetJumpTarget(svc);
        STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_SVC));
        MOVI2R(W4, 1);
        RET();
        SetJumpTarget(abt);
        STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_ABT));
        MOVI2R(W4, 1);
        RET();
        SetJumpTarget(und);
        STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_UND));
        MOVI2R(W4, 1);
        RET();
    }

    // one slow-path thunk per CPU (num), access width (8 << size bits) and
    // host register (W19 + reg); loads additionally vary by sign extension
    for (int num = 0; num < 2; num++)
    {
        for (int size = 0; size < 3; size++)
        {
            for (int reg = 0; reg < 8; reg++)
            {
                ARM64Reg rdMapped = (ARM64Reg)(W19 + reg);
                PatchedStoreFuncs[num][size][reg] = GetRXPtr();
                if (num == 0)
                {
                    MOV(X1, RCPU);
                    MOV(W2, rdMapped);
                }
                else
                {
                    MOV(W1, rdMapped);
                }
                // the link register must survive the nested call
                ABI_PushRegisters({30});
                switch ((8 << size) | num)
                {
                case 32: QuickCallFunction(X3, SlowWrite9<u32>); break;
                case 33: QuickCallFunction(X3, SlowWrite7<u32>); break;
                case 16: QuickCallFunction(X3, SlowWrite9<u16>); break;
                case 17: QuickCallFunction(X3, SlowWrite7<u16>); break;
                case 8: QuickCallFunction(X3, SlowWrite9<u8>); break;
                case 9: QuickCallFunction(X3, SlowWrite7<u8>); break;
                }
                ABI_PopRegisters({30});
                RET();

                for (int signextend = 0; signextend < 2; signextend++)
                {
                    PatchedLoadFuncs[num][size][signextend][reg] = GetRXPtr();
                    if (num == 0)
                        MOV(X1, RCPU);
                    ABI_PushRegisters({30});
                    switch ((8 << size) | num)
                    {
                    case 32: QuickCallFunction(X3, SlowRead9<u32>); break;
                    case 33: QuickCallFunction(X3, SlowRead7<u32>); break;
                    case 16: QuickCallFunction(X3, SlowRead9<u16>); break;
                    case 17: QuickCallFunction(X3, SlowRead7<u16>); break;
                    case 8: QuickCallFunction(X3, SlowRead9<u8>); break;
                    case 9: QuickCallFunction(X3, SlowRead7<u8>); break;
                    }
                    ABI_PopRegisters({30});
                    // size is an index (0..2); index 2 is the 32-bit access,
                    // which needs no extension
                    if (size == 2)
                        MOV(rdMapped, W0);
                    else if (signextend)
                        SBFX(rdMapped, W0, 0, 8 << size);
                    else
                        UBFX(rdMapped, W0, 0, 8 << size);
                    RET();
                }
            }
        }
    }

    FlushIcache();

    JitMemSecondarySize = 1024*1024*4;

    // the helpers generated above stay resident; blocks start after them
    JitMemMainSize -= GetCodeOffset();
    JitMemMainSize -= JitMemSecondarySize;

    SetCodeBase((u8*)GetRWPtr(), (u8*)GetRXPtr());
}
// Releases the code memory mappings created by the constructor.
// Only the Switch build has anything to undo; elsewhere the buffer is static.
Compiler::~Compiler()
{
#ifdef __SWITCH__
if (JitRWStart != NULL)
{
// undo the constructor's mappings in reverse order, then free the backing allocation
bool succeded = R_SUCCEEDED(svcUnmapProcessMemory(JitRWStart, envGetOwnProcessHandle(), (u64)JitRXStart, JitMemSize));
assert(succeded);
virtmemFree(JitRWStart, JitMemSize);
succeded = R_SUCCEEDED(svcUnmapProcessCodeMemory(envGetOwnProcessHandle(), (u64)JitRXStart, (u64)JitRWBase, JitMemSize));
assert(succeded);
free(JitRWBase);
}
#endif
}
// Emits a load of ARM::Cycles from the CPU object into RCycles.
void Compiler::LoadCycles()
{
    const auto cyclesOffset = offsetof(ARM, Cycles);
    LDR(INDEX_UNSIGNED, RCycles, RCPU, cyclesOffset);
}
// Emits a store of RCycles into ARM::Cycles of the CPU object.
void Compiler::SaveCycles()
{
    const auto cyclesOffset = offsetof(ARM, Cycles);
    STR(INDEX_UNSIGNED, RCycles, RCPU, cyclesOffset);
}
// Emits code that fills nativeReg with guest register `reg`. r15 is
// materialised from the compile-time R15 value; all others are loaded from
// the CPU object.
void Compiler::LoadReg(int reg, ARM64Reg nativeReg)
{
    if (reg != 15)
    {
        LDR(INDEX_UNSIGNED, nativeReg, RCPU, offsetof(ARM, R[reg]));
        return;
    }

    MOVI2R(nativeReg, R15);
}
// Emits a store of nativeReg into slot `reg` of ARM::R in the CPU object.
void Compiler::SaveReg(int reg, ARM64Reg nativeReg)
{
    const auto slotOffset = offsetof(ARM, R[reg]);
    STR(INDEX_UNSIGNED, nativeReg, RCPU, slotOffset);
}
// Emits a reload of RCPSR from ARM::CPSR. The cached value must not be
// dirty, since the reload would overwrite it.
void Compiler::LoadCPSR()
{
    assert(!CPSRDirty);

    const auto cpsrOffset = offsetof(ARM, CPSR);
    LDR(INDEX_UNSIGNED, RCPSR, RCPU, cpsrOffset);
}
// Emits a store of RCPSR into ARM::CPSR, but only if the cached value is
// dirty. With markClean the dirty flag is cleared afterwards.
void Compiler::SaveCPSR(bool markClean)
{
    if (!CPSRDirty)
        return;

    STR(INDEX_UNSIGNED, RCPSR, RCPU, offsetof(ARM, CPSR));
    if (markClean)
        CPSRDirty = false;
}
// Emits a test of condition code `cond` against the flags cached in RCPSR.
// Returns the forward branch that is taken when the condition FAILS; the
// caller resolves it with SetJumpTarget.
FixupBranch Compiler::CheckCondition(u32 cond)
{
    if (cond < 0x8)
    {
        // conditions 0-7 each test a single flag bit (28..31) of CPSR
        const u32 c1 = (cond >> 1) & 1;
        const u32 c2 = (cond >> 2) & 1;
        u8 flagBit = 28 + (((c1 ^ 1) << 1) | (c2 ^ c1));

        // odd conditions pass when the flag is clear, so skip when it is set
        return (cond & 1) ? TBNZ(RCPSR, flagBit) : TBZ(RCPSR, flagBit);
    }

    // remaining conditions: look up the current flag nibble in the
    // per-condition bit mask from ARM::ConditionTable
    LSR(W1, RCPSR, 28);
    MOVI2R(W2, 1);
    LSLV(W2, W2, W1);
    ANDI2R(W2, W2, ARM::ConditionTable[cond], W3);
    return CBZ(W2);
}
// Handler table for ARM-mode instructions, indexed by the instruction kind
// (CurInstr.Info.Kind). A NULL entry means there is no native handler:
// CanCompile reports false for it and CompileBlock calls the interpreter instead.
// The entry order must match the kind enumeration.
#define F(x) &Compiler::A_Comp_##x
const Compiler::CompileFunc A_Comp[ARMInstrInfo::ak_Count] =
{
// AND
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// EOR
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// SUB
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// RSB
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// ADD
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// ADC
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// SBC
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// RSC
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// ORR
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// MOV
F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp),
F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp),
// BIC
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
// MVN
F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp),
F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp),
// TST
F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp),
// TEQ
F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp),
// CMP
F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp),
// CMN
F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp),
// Mul
F(Mul), F(Mul), F(Mul_Long), F(Mul_Long), F(Mul_Long), F(Mul_Long), F(Mul_Short), F(Mul_Short), F(Mul_Short), F(Mul_Short), F(Mul_Short),
// ARMv5 exclusives
F(Clz), NULL, NULL, NULL, NULL,
// STR
F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB),
// STRB
F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB),
// LDR
F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB),
// LDRB
F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB),
// STRH
F(MemHD), F(MemHD), F(MemHD), F(MemHD),
// LDRD
NULL, NULL, NULL, NULL,
// STRD
NULL, NULL, NULL, NULL,
// LDRH
F(MemHD), F(MemHD), F(MemHD), F(MemHD),
// LDRSB
F(MemHD), F(MemHD), F(MemHD), F(MemHD),
// LDRSH
F(MemHD), F(MemHD), F(MemHD), F(MemHD),
// Swap
NULL, NULL,
// LDM, STM
F(LDM_STM), F(LDM_STM),
// Branch
F(BranchImm), F(BranchImm), F(BranchImm), F(BranchXchangeReg), F(BranchXchangeReg),
// Special
NULL, F(MSR), F(MSR), F(MRS), NULL, NULL, NULL,
&Compiler::Nop
};
#undef F
// Handler table for Thumb-mode instructions, indexed by the instruction kind
// (CurInstr.Info.Kind). A NULL entry means there is no native handler:
// CanCompile reports false for it and CompileBlock calls the interpreter instead.
// The entry order must match the kind enumeration.
#define F(x) &Compiler::T_Comp_##x
const Compiler::CompileFunc T_Comp[ARMInstrInfo::tk_Count] =
{
// Shift imm
F(ShiftImm), F(ShiftImm), F(ShiftImm),
// Add/sub tri operand
F(AddSub_), F(AddSub_), F(AddSub_), F(AddSub_),
// 8 bit imm
F(ALUImm8), F(ALUImm8), F(ALUImm8), F(ALUImm8),
// ALU
F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU),
F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU),
// ALU hi reg
F(ALU_HiReg), F(ALU_HiReg), F(ALU_HiReg),
// PC/SP relative ops
F(RelAddr), F(RelAddr), F(AddSP),
// LDR PC rel
F(LoadPCRel),
// LDR/STR reg offset
F(MemReg), F(MemReg), F(MemReg), F(MemReg),
// LDR/STR sign extended, half
F(MemRegHalf), F(MemRegHalf), F(MemRegHalf), F(MemRegHalf),
// LDR/STR imm offset
F(MemImm), F(MemImm), F(MemImm), F(MemImm),
// LDR/STR half imm offset
F(MemImmHalf), F(MemImmHalf),
// LDR/STR sp rel
F(MemSPRel), F(MemSPRel),
// PUSH/POP
F(PUSH_POP), F(PUSH_POP),
// LDMIA, STMIA
F(LDMIA_STMIA), F(LDMIA_STMIA),
// Branch
F(BCOND), F(BranchXchangeReg), F(BranchXchangeReg), F(B), F(BL_LONG_1), F(BL_LONG_2),
// Unk, SVC
NULL, NULL,
F(BL_Merged)
};
// Reports whether a native handler exists for instruction kind `kind` in the
// Thumb (thumb == true) or ARM handler table.
bool Compiler::CanCompile(bool thumb, u16 kind)
{
    if (thumb)
        return T_Comp[kind] != NULL;
    return A_Comp[kind] != NULL;
}
// Emits the extra work attached to a branch by the block analysis:
//   - a taken idle-loop branch sets ARM::IdleLoop to 1
//   - when this path (taken / not taken) is the one the block does NOT
//     continue along, the block is left here: registers are written back, the
//     constant cycles accumulated so far are subtracted and control goes to ARM_Ret.
void Compiler::Comp_BranchSpecialBehaviour(bool taken)
{
    const bool idleBranchTaken = taken && (CurInstr.BranchFlags & branch_IdleBranch);
    if (idleBranchTaken)
    {
        MOVI2R(W0, 1);
        STRB(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, IdleLoop));
    }

    const bool leavesBlock = taken
        ? (CurInstr.BranchFlags & branch_FollowCondNotTaken) != 0
        : (CurInstr.BranchFlags & branch_FollowCondTaken) != 0;
    if (!leavesBlock)
        return;

    RegCache.PrepareExit();
    SUB(RCycles, RCycles, ConstantCycles);
    QuickTailCall(X0, ARM_Ret);
}
// Compiles a sequence of fetched guest instructions into one host code block.
//   cpu         - CPU the block belongs to (its Num selects ARM9 = 0 / ARM7 = 1 handling)
//   thumb       - true when the instructions are Thumb
//   instrs      - the instructions, in execution order
//   instrsCount - number of entries in instrs
// Returns the executable entry point of the generated block.
// Instructions without a native handler are executed by calling the interpreter.
JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
{
// make sure both code regions have room left before emitting anything
if (JitMemMainSize - GetCodeOffset() < 1024 * 16)
{
printf("JIT near memory full, resetting...\n");
ResetBlockCache();
}
if ((JitMemMainSize + JitMemSecondarySize) - OtherCodeRegion < 1024 * 8)
{
printf("JIT far memory full, resetting...\n");
ResetBlockCache();
}
JitBlockEntry res = (JitBlockEntry)GetRXPtr();
// per-block compiler state
Thumb = thumb;
Num = cpu->Num;
CurCPU = cpu;
ConstantCycles = 0;
RegCache = RegisterCache<Compiler, ARM64Reg>(this, instrs, instrsCount, true);
CPSRDirty = false;
for (int i = 0; i < instrsCount; i++)
{
CurInstr = instrs[i];
// guest PC as seen by the instruction: two instructions ahead
R15 = CurInstr.Addr + (Thumb ? 4 : 8);
CodeRegion = R15 >> 24;
CompileFunc comp = Thumb
? T_Comp[CurInstr.Info.Kind]
: A_Comp[CurInstr.Info.Kind];
Exit = i == (instrsCount - 1) || (CurInstr.BranchFlags & branch_FollowCondNotTaken);
//printf("%x instr %x regs: r%x w%x n%x flags: %x %x %x\n", R15, CurInstr.Instr, CurInstr.Info.SrcRegs, CurInstr.Info.DstRegs, CurInstr.Info.ReadFlags, CurInstr.Info.NotStrictlyNeeded, CurInstr.Info.WriteFlags, CurInstr.SetFlags);
bool isConditional = Thumb ? CurInstr.Info.Kind == ARMInstrInfo::tk_BCOND : CurInstr.Cond() < 0xE;
// store R15 (and related state) in the CPU object whenever code outside the
// block may need it: interpreter fallback, followed conditional branches,
// or a final instruction that can fall through
if (comp == NULL || (CurInstr.BranchFlags & branch_FollowCondTaken) || (i == instrsCount - 1 && (!CurInstr.Info.Branches() || isConditional)))
{
MOVI2R(W0, R15);
STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, R[15]));
if (comp == NULL)
{
MOVI2R(W0, CurInstr.Instr);
STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CurInstr));
}
if (Num == 0)
{
MOVI2R(W0, (s32)CurInstr.CodeCycles);
STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CodeCycles));
}
}
if (comp == NULL)
{
// interpreter fallback works on the CPU object, so flush all cached state
SaveCycles();
SaveCPSR();
RegCache.Flush();
}
else
RegCache.Prepare(Thumb, i);
if (Thumb)
{
if (comp == NULL)
{
MOV(X0, RCPU);
QuickCallFunction(X1, InterpretTHUMB[CurInstr.Info.Kind]);
}
else
(this->*comp)();
}
else
{
u32 cond = CurInstr.Cond();
if (CurInstr.Info.Kind == ARMInstrInfo::ak_BLX_IMM)
{
// BLX_imm uses condition field 0xF and is handled without a condition check
if (comp)
(this->*comp)();
else
{
MOV(X0, RCPU);
QuickCallFunction(X1, ARMInterpreter::A_BLX_IMM);
}
}
else if (cond == 0xF)
// any other instruction with condition 0xF only costs its fetch cycles
Comp_AddCycles_C();
else
{
IrregularCycles = false;
FixupBranch skipExecute;
if (cond < 0xE)
skipExecute = CheckCondition(cond);
if (comp == NULL)
{
MOV(X0, RCPU);
QuickCallFunction(X1, InterpretARM[CurInstr.Info.Kind]);
}
else
{
(this->*comp)();
}
Comp_BranchSpecialBehaviour(true);
if (cond < 0xE)
{
if (IrregularCycles || (CurInstr.BranchFlags & branch_FollowCondTaken))
{
// separate not-taken path: account fetch cycles and handle branch follow-up
FixupBranch skipNop = B();
SetJumpTarget(skipExecute);
Comp_AddCycles_C();
Comp_BranchSpecialBehaviour(false);
SetJumpTarget(skipNop);
}
else
SetJumpTarget(skipExecute);
}
}
}
if (comp == NULL)
{
// pick up whatever the interpreter changed
LoadCycles();
LoadCPSR();
}
}
// block epilogue: write registers back, apply the constant cycle total, return via ARM_Ret
RegCache.Flush();
SUB(RCycles, RCycles, ConstantCycles);
QuickTailCall(X0, ARM_Ret);
FlushIcache();
return res;
}
void Compiler::Reset()
{
LoadStorePatches.clear();
SetCodePtr(0);
OtherCodeRegion = JitMemMainSize;
const u32 brk_0 = 0xD4200000;
for (int i = 0; i < (JitMemMainSize + JitMemSecondarySize) / 4; i++)
*(((u32*)GetRWPtr()) + i) = brk_0;
}
void Compiler::Comp_AddCycles_C(bool forceNonConstant)
{
s32 cycles = Num ?
NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 1 : 3]
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles);
if (forceNonConstant)
ConstantCycles += cycles;
else
SUB(RCycles, RCycles, cycles);
}
// Accounts for the code-fetch cycles plus numI additional cycles, where numI
// is known at compile time. Marks the instruction as having irregular timing.
void Compiler::Comp_AddCycles_CI(u32 numI)
{
    IrregularCycles = true;

    s32 total = (Num ?
        NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
        : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + numI;

    const bool needsRuntimeSub = !Thumb && CurInstr.Cond() != 0xE;
    if (needsRuntimeSub)
        SUB(RCycles, RCycles, total);
    else
        ConstantCycles += total;
}
// Accounts for the code-fetch cycles plus additional cycles made of a
// compile-time part and a runtime part.
//   c     - additional cycles known at compile time
//   numI  - host register holding the runtime part
//   shift - shift/extend applied to numI before it is subtracted
// Marks the instruction as having irregular timing.
void Compiler::Comp_AddCycles_CI(u32 c, ARM64Reg numI, ArithOption shift)
{
    IrregularCycles = true;

    s32 cycles = (Num ?
        NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
        : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + c;

    // runtime part: only known when the generated code executes
    SUB(RCycles, RCycles, numI, shift);

    // compile-time part: charged exactly once, either folded or as an immediate
    if (Thumb || CurInstr.Cond() >= 0xE)
        ConstantCycles += cycles;
    else
        SUB(RCycles, RCycles, cycles);
}
void Compiler::Comp_AddCycles_CDI()
{
if (Num == 0)
Comp_AddCycles_CD();
else
{
IrregularCycles = true;
s32 cycles;
s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
s32 numD = CurInstr.DataCycles;
if ((CurInstr.DataRegion >> 24) == 0x02) // mainRAM
{
if (CodeRegion == 0x02)
cycles = numC + numD;
else
{
numC++;
cycles = std::max(numC + numD - 3, std::max(numC, numD));
}
}
else if (CodeRegion == 0x02)
{
numD++;
cycles = std::max(numC + numD - 3, std::max(numC, numD));
}
else
{
cycles = numC + numD + 1;
}
if (!Thumb && CurInstr.Cond() < 0xE)
SUB(RCycles, RCycles, cycles);
else
ConstantCycles += cycles;
}
}
// Accounts for the combined code-fetch and data-access cycles of the current
// instruction. Sets IrregularCycles when the total differs from the plain
// fetch cost (ARM9) or unconditionally (ARM7).
void Compiler::Comp_AddCycles_CD()
{
u32 cycles = 0;
if (Num == 0)
{
// ARM9: no fetch cost is charged when bit 1 of R15 is set
s32 numC = (R15 & 0x2) ? 0 : CurInstr.CodeCycles;
s32 numD = CurInstr.DataCycles;
//if (DataRegion != CodeRegion)
cycles = std::max(numC + numD - 6, std::max(numC, numD));
IrregularCycles = cycles != numC;
}
else
{
// ARM7: accesses are summed unless exactly one of code/data is in region 0x02
s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
s32 numD = CurInstr.DataCycles;
if ((CurInstr.DataRegion >> 24) == 0x02)
{
if (CodeRegion == 0x02)
cycles += numC + numD;
else
cycles += std::max(numC + numD - 3, std::max(numC, numD));
}
else if (CodeRegion == 0x02)
{
cycles += std::max(numC + numD - 3, std::max(numC, numD));
}
else
{
cycles += numC + numD;
}
IrregularCycles = true;
}
// a runtime SUB is only needed for conditional ARM instructions with irregular timing
if ((!Thumb && CurInstr.Cond() < 0xE) && IrregularCycles)
SUB(RCycles, RCycles, cycles);
else
ConstantCycles += cycles;
}
}

View File

@ -0,0 +1,269 @@
#ifndef ARMJIT_COMPILER_H
#define ARMJIT_COMPILER_H
#include "../ARM.h"
#include "../ARMJIT.h"
#include "../dolphin/Arm64Emitter.h"
#include "../ARMJIT_Internal.h"
#include "../ARMJIT_RegisterCache.h"
#include <unordered_map>
namespace ARMJIT
{
// Host registers with a fixed role in generated code (the same assignment is
// used by the ARM_Dispatch / ARM_Ret assembly stubs):
// RCPSR holds the guest CPSR, RCycles the cycle counter, RCPU the CPU object pointer.
const Arm64Gen::ARM64Reg RCPSR = Arm64Gen::W27;
const Arm64Gen::ARM64Reg RCycles = Arm64Gen::W28;
const Arm64Gen::ARM64Reg RCPU = Arm64Gen::X29;
// Second operand of a data-processing instruction: either a 32-bit immediate
// or a host register with an optional shift.
struct Op2
{
    // Leaves every member uninitialised; assign a real operand before use.
    Op2()
    {}

    // Plain register operand (LSL #0).
    Op2(Arm64Gen::ARM64Reg rm) : IsImm(false)
    {
        Reg.Rm = rm;
        Reg.ShiftType = Arm64Gen::ST_LSL;
        Reg.ShiftAmount = 0;
    }

    // Immediate operand.
    Op2(u32 imm) : IsImm(true), Imm(imm)
    {}

    // Register operand shifted by a constant amount.
    Op2(Arm64Gen::ARM64Reg rm, Arm64Gen::ShiftType st, int amount) : IsImm(false)
    {
        Reg.Rm = rm;
        Reg.ShiftType = st;
        Reg.ShiftAmount = amount;
    }

    // Converts a register operand into the emitter's shifted-register option.
    // Must not be called on an immediate.
    Arm64Gen::ArithOption ToArithOption()
    {
        assert(!IsImm);
        return Arm64Gen::ArithOption(Reg.Rm, Reg.ShiftType, Reg.ShiftAmount);
    }

    // True for a register operand without any shift.
    bool IsSimpleReg()
    { return !IsImm && !Reg.ShiftAmount && Reg.ShiftType == Arm64Gen::ST_LSL; }

    // True for an immediate that fits in 12 unsigned bits (0..0xFFF).
    // The mask is parenthesised because == binds tighter than &.
    bool ImmFits12Bit()
    { return IsImm && ((Imm & 0xFFF) == Imm); }

    // True for the immediate 0.
    bool IsZero()
    { return IsImm && !Imm; }

    // Selects which union member is valid.
    bool IsImm;
    union
    {
        struct
        {
            Arm64Gen::ARM64Reg Rm;
            Arm64Gen::ShiftType ShiftType;
            int ShiftAmount;
        } Reg;
        u32 Imm;
    };
};
// Record stored in Compiler::LoadStorePatches, keyed by a code offset.
// NOTE(review): field meanings below are inferred from the names only -
// confirm against RewriteMemAccess.
struct LoadStorePatch
{
// presumably the slow-path thunk (PatchedLoadFuncs / PatchedStoreFuncs entry) to call
void* PatchFunc;
// presumably the offset of the patched code relative to the key
s32 PatchOffset;
// presumably the size of the patched code
u32 PatchSize;
};
class Compiler : public Arm64Gen::ARM64XEmitter
{
public:
typedef void (Compiler::*CompileFunc)();
Compiler();
~Compiler();
void PushRegs(bool saveHiRegs);
void PopRegs(bool saveHiRegs);
Arm64Gen::ARM64Reg MapReg(int reg)
{
assert(RegCache.Mapping[reg] != Arm64Gen::INVALID_REG);
return RegCache.Mapping[reg];
}
JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount);
bool CanCompile(bool thumb, u16 kind);
bool FlagsNZNeeded()
{
return CurInstr.SetFlags & 0xC;
}
void Reset();
void Comp_AddCycles_C(bool forceNonConstant = false);
void Comp_AddCycles_CI(u32 numI);
void Comp_AddCycles_CI(u32 c, Arm64Gen::ARM64Reg numI, Arm64Gen::ArithOption shift);
void Comp_AddCycles_CD();
void Comp_AddCycles_CDI();
void MovePC();
void LoadReg(int reg, Arm64Gen::ARM64Reg nativeReg);
void SaveReg(int reg, Arm64Gen::ARM64Reg nativeReg);
void LoadCPSR();
void SaveCPSR(bool markClean = true);
void LoadCycles();
void SaveCycles();
void Nop() {}
void A_Comp_ALUTriOp();
void A_Comp_ALUMovOp();
void A_Comp_ALUCmpOp();
void A_Comp_Mul();
void A_Comp_Mul_Long();
void A_Comp_Mul_Short();
void A_Comp_Clz();
void A_Comp_MemWB();
void A_Comp_MemHD();
void A_Comp_LDM_STM();
void A_Comp_BranchImm();
void A_Comp_BranchXchangeReg();
void A_Comp_MRS();
void A_Comp_MSR();
void T_Comp_ShiftImm();
void T_Comp_AddSub_();
void T_Comp_ALUImm8();
void T_Comp_ALU();
void T_Comp_ALU_HiReg();
void T_Comp_AddSP();
void T_Comp_RelAddr();
void T_Comp_MemReg();
void T_Comp_MemImm();
void T_Comp_MemRegHalf();
void T_Comp_MemImmHalf();
void T_Comp_LoadPCRel();
void T_Comp_MemSPRel();
void T_Comp_LDMIA_STMIA();
void T_Comp_PUSH_POP();
void T_Comp_BCOND();
void T_Comp_B();
void T_Comp_BranchXchangeReg();
void T_Comp_BL_LONG_1();
void T_Comp_BL_LONG_2();
void T_Comp_BL_Merged();
s32 Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode);
void Comp_Mul_Mla(bool S, bool mla, Arm64Gen::ARM64Reg rd, Arm64Gen::ARM64Reg rm, Arm64Gen::ARM64Reg rs, Arm64Gen::ARM64Reg rn);
void Comp_Compare(int op, Arm64Gen::ARM64Reg rn, Op2 op2);
void Comp_Logical(int op, bool S, Arm64Gen::ARM64Reg rd, Arm64Gen::ARM64Reg rn, Op2 op2);
void Comp_Arithmetic(int op, bool S, Arm64Gen::ARM64Reg rd, Arm64Gen::ARM64Reg rn, Op2 op2);
void Comp_RetriveFlags(bool retriveCV);
Arm64Gen::FixupBranch CheckCondition(u32 cond);
void Comp_JumpTo(Arm64Gen::ARM64Reg addr, bool switchThumb, bool restoreCPSR = false);
void Comp_JumpTo(u32 addr, bool forceNonConstantCycles = false);
void A_Comp_GetOp2(bool S, Op2& op2);
void Comp_RegShiftImm(int op, int amount, bool S, Op2& op2, Arm64Gen::ARM64Reg tmp = Arm64Gen::W0);
void Comp_RegShiftReg(int op, bool S, Op2& op2, Arm64Gen::ARM64Reg rs);
bool Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr);
enum
{
memop_Writeback = 1 << 0,
memop_Post = 1 << 1,
memop_SignExtend = 1 << 2,
memop_Store = 1 << 3,
memop_SubtractOffset = 1 << 4
};
void Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags);
// 0 = switch mode, 1 = stay arm, 2 = stay thumb
void* Gen_JumpTo9(int kind);
void* Gen_JumpTo7(int kind);
void Comp_BranchSpecialBehaviour(bool taken);
JitBlockEntry AddEntryOffset(u32 offset)
{
return (JitBlockEntry)(GetRXBase() + offset);
}
u32 SubEntryOffset(JitBlockEntry entry)
{
return (u8*)entry - GetRXBase();
}
bool IsJITFault(u64 pc);
s64 RewriteMemAccess(u64 pc);
// Exchanges the current emit position with the one remembered in OtherCodeRegion,
// so that subsequent code is emitted into the other region.
void SwapCodeRegion()
{
    const ptrdiff_t resumeOffset = GetCodeOffset();
    SetCodePtrUnsafe(OtherCodeRegion);
    OtherCodeRegion = resumeOffset;
}
// emit offset of the code region which is currently not written to (see SwapCodeRegion)
ptrdiff_t OtherCodeRegion;
bool Exit;
// instruction currently being compiled
FetchedInstr CurInstr;
bool Thumb;
u32 R15;
// 0 selects the ARM9 code paths (FastMem9Start, SlowRead9, ...), everything else the ARM7 ones
u32 Num;
ARM* CurCPU;
u32 ConstantCycles;
u32 CodeRegion;
BitSet32 SavedRegs;
// IsJITFault treats [RX base, RX base + JitMemMainSize + JitMemSecondarySize) as JIT code
u32 JitMemSecondarySize;
u32 JitMemMainSize;
// helpers called from the usermode paths of Comp_MemAccessBlock
void* ReadBanked, *WriteBanked;
// presumably indexed by the `kind` passed to Gen_JumpTo9/Gen_JumpTo7 - TODO confirm
void* JumpToFuncs9[3];
void* JumpToFuncs7[3];
// keyed by the offset (relative to the RX base) of a fastmem load/store instruction,
// consumed by RewriteMemAccess when that instruction faults
std::unordered_map<ptrdiff_t, LoadStorePatch> LoadStorePatches;
// [Num][Size][Sign Extend][Output register]
// Size is indexed as ctz(size) - 3, the output register as mapped register - W19 (see Comp_MemAccess)
void* PatchedLoadFuncs[2][3][2][8];
void* PatchedStoreFuncs[2][3][8];
RegisterCache<Compiler, Arm64Gen::ARM64Reg> RegCache;
bool CPSRDirty = false;
// set by Comp_MemAccessBlock
bool IrregularCycles = false;
#ifdef __SWITCH__
void* JitRWBase;
void* JitRWStart;
void* JitRXStart;
#endif
};
}
#endif

View File

@ -0,0 +1,68 @@
#include "../ARMJIT_x64/ARMJIT_Offsets.h"
.text
#define RCPSR W27
#define RCycles W28
#define RCPU X29
// ARM_Dispatch: entry from the host into JIT code.
//   x0 = pointer which becomes RCPU (the loads below read the cycle count and CPSR through it)
//   x1 = address to branch to
// Pushes x19-x30 into a 96 byte frame (popped again by ARM_Ret), then loads the
// RCycles/RCPSR registers from the CPU object and jumps to x1 without linking.
.p2align 4,,15
.global ARM_Dispatch
ARM_Dispatch:
stp x19, x20, [sp, #-96]!
stp x21, x22, [sp, #16]
stp x23, x24, [sp, #32]
stp x25, x26, [sp, #48]
stp x27, x28, [sp, #64]
stp x29, x30, [sp, #80]
mov RCPU, x0
ldr RCycles, [RCPU, ARM_Cycles_offset]
ldr RCPSR, [RCPU, ARM_CPSR_offset]
br x1
// ARM_Ret: counterpart of ARM_Dispatch.
// Stores RCycles/RCPSR back into the CPU object, pops the 96 byte frame pushed by
// ARM_Dispatch (x19-x30) and returns through the restored x30.
.p2align 4,,15
.global ARM_Ret
ARM_Ret:
str RCycles, [RCPU, ARM_Cycles_offset]
str RCPSR, [RCPU, ARM_CPSR_offset]
ldp x29, x30, [sp, #80]
ldp x27, x28, [sp, #64]
ldp x25, x26, [sp, #48]
ldp x23, x24, [sp, #32]
ldp x21, x22, [sp, #16]
ldp x19, x20, [sp], #96
ret
// ARM_RestoreContext: reloads a complete register set from the block x0 points to and
// continues execution there.
// Layout read here: x0..x30 in consecutive 8 byte slots starting at offset 0, followed by
// the new stack pointer (offset 248) and the branch target (offset 256).
// presumably this matches the saved context handed to the fault handler - TODO confirm
.p2align 4,,15
.global ARM_RestoreContext
ARM_RestoreContext:
// sp temporarily serves as the base pointer for the loads
mov sp, x0
ldp x0, x1, [sp]
ldp x2, x3, [sp, #16]
ldp x4, x5, [sp, #32]
ldp x6, x7, [sp, #48]
ldp x8, x9, [sp, #64]
ldp x10, x11, [sp, #80]
ldp x12, x13, [sp, #96]
ldp x14, x15, [sp, #112]
ldp x16, x17, [sp, #128]
ldp x18, x19, [sp, #144]
ldp x20, x21, [sp, #160]
ldp x22, x23, [sp, #176]
ldp x24, x25, [sp, #192]
ldp x26, x27, [sp, #208]
ldp x28, x29, [sp, #224]
ldr x30, [sp, #240]
// x17/x18 are needed as scratch for the final sp/pc switch, so the values
// loaded into them above are lost
ldp x17, x18, [sp, #248]
mov sp, x17
br x18

View File

@ -0,0 +1,794 @@
#include "ARMJIT_Compiler.h"
#include "../Config.h"
#include "../ARMJIT_Memory.h"
using namespace Arm64Gen;
namespace ARMJIT
{
// Returns whether pc lies inside the JIT code memory, i.e. within
// JitMemMainSize + JitMemSecondarySize bytes from the RX base.
bool Compiler::IsJITFault(u64 pc)
{
    const u64 rxBase = (u64)GetRXBase();
    if (pc < rxBase)
        return false;

    const u64 codeOffset = pc - rxBase;
    return codeOffset < (JitMemMainSize + JitMemSecondarySize);
}
// Replaces a faulting fastmem access with a call to its slow path.
//
// pc is the host address of the load/store instruction which faulted. The patch registered
// for it (see LoadStorePatches) describes the instruction sequence to overwrite:
// PatchOffset is the distance from the faulting instruction to the start of the sequence,
// PatchSize the length of the sequence in bytes. The sequence is replaced by a BL to
// PatchFunc, padded with NOPs.
//
// Returns PatchOffset (presumably applied to pc by the caller to resume at the BL - TODO confirm).
// If no patch is registered for pc this is a JIT bug and execution is stopped; previously
// the function ran off its end in builds where assert is compiled out.
s64 Compiler::RewriteMemAccess(u64 pc)
{
    ptrdiff_t pcOffset = pc - (u64)GetRXBase();

    auto it = LoadStorePatches.find(pcOffset);
    if (it == LoadStorePatches.end())
    {
        printf("this is a JIT bug! %08x\n", __builtin_bswap32(*(u32*)pc));
        assert(false);
        // assert is a no-op with NDEBUG, never return garbage from here
        __builtin_trap();
    }

    LoadStorePatch patch = it->second;

    // temporarily move the emitter to the start of the sequence to patch
    ptrdiff_t curCodeOffset = GetCodeOffset();
    SetCodePtrUnsafe(pcOffset + patch.PatchOffset);

    BL(patch.PatchFunc);
    // the BL takes one instruction slot, fill the rest of the sequence with NOPs
    for (int i = 0; i < patch.PatchSize / 4 - 1; i++)
        HINT(HINT_NOP);

    FlushIcacheSection((u8*)pc + patch.PatchOffset, (u8*)GetRXPtr());

    SetCodePtrUnsafe(curCodeOffset);

    // the access is a plain call from now on and can't fault again
    LoadStorePatches.erase(it);

    return patch.PatchOffset;
}
bool Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr)
{
u32 localAddr = LocaliseCodeAddress(Num, addr);
int invalidLiteralIdx = InvalidLiterals.Find(localAddr);
if (invalidLiteralIdx != -1)
{
InvalidLiterals.Remove(invalidLiteralIdx);
return false;
}
Comp_AddCycles_CDI();
u32 val;
// make sure arm7 bios is accessible
u32 tmpR15 = CurCPU->R[15];
CurCPU->R[15] = R15;
if (size == 32)
{
CurCPU->DataRead32(addr & ~0x3, &val);
val = ROR(val, (addr & 0x3) << 3);
}
else if (size == 16)
{
CurCPU->DataRead16(addr & ~0x1, &val);
if (signExtend)
val = ((s32)val << 16) >> 16;
}
else
{
CurCPU->DataRead8(addr, &val);
if (signExtend)
val = ((s32)val << 24) >> 24;
}
CurCPU->R[15] = tmpR15;
MOVI2R(MapReg(rd), val);
if (Thumb || CurInstr.Cond() == 0xE)
RegCache.PutLiteral(rd, val);
return true;
}
// Emits a single register load or store.
//   rd     - guest register which is loaded/stored
//   rn     - guest base register
//   offset - immediate or (shifted) register offset
//   size   - access width in bits (8, 16 or 32)
//   flags  - combination of the memop_* flags
// Depending on the configuration and on the expected target region either a patchable
// fastmem access or a call to a memory access function is generated.
void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
{
// mask which aligns the address to the access size (only used by the fastmem path)
u32 addressMask = ~0;
if (size == 32)
addressMask = ~3;
if (size == 16)
addressMask = ~1;
// plain pc relative loads may be turned into an immediate move
if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && offset.IsImm && !(flags & (memop_Post|memop_Store|memop_Writeback)))
{
u32 addr = R15 + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
if (Comp_MemLoadLiteral(size, flags & memop_SignExtend, rd, addr))
return;
}
if (flags & memop_Store)
Comp_AddCycles_CD();
else
Comp_AddCycles_CDI();
ARM64Reg rdMapped = MapReg(rd);
ARM64Reg rnMapped = MapReg(rn);
// in thumb mode bit 1 of the pc is cleared before it is used as a base
if (Thumb && rn == 15)
{
ANDI2R(W3, rnMapped, ~2);
rnMapped = W3;
}
// W0 always holds the address which is accessed. For post indexed accesses that is the
// unmodified base, while base +/- offset is computed directly into the base register.
ARM64Reg finalAddr = W0;
if (flags & memop_Post)
{
finalAddr = rnMapped;
MOV(W0, rnMapped);
}
// the address is known at compile time if the base is a known literal and it isn't modified
bool addrIsStatic = Config::JIT_LiteralOptimisations
&& RegCache.IsLiteral(rn) && offset.IsImm && !(flags & (memop_Writeback|memop_Post));
u32 staticAddress;
if (addrIsStatic)
staticAddress = RegCache.LiteralValues[rn] + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
if (!offset.IsImm)
Comp_RegShiftImm(offset.Reg.ShiftType, offset.Reg.ShiftAmount, false, offset, W2);
// the offset may have become an immediate
if (offset.IsImm)
{
if (offset.Imm)
{
if (flags & memop_SubtractOffset)
SUB(finalAddr, rnMapped, offset.Imm);
else
ADD(finalAddr, rnMapped, offset.Imm);
}
else if (finalAddr != rnMapped)
MOV(finalAddr, rnMapped);
}
else
{
// a rotated offset is materialised in W0 first
// NOTE(review): for post indexed accesses W0 already holds the access address at this
// point and looks like it gets overwritten here - confirm whether this combination can occur
if (offset.Reg.ShiftType == ST_ROR)
{
ROR_(W0, offset.Reg.Rm, offset.Reg.ShiftAmount);
offset = Op2(W0);
}
if (flags & memop_SubtractOffset)
SUB(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption());
else
ADD(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption());
}
// pre indexed with writeback: the base register receives the final address
if (!(flags & memop_Post) && (flags & memop_Writeback))
MOV(rnMapped, W0);
u32 expectedTarget = Num == 0
? ARMJIT_Memory::ClassifyAddress9(addrIsStatic ? staticAddress : CurInstr.DataRegion)
: ARMJIT_Memory::ClassifyAddress7(addrIsStatic ? staticAddress : CurInstr.DataRegion);
// fastmem is used for conditional ARM instructions and whenever the expected region allows it
if (Config::JIT_FastMemory && ((!Thumb && CurInstr.Cond() != 0xE) || ARMJIT_Memory::IsFastmemCompatible(expectedTarget)))
{
ptrdiff_t memopStart = GetCodeOffset();
LoadStorePatch patch;
// function which is called instead once the access has faulted (see RewriteMemAccess)
patch.PatchFunc = flags & memop_Store
? PatchedStoreFuncs[Num][__builtin_ctz(size) - 3][rdMapped - W19]
: PatchedLoadFuncs[Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped - W19];
// note: this check only runs after the tables were already indexed above
assert(rdMapped - W19 >= 0 && rdMapped - W19 < 8);
MOVP2R(X7, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
// take a chance at fastmem
if (size > 8)
ANDI2R(W1, W0, addressMask);
// offset of the instruction which can fault, this is the key of the patch
ptrdiff_t loadStorePosition = GetCodeOffset();
if (flags & memop_Store)
{
STRGeneric(size, rdMapped, size > 8 ? X1 : X0, X7);
}
else
{
LDRGeneric(size, flags & memop_SignExtend, rdMapped, size > 8 ? X1 : X0, X7);
if (size == 32)
{
// rotate the loaded word right by (addr & 3) * 8 bits
UBFIZ(W0, W0, 3, 2);
RORV(rdMapped, rdMapped, W0);
}
}
// distance from the faulting instruction back to the start of the sequence (<= 0)
patch.PatchOffset = memopStart - loadStorePosition;
// length of the whole sequence in bytes
patch.PatchSize = GetCodeOffset() - memopStart;
LoadStorePatches[loadStorePosition] = patch;
}
else
{
// for a known address a specialised function may be available
void* func = NULL;
if (addrIsStatic)
func = ARMJIT_Memory::GetFuncForAddr(CurCPU, staticAddress, flags & memop_Store, size);
if (func)
{
if (flags & memop_Store)
MOV(W1, rdMapped);
QuickCallFunction(X2, (void (*)())func);
if (!(flags & memop_Store))
{
if (size == 32)
{
// the rotation for unaligned addresses is known at compile time
if (staticAddress & 0x3)
ROR_(rdMapped, W0, (staticAddress & 0x3) << 3);
else
MOV(rdMapped, W0);
}
else
{
if (flags & memop_SignExtend)
SBFX(rdMapped, W0, 0, size);
else
UBFX(rdMapped, W0, 0, size);
}
}
}
else
{
// generic slow path, the address is passed in W0
// NOTE(review): the Slow* templates declared in ARMJIT_Internal.h take <T, ConsoleType>,
// confirm that the single argument instantiations used here match a visible declaration
if (Num == 0)
{
MOV(X1, RCPU);
if (flags & memop_Store)
{
MOV(W2, rdMapped);
switch (size)
{
case 32: QuickCallFunction(X3, SlowWrite9<u32>); break;
case 16: QuickCallFunction(X3, SlowWrite9<u16>); break;
case 8: QuickCallFunction(X3, SlowWrite9<u8>); break;
}
}
else
{
switch (size)
{
case 32: QuickCallFunction(X3, SlowRead9<u32>); break;
case 16: QuickCallFunction(X3, SlowRead9<u16>); break;
case 8: QuickCallFunction(X3, SlowRead9<u8>); break;
}
}
}
else
{
if (flags & memop_Store)
{
MOV(W1, rdMapped);
switch (size)
{
case 32: QuickCallFunction(X3, SlowWrite7<u32>); break;
case 16: QuickCallFunction(X3, SlowWrite7<u16>); break;
case 8: QuickCallFunction(X3, SlowWrite7<u8>); break;
}
}
else
{
switch (size)
{
case 32: QuickCallFunction(X3, SlowRead7<u32>); break;
case 16: QuickCallFunction(X3, SlowRead7<u16>); break;
case 8: QuickCallFunction(X3, SlowRead7<u8>); break;
}
}
}
if (!(flags & memop_Store))
{
if (size == 32)
MOV(rdMapped, W0);
else if (flags & memop_SignExtend)
SBFX(rdMapped, W0, 0, size);
else
UBFX(rdMapped, W0, 0, size);
}
}
}
// a load into the pc is a branch
if (CurInstr.Info.Branches())
{
if (size < 32)
printf("LDR size < 32 branching?\n");
Comp_JumpTo(rdMapped, Num == 0, false);
}
}
void Compiler::A_Comp_MemWB()
{
Op2 offset;
if (CurInstr.Instr & (1 << 25))
offset = Op2(MapReg(CurInstr.A_Reg(0)), (ShiftType)((CurInstr.Instr >> 5) & 0x3), (CurInstr.Instr >> 7) & 0x1F);
else
offset = Op2(CurInstr.Instr & 0xFFF);
bool load = CurInstr.Instr & (1 << 20);
bool byte = CurInstr.Instr & (1 << 22);
int flags = 0;
if (!load)
flags |= memop_Store;
if (!(CurInstr.Instr & (1 << 24)))
flags |= memop_Post;
if (CurInstr.Instr & (1 << 21))
flags |= memop_Writeback;
if (!(CurInstr.Instr & (1 << 23)))
flags |= memop_SubtractOffset;
Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, byte ? 8 : 32, flags);
}
void Compiler::A_Comp_MemHD()
{
bool load = CurInstr.Instr & (1 << 20);
bool signExtend;
int op = (CurInstr.Instr >> 5) & 0x3;
int size;
if (load)
{
signExtend = op >= 2;
size = op == 2 ? 8 : 16;
}
else
{
size = 16;
signExtend = false;
}
Op2 offset;
if (CurInstr.Instr & (1 << 22))
offset = Op2((CurInstr.Instr & 0xF) | ((CurInstr.Instr >> 4) & 0xF0));
else
offset = Op2(MapReg(CurInstr.A_Reg(0)));
int flags = 0;
if (signExtend)
flags |= memop_SignExtend;
if (!load)
flags |= memop_Store;
if (!(CurInstr.Instr & (1 << 24)))
flags |= memop_Post;
if (!(CurInstr.Instr & (1 << 23)))
flags |= memop_SubtractOffset;
if (CurInstr.Instr & (1 << 21))
flags |= memop_Writeback;
Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, size, flags);
}
// Thumb word/byte load/store with register offset.
void Compiler::T_Comp_MemReg()
{
    const int op = (CurInstr.Instr >> 10) & 0x3;
    const bool isStore = !(op & 0x2);
    const int size = (op & 0x1) ? 8 : 32;

    Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3),
        Op2(MapReg(CurInstr.T_Reg(6))), size, isStore ? memop_Store : 0);
}
void Compiler::T_Comp_MemImm()
{
int op = (CurInstr.Instr >> 11) & 0x3;
bool load = op & 0x1;
bool byte = op & 0x2;
u32 offset = ((CurInstr.Instr >> 6) & 0x1F) * (byte ? 1 : 4);
Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(offset),
byte ? 8 : 32, load ? 0 : memop_Store);
}
// Thumb halfword/signed load/store with register offset.
// op 0 is the only store, op 1 the only byte access, odd ops sign extend.
void Compiler::T_Comp_MemRegHalf()
{
    const int op = (CurInstr.Instr >> 10) & 0x3;

    const int size = (op == 1) ? 8 : 16;

    int flags = 0;
    if (op & 1)
        flags |= memop_SignExtend;
    if (op == 0)
        flags |= memop_Store;

    Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(MapReg(CurInstr.T_Reg(6))),
        size, flags);
}
// Thumb halfword load/store with immediate offset.
void Compiler::T_Comp_MemImmHalf()
{
    // 5 bit immediate starting at bit 6, already multiplied by two
    u32 offset = (CurInstr.Instr >> 5) & 0x3E;
    const bool isStore = !(CurInstr.Instr & (1 << 11));

    Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(offset), 16,
        isStore ? memop_Store : 0);
}
// Thumb pc relative word load. Compiled as a literal move if possible,
// as a regular memory access otherwise.
void Compiler::T_Comp_LoadPCRel()
{
    u32 offset = ((CurInstr.Instr & 0xFF) << 2);
    u32 addr = (R15 & ~0x2) + offset;

    if (Config::JIT_LiteralOptimisations && Comp_MemLoadLiteral(32, false, CurInstr.T_Reg(8), addr))
        return;

    Comp_MemAccess(CurInstr.T_Reg(8), 15, Op2(offset), 32, 0);
}
// Thumb sp relative word load/store.
void Compiler::T_Comp_MemSPRel()
{
    u32 offset = (CurInstr.Instr & 0xFF) * 4;
    const bool isStore = !(CurInstr.Instr & (1 << 11));

    Comp_MemAccess(CurInstr.T_Reg(8), 13, Op2(offset), 32, isStore ? memop_Store : 0);
}
// Emits a block transfer (LDM/STM style) of the registers in regs relative to base register rn.
//   store     - registers are stored instead of loaded
//   preinc    - the address is advanced before each transfer
//   decrement - the block lies below the base address
//   usermode  - registers r8-r14 are transferred through ReadBanked/WriteBanked
// The base register itself is not modified here. Returns the signed number of bytes the
// caller has to add to it for writeback (0 for an empty register list).
s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode)
{
IrregularCycles = true;
int regsCount = regs.Count();
if (regsCount == 0)
return 0; // actually not the right behaviour TODO: fix me
// a single register which is already loaded is handled like a regular word access
if (regsCount == 1 && !usermode && RegCache.LoadedRegs & (1 << *regs.begin()))
{
int flags = 0;
if (store)
flags |= memop_Store;
if (decrement)
flags |= memop_SubtractOffset;
Op2 offset = preinc ? Op2(4) : Op2(0);
Comp_MemAccess(*regs.begin(), rn, offset, 32, flags);
return decrement ? -4 : 4;
}
if (store)
Comp_AddCycles_CD();
else
Comp_AddCycles_CDI();
int expectedTarget = Num == 0
? ARMJIT_Memory::ClassifyAddress9(CurInstr.DataRegion)
: ARMJIT_Memory::ClassifyAddress7(CurInstr.DataRegion);
// note that the fast path is only compiled for stores
bool compileFastPath = Config::JIT_FastMemory
&& store && !usermode && (CurInstr.Cond() < 0xE || ARMJIT_Memory::IsFastmemCompatible(expectedTarget));
// W0 = word aligned start address. Decrementing transfers are turned into incrementing
// ones starting at the lowest address, which flips the meaning of preinc.
if (decrement)
{
SUB(W0, MapReg(rn), regsCount * 4);
ANDI2R(W0, W0, ~3);
preinc ^= true;
}
else
{
ANDI2R(W0, MapReg(rn), ~3);
}
LoadStorePatch patch;
if (compileFastPath)
{
ptrdiff_t fastPathStart = GetCodeOffset();
ptrdiff_t firstLoadStoreOffset;
bool firstLoadStore = true;
// X1 = host address of the block inside the fastmem mapping
MOVP2R(X1, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
ADD(X1, X1, X0);
u32 offset = preinc ? 4 : 0;
BitSet16::Iterator it = regs.begin();
// with an odd count the first register is transferred on its own, so that the
// remaining ones can be transferred in pairs
if (regsCount & 1)
{
int reg = *it;
it++;
ARM64Reg first = W3;
if (RegCache.LoadedRegs & (1 << reg))
first = MapReg(reg);
else if (store)
LoadReg(reg, first);
// the first access is the one the patch is registered for
if (firstLoadStore)
{
firstLoadStoreOffset = GetCodeOffset();
firstLoadStore = false;
}
if (store)
STR(INDEX_UNSIGNED, first, X1, offset);
else
LDR(INDEX_UNSIGNED, first, X1, offset);
if (!(RegCache.LoadedRegs & (1 << reg)) && !store)
SaveReg(reg, first);
offset += 4;
}
while (it != regs.end())
{
int reg = *it;
it++;
int nextReg = *it;
it++;
// registers which aren't mapped go through W3/W4
ARM64Reg first = W3, second = W4;
if (RegCache.LoadedRegs & (1 << reg))
first = MapReg(reg);
else if (store)
LoadReg(reg, first);
if (RegCache.LoadedRegs & (1 << nextReg))
second = MapReg(nextReg);
else if (store)
LoadReg(nextReg, second);
if (firstLoadStore)
{
firstLoadStoreOffset = GetCodeOffset();
firstLoadStore = false;
}
if (store)
STP(INDEX_SIGNED, first, second, X1, offset);
else
LDP(INDEX_SIGNED, first, second, X1, offset);
if (!(RegCache.LoadedRegs & (1 << reg)) && !store)
SaveReg(reg, first);
if (!(RegCache.LoadedRegs & (1 << nextReg)) && !store)
SaveReg(nextReg, second);
offset += 8;
}
patch.PatchSize = GetCodeOffset() - fastPathStart;
patch.PatchOffset = fastPathStart - firstLoadStoreOffset;
// the slow path below is emitted into the other code region and serves as the
// function which is called once the fast path has faulted
SwapCodeRegion();
patch.PatchFunc = GetRXPtr();
LoadStorePatches[firstLoadStoreOffset] = patch;
// the slow path is entered with BL and calls other functions, so x30 has to be preserved
ABI_PushRegisters({30});
}
// slow path: the registers are exchanged with the transfer function through a
// buffer on the stack which has an 8 byte slot per register
int i = 0;
SUB(SP, SP, ((regsCount + 1) & ~1) * 8);
if (store)
{
// W5 = lowest 5 bits of the CPSR, set up for ReadBanked
if (usermode && (regs & BitSet16(0x7f00)))
UBFX(W5, RCPSR, 0, 5);
BitSet16::Iterator it = regs.begin();
while (it != regs.end())
{
BitSet16::Iterator nextReg = it;
nextReg++;
int reg = *it;
if (usermode && reg >= 8 && reg < 15)
{
// W3 = current value, W1 = index of the banked register
// presumably ReadBanked replaces W3 if the register is banked - TODO confirm
if (RegCache.LoadedRegs & (1 << reg))
MOV(W3, MapReg(reg));
else
LoadReg(reg, W3);
MOVI2R(W1, reg - 8);
BL(ReadBanked);
STR(INDEX_UNSIGNED, W3, SP, i * 8);
}
else if (!usermode && nextReg != regs.end())
{
// two registers are written into two consecutive slots at once
ARM64Reg first = W3, second = W4;
if (RegCache.LoadedRegs & (1 << reg))
first = MapReg(reg);
else
LoadReg(reg, W3);
if (RegCache.LoadedRegs & (1 << *nextReg))
second = MapReg(*nextReg);
else
LoadReg(*nextReg, W4);
STP(INDEX_SIGNED, EncodeRegTo64(first), EncodeRegTo64(second), SP, i * 8);
i++;
it++;
}
else if (RegCache.LoadedRegs & (1 << reg))
{
STR(INDEX_UNSIGNED, MapReg(reg), SP, i * 8);
}
else
{
LoadReg(reg, W3);
STR(INDEX_UNSIGNED, W3, SP, i * 8);
}
i++;
it++;
}
}
// arguments of the transfer function: W0 = address, X1 = buffer, W2 = register count
// (and X3 = cpu for the ARM9)
// NOTE(review): SlowBlockTransfer9/7 are declared as <bool Write, int ConsoleType> in
// ARMJIT_Internal.h, confirm that the <preinc, store> instantiations below match a visible declaration
ADD(X1, SP, 0);
MOVI2R(W2, regsCount);
if (Num == 0)
{
MOV(X3, RCPU);
switch (preinc * 2 | store)
{
case 0: QuickCallFunction(X4, SlowBlockTransfer9<false, false>); break;
case 1: QuickCallFunction(X4, SlowBlockTransfer9<false, true>); break;
case 2: QuickCallFunction(X4, SlowBlockTransfer9<true, false>); break;
case 3: QuickCallFunction(X4, SlowBlockTransfer9<true, true>); break;
}
}
else
{
switch (preinc * 2 | store)
{
case 0: QuickCallFunction(X4, SlowBlockTransfer7<false, false>); break;
case 1: QuickCallFunction(X4, SlowBlockTransfer7<false, true>); break;
case 2: QuickCallFunction(X4, SlowBlockTransfer7<true, false>); break;
case 3: QuickCallFunction(X4, SlowBlockTransfer7<true, true>); break;
}
}
if (!store)
{
// registers are only written to the user bank if the pc isn't part of the list
if (usermode && !regs[15] && (regs & BitSet16(0x7f00)))
UBFX(W5, RCPSR, 0, 5);
BitSet16::Iterator it = regs.begin();
while (it != regs.end())
{
BitSet16::Iterator nextReg = it;
nextReg++;
int reg = *it;
if (usermode && !regs[15] && reg >= 8 && reg < 15)
{
LDR(INDEX_UNSIGNED, W3, SP, i * 8);
MOVI2R(W1, reg - 8);
BL(WriteBanked);
// the value only goes into the current register if W4 is zero after the call
FixupBranch alreadyWritten = CBNZ(W4);
if (RegCache.LoadedRegs & (1 << reg))
MOV(MapReg(reg), W3);
else
SaveReg(reg, W3);
SetJumpTarget(alreadyWritten);
}
else if (!usermode && nextReg != regs.end())
{
// two registers are read from two consecutive slots at once
ARM64Reg first = W3, second = W4;
if (RegCache.LoadedRegs & (1 << reg))
first = MapReg(reg);
if (RegCache.LoadedRegs & (1 << *nextReg))
second = MapReg(*nextReg);
LDP(INDEX_SIGNED, EncodeRegTo64(first), EncodeRegTo64(second), SP, i * 8);
if (first == W3)
SaveReg(reg, W3);
if (second == W4)
SaveReg(*nextReg, W4);
it++;
i++;
}
else if (RegCache.LoadedRegs & (1 << reg))
{
ARM64Reg mapped = MapReg(reg);
LDR(INDEX_UNSIGNED, mapped, SP, i * 8);
}
else
{
LDR(INDEX_UNSIGNED, W3, SP, i * 8);
SaveReg(reg, W3);
}
it++;
i++;
}
}
ADD(SP, SP, ((regsCount + 1) & ~1) * 8);
if (compileFastPath)
{
// finish the out of line slow path and continue emitting after the fast path
ABI_PopRegisters({30});
RET();
FlushIcacheSection((u8*)patch.PatchFunc, (u8*)GetRXPtr());
SwapCodeRegion();
}
// loading the pc is a branch
if (!store && regs[15])
{
ARM64Reg mapped = MapReg(15);
Comp_JumpTo(mapped, Num == 0, usermode);
}
return regsCount * 4 * (decrement ? -1 : 1);
}
void Compiler::A_Comp_LDM_STM()
{
BitSet16 regs(CurInstr.Instr & 0xFFFF);
bool load = CurInstr.Instr & (1 << 20);
bool pre = CurInstr.Instr & (1 << 24);
bool add = CurInstr.Instr & (1 << 23);
bool writeback = CurInstr.Instr & (1 << 21);
bool usermode = CurInstr.Instr & (1 << 22);
ARM64Reg rn = MapReg(CurInstr.A_Reg(16));
s32 offset = Comp_MemAccessBlock(CurInstr.A_Reg(16), regs, !load, pre, !add, usermode);
if (load && writeback && regs[CurInstr.A_Reg(16)])
writeback = Num == 0
? (!(regs & ~BitSet16(1 << CurInstr.A_Reg(16)))) || (regs & ~BitSet16((2 << CurInstr.A_Reg(16)) - 1))
: false;
if (writeback)
{
if (offset > 0)
ADD(rn, rn, offset);
else
SUB(rn, rn, -offset);
}
}
// Thumb PUSH/POP: block transfer relative to the stack pointer, which is always updated.
void Compiler::T_Comp_PUSH_POP()
{
    const bool isPop = CurInstr.Instr & (1 << 11);
    const bool isPush = !isPop;

    BitSet16 regs(CurInstr.Instr & 0xFF);
    // bit 8 adds the pc to a POP and the lr to a PUSH
    if (CurInstr.Instr & (1 << 8))
        regs[isPop ? 15 : 14] = true;

    ARM64Reg sp = MapReg(13);
    // PUSH stores below the stack pointer, POP loads upwards from it
    s32 offset = Comp_MemAccessBlock(13, regs, isPush, isPush, isPush, false);

    if (offset > 0)
        ADD(sp, sp, offset);
    else
        SUB(sp, sp, -offset);
}
// Thumb LDMIA/STMIA: increment after block transfer relative to the register
// encoded in bits 8-10. The base register is updated, except if it was loaded itself.
void Compiler::T_Comp_LDMIA_STMIA()
{
    BitSet16 regs(CurInstr.Instr & 0xFF);
    ARM64Reg rb = MapReg(CurInstr.T_Reg(8));
    bool load = CurInstr.Instr & (1 << 11);

    s32 offset = Comp_MemAccessBlock(CurInstr.T_Reg(8), regs, !load, false, false, false);

    // a loaded base register keeps the loaded value
    if (!load || !regs[CurInstr.T_Reg(8)])
    {
        if (offset > 0)
            ADD(rb, rb, offset);
        else
            SUB(rb, rb, -offset);
    }
}
}

12
src/ARMJIT_Compiler.h Normal file
View File

@ -0,0 +1,12 @@
// Pulls in the Compiler class of the JIT backend matching the target architecture.
// Building for any other architecture is a compile error.
#if defined(__x86_64__)
#include "ARMJIT_x64/ARMJIT_Compiler.h"
#elif defined(__aarch64__)
#include "ARMJIT_A64/ARMJIT_Compiler.h"
#else
#error "The current target platform doesn't have a JIT backend"
#endif
namespace ARMJIT
{
// compiler instance of the selected backend, defined elsewhere
extern Compiler* JITCompiler;
}

227
src/ARMJIT_Internal.h Normal file
View File

@ -0,0 +1,227 @@
#ifndef ARMJIT_INTERNAL_H
#define ARMJIT_INTERNAL_H
#include "types.h"
#include <stdint.h>
#include <string.h>
#include <assert.h>
#include "ARMJIT.h"
#include "ARMJIT_Memory.h"
// here lands everything which doesn't fit into ARMJIT.h
// where it would be included by pretty much everything
namespace ARMJIT
{
// Bit flags describing a branch instruction
// (presumably the contents of FetchedInstr::BranchFlags - TODO confirm).
enum
{
branch_IdleBranch = 1 << 0,
branch_FollowCondTaken = 1 << 1,
branch_FollowCondNotTaken = 1 << 2,
branch_StaticTarget = 1 << 3,
};
// An instruction as seen by the JIT, together with the information gathered about it.
struct FetchedInstr
{
    // 4 bit register field of an ARM instruction, starting at bit pos
    u32 A_Reg(int pos) const
    {
        const u32 field = Instr >> pos;
        return field & 0xF;
    }
    // 3 bit register field of a Thumb instruction, starting at bit pos
    u32 T_Reg(int pos) const
    {
        const u32 field = Instr >> pos;
        return field & 0x7;
    }

    // condition field, the topmost 4 bits of the instruction
    u32 Cond() const
    {
        const u32 condition = Instr >> 28;
        return condition;
    }

    u8 BranchFlags;
    u8 SetFlags;
    u32 Instr;
    u32 Addr;

    u8 DataCycles;
    u16 CodeCycles;
    u32 DataRegion;

    ARMInstrInfo::Info Info;
};
/*
TinyVector
- because reinventing the wheel is the best!
- meant to be used very often, with not so many elements
max 1 << 16 elements
- doesn't allocate while no elements are inserted
- not stl confirmant of course
- probably only works with POD types
- remove operations don't preserve order, but O(1)!
*/
template <typename T>
struct __attribute__((packed)) TinyVector
{
T* Data = NULL;
u16 Capacity = 0;
u16 Length = 0;
~TinyVector()
{
delete[] Data;
}
void MakeCapacity(u32 capacity)
{
assert(capacity <= UINT16_MAX);
assert(capacity > Capacity);
T* newMem = new T[capacity];
if (Data != NULL)
memcpy(newMem, Data, sizeof(T) * Length);
T* oldData = Data;
Data = newMem;
if (oldData != NULL)
delete[] oldData;
Capacity = capacity;
}
void SetLength(u16 length)
{
if (Capacity < length)
MakeCapacity(length);
Length = length;
}
void Clear()
{
Length = 0;
}
void Add(T element)
{
assert(Length + 1 <= UINT16_MAX);
if (Length + 1 > Capacity)
MakeCapacity(((Capacity + 4) * 3) / 2);
Data[Length++] = element;
}
void Remove(int index)
{
assert(index >= 0 && index < Length);
Length--;
Data[index] = Data[Length];
/*for (int i = index; i < Length; i++)
Data[i] = Data[i + 1];*/
}
int Find(T needle)
{
for (int i = 0; i < Length; i++)
{
if (Data[i] == needle)
return i;
}
return -1;
}
bool RemoveByValue(T needle)
{
for (int i = 0; i < Length; i++)
{
if (Data[i] == needle)
{
Remove(i);
return true;
}
}
return false;
}
T& operator[](int index)
{
assert(index >= 0 && index < Length);
return Data[index];
}
};
// A compiled block of guest code.
//
// Data holds three arrays back to back: NumAddresses address ranges, NumAddresses
// address masks and NumLiterals literals.
class JitBlock
{
public:
    // Allocates the storage for the given number of address ranges and literals.
    // All other fields (StartAddr, the hashes, EntryPoint, ...) are left for the caller
    // to fill in; literalHash is currently not used here.
    JitBlock(u32 num, u32 literalHash, u32 numAddresses, u32 numLiterals)
    {
        // SetLength takes a u16, a larger total would be truncated silently
        assert(numAddresses * 2 + numLiterals <= UINT16_MAX);

        Num = num;
        NumAddresses = numAddresses;
        NumLiterals = numLiterals;
        Data.SetLength(numAddresses * 2 + numLiterals);
    }

    u32 StartAddr;
    u32 StartAddrLocal;
    u32 InstrHash, LiteralHash;
    u8 Num;
    u16 NumAddresses;
    u16 NumLiterals;

    JitBlockEntry EntryPoint;

    // The accessors use plain pointer arithmetic instead of TinyVector::operator[]:
    // the latter asserts index < Length, which fails for the start of an empty
    // array (e.g. Literals() of a block without literals).
    u32* AddressRanges()
    { return Data.Data; }
    u32* AddressMasks()
    { return Data.Data + NumAddresses; }
    u32* Literals()
    { return Data.Data + NumAddresses * 2; }

private:
    TinyVector<u32> Data;
};
// size should be 16 bytes because I'm too lazy to use mul and whatnot
// (with 8 byte pointers: packed TinyVector = 8 + 2 + 2 bytes, plus 4 bytes for Code)
struct __attribute__((packed)) AddressRange
{
// blocks which are associated with this range
TinyVector<JitBlock*> Blocks;
u32 Code;
};
typedef void (*InterpreterFunc)(ARM* cpu);
extern InterpreterFunc InterpretARM[];
extern InterpreterFunc InterpretTHUMB[];
extern TinyVector<u32> InvalidLiterals;
extern AddressRange* const CodeMemRegions[ARMJIT_Memory::memregions_Count];
// Returns whether any of the 8 consecutive address ranges starting at range
// has at least one block registered.
inline bool PageContainsCode(AddressRange* range)
{
    AddressRange* const end = range + 8;
    for (AddressRange* cur = range; cur != end; ++cur)
    {
        if (cur->Blocks.Length > 0)
            return true;
    }
    return false;
}
u32 LocaliseCodeAddress(u32 num, u32 addr);
template <u32 Num>
void LinkBlock(ARM* cpu, u32 codeOffset);
template <typename T, int ConsoleType> T SlowRead9(u32 addr, ARMv5* cpu);
template <typename T, int ConsoleType> void SlowWrite9(u32 addr, ARMv5* cpu, T val);
template <typename T, int ConsoleType> T SlowRead7(u32 addr);
template <typename T, int ConsoleType> void SlowWrite7(u32 addr, T val);
template <bool Write, int ConsoleType> void SlowBlockTransfer9(u32 addr, u64* data, u32 num, ARMv5* cpu);
template <bool Write, int ConsoleType> void SlowBlockTransfer7(u32 addr, u64* data, u32 num);
}
#endif

1072
src/ARMJIT_Memory.cpp Normal file

File diff suppressed because it is too large Load Diff

63
src/ARMJIT_Memory.h Normal file
View File

@ -0,0 +1,63 @@
#ifndef ARMJIT_MEMORY
#define ARMJIT_MEMORY
#include "types.h"
#include "ARM.h"
namespace ARMJIT_Memory
{
extern void* FastMem9Start;
extern void* FastMem7Start;
void Init();
void DeInit();
void Reset();
// Identifiers of the memory regions known to the JIT
// (presumably the values returned by ClassifyAddress9/ClassifyAddress7 - TODO confirm).
enum
{
memregion_Other = 0,
memregion_ITCM,
memregion_DTCM,
memregion_BIOS9,
memregion_MainRAM,
memregion_SharedWRAM,
memregion_IO9,
memregion_VRAM,
memregion_BIOS7,
memregion_WRAM7,
memregion_IO7,
memregion_Wifi,
memregion_VWRAM,
// DSi
memregion_BIOS9DSi,
memregion_BIOS7DSi,
memregion_NewSharedWRAM_A,
memregion_NewSharedWRAM_B,
memregion_NewSharedWRAM_C,
// number of regions, has to stay the last entry
memregions_Count
};
int ClassifyAddress9(u32 addr);
int ClassifyAddress7(u32 addr);
bool GetMirrorLocation(int region, u32 num, u32 addr, u32& memoryOffset, u32& mirrorStart, u32& mirrorSize);
u32 LocaliseAddress(int region, u32 num, u32 addr);
bool IsFastmemCompatible(int region);
void RemapDTCM(u32 newBase, u32 newSize);
void RemapSWRAM();
void RemapNWRAM(int num);
void SetCodeProtection(int region, u32 offset, bool protect);
void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size);
}
#endif

199
src/ARMJIT_RegisterCache.h Normal file
View File

@ -0,0 +1,199 @@
#ifndef ARMJIT_REGCACHE_H
#define ARMJIT_REGCACHE_H
#include "ARMJIT.h"
// TODO: replace this in the future
#include "dolphin/BitSet.h"
#include <assert.h>
namespace ARMJIT
{
// Maps the 16 guest registers onto the native registers of a JIT backend.
//
// T is the backend compiler, which has to provide LoadReg(guestReg, nativeReg) and
// SaveReg(guestReg, nativeReg). Reg is the native register type. The backend also
// defines NativeRegAllocOrder and NativeRegsAvailable.
//
// Additionally keeps track of guest registers which hold a value known at compile time.
template <typename T, typename Reg>
class RegisterCache
{
public:
    RegisterCache()
    {}

    RegisterCache(T* compiler, FetchedInstr instrs[], int instrsCount, bool pcAllocatableAsSrc = false)
        : Compiler(compiler), Instrs(instrs), InstrsCount(instrsCount)
    {
        for (int i = 0; i < 16; i++)
            Mapping[i] = (Reg)-1;

        // mask applied to the source registers in Prepare, it removes
        // the pc unless it may be allocated as a source
        PCAllocatableAsSrc = ~(pcAllocatableAsSrc
            ? 0
            : (1 << 15));
    }

    // Releases the native register of guest register reg, writing it back first if it is dirty.
    void UnloadRegister(int reg)
    {
        assert(Mapping[reg] != -1);

        if (DirtyRegs & (1 << reg))
            Compiler->SaveReg(reg, Mapping[reg]);

        DirtyRegs &= ~(1 << reg);
        LoadedRegs &= ~(1 << reg);
        NativeRegsUsed &= ~(1 << (int)Mapping[reg]);
        Mapping[reg] = (Reg)-1;
    }

    // Assigns the first free native register (in allocation order) to guest register reg.
    // The current value is only loaded into it if loadValue is set.
    void LoadRegister(int reg, bool loadValue)
    {
        assert(Mapping[reg] == -1);
        for (int i = 0; i < NativeRegsAvailable; i++)
        {
            Reg nativeReg = NativeRegAllocOrder[i];
            if (!(NativeRegsUsed & (1 << nativeReg)))
            {
                Mapping[reg] = nativeReg;
                NativeRegsUsed |= 1 << (int)nativeReg;
                LoadedRegs |= 1 << reg;

                if (loadValue)
                    Compiler->LoadReg(reg, nativeReg);

                return;
            }
        }

        // Running out of native registers is a bug in Prepare. The original
        // assert("Welp!") tested a string literal, which is always true.
        assert(false && "Welp!");
    }

    // Marks guest register reg as holding the compile time constant val.
    void PutLiteral(int reg, u32 val)
    {
        LiteralsLoaded |= (1 << reg);
        LiteralValues[reg] = val;
    }

    void UnloadLiteral(int reg)
    {
        LiteralsLoaded &= ~(1 << reg);
    }

    bool IsLiteral(int reg)
    {
        return LiteralsLoaded & (1 << reg);
    }

    // Writes back all dirty registers without changing the state of the cache.
    void PrepareExit()
    {
        BitSet16 dirtyRegs(DirtyRegs);
        for (int reg : dirtyRegs)
            Compiler->SaveReg(reg, Mapping[reg]);
    }

    // Unloads all registers and forgets all literals.
    void Flush()
    {
        BitSet16 loadedSet(LoadedRegs);
        for (int reg : loadedSet)
            UnloadRegister(reg);
        LiteralsLoaded = 0;
    }

    // Brings the cache into the state required to compile instruction i:
    // all registers the instruction strictly needs are mapped afterwards.
    void Prepare(bool thumb, int i)
    {
        FetchedInstr instr = Instrs[i];

        // the pc is never kept across instructions
        if (LoadedRegs & (1 << 15))
            UnloadRegister(15);

        // registers written by the instruction don't hold a known value anymore
        BitSet16 invalidedLiterals(LiteralsLoaded & instr.Info.DstRegs);
        for (int reg : invalidedLiterals)
            UnloadLiteral(reg);

        // count how often every register is still needed from here on
        u16 futureNeeded = 0;
        int ranking[16];
        for (int j = 0; j < 16; j++)
            ranking[j] = 0;
        for (int j = i; j < InstrsCount; j++)
        {
            BitSet16 regsNeeded((Instrs[j].Info.SrcRegs & ~(1 << 15)) | Instrs[j].Info.DstRegs);
            futureNeeded |= regsNeeded.m_val;
            regsNeeded &= BitSet16(~Instrs[j].Info.NotStrictlyNeeded);
            for (int reg : regsNeeded)
                ranking[reg]++;
        }

        // we'll unload all registers which are never used again
        BitSet16 neverNeededAgain(LoadedRegs & ~futureNeeded);
        for (int reg : neverNeededAgain)
            UnloadRegister(reg);

        u16 necessaryRegs = ((instr.Info.SrcRegs & PCAllocatableAsSrc) | instr.Info.DstRegs) & ~instr.Info.NotStrictlyNeeded;
        BitSet16 needToBeLoaded(necessaryRegs & ~LoadedRegs);
        if (needToBeLoaded != BitSet16(0))
        {
            int neededCount = needToBeLoaded.Count();
            BitSet16 loadedSet(LoadedRegs);
            // make room by evicting the registers which are needed the least in the
            // future and which aren't needed by this instruction
            while (loadedSet.Count() + neededCount > NativeRegsAvailable)
            {
                int leastReg = -1;
                int rank = 1000;
                for (int reg : loadedSet)
                {
                    if (!((1 << reg) & necessaryRegs) && ranking[reg] < rank)
                    {
                        leastReg = reg;
                        rank = ranking[reg];
                    }
                }

                assert(leastReg != -1);
                UnloadRegister(leastReg);

                loadedSet.m_val = LoadedRegs;
            }

            // we don't need to load a value which is always going to be overwritten
            BitSet16 needValueLoaded(needToBeLoaded);
            if (thumb || instr.Cond() >= 0xE)
                needValueLoaded = BitSet16(instr.Info.SrcRegs);
            for (int reg : needToBeLoaded)
                LoadRegister(reg, needValueLoaded[reg]);
        }
        {
            // registers which aren't strictly needed are only mapped
            // as long as there are native registers left
            BitSet16 loadedSet(LoadedRegs);
            BitSet16 loadRegs(instr.Info.NotStrictlyNeeded & futureNeeded & ~LoadedRegs);
            if (loadRegs && loadedSet.Count() < NativeRegsAvailable)
            {
                int left = NativeRegsAvailable - loadedSet.Count();
                for (int reg : loadRegs)
                {
                    if (left-- == 0)
                        break;
                    LoadRegister(reg, !(thumb || instr.Cond() >= 0xE) || (1 << reg) & instr.Info.SrcRegs);
                }
            }
        }

        // everything the instruction writes has to be written back later, except the pc
        DirtyRegs |= (LoadedRegs & instr.Info.DstRegs) & ~(1 << 15);
    }

    // provided by the backend
    static const Reg NativeRegAllocOrder[];
    static const int NativeRegsAvailable;

    // native register of every guest register, (Reg)-1 if it isn't mapped
    Reg Mapping[16];
    // only valid for registers whose bit is set in LiteralsLoaded
    u32 LiteralValues[16];

    // the following are bit masks indexed by guest register, except for
    // NativeRegsUsed which is indexed by native register
    u16 LiteralsLoaded = 0;
    u32 NativeRegsUsed = 0;
    u16 LoadedRegs = 0;
    u16 DirtyRegs = 0;

    u16 PCAllocatableAsSrc = 0;

    T* Compiler;

    FetchedInstr* Instrs;
    int InstrsCount;
};
}
#endif

View File

@ -0,0 +1,768 @@
#include "ARMJIT_Compiler.h"
using namespace Gen;
namespace ARMJIT
{
// Emits rd = rn <op> op2 for a two operand x86 instruction.
//   opSyncCarry   - the host carry flag is loaded from bit 29 of RCPSR before the operation
//   opInvertCarry - the carry is inverted after loading it (and passed on to Comp_RetriveFlags)
//   opInvertOp2   - op2 is inverted bitwise before the operation
//   opSymmetric   - the operands may be swapped
//   opSetsFlags   - the resulting flags are retrieved into the guest flags
// uses RSCRATCH3
void Compiler::Comp_ArithTriOp(void (Compiler::*op)(int, const OpArg&, const OpArg&),
OpArg rd, OpArg rn, OpArg op2, bool carryUsed, int opFlags)
{
if (opFlags & opSyncCarry)
{
BT(32, R(RCPSR), Imm8(29));
if (opFlags & opInvertCarry)
CMC();
}
// destination and first operand are the same, the operation can be done in place
if (rd == rn && !(opFlags & opInvertOp2))
(this->*op)(32, rd, op2);
// op2 already lives in a scratch register which may be overwritten
else if (opFlags & opSymmetric && op2 == R(RSCRATCH))
{
if (opFlags & opInvertOp2)
NOT(32, op2);
(this->*op)(32, op2, rn);
MOV(32, rd, op2);
}
else
{
if (opFlags & opInvertOp2)
{
// op2 is copied first, so that the original operand isn't modified
if (op2 != R(RSCRATCH))
{
MOV(32, R(RSCRATCH), op2);
op2 = R(RSCRATCH);
}
NOT(32, op2);
}
// general case: the result is computed in RSCRATCH3 and moved to rd afterwards
MOV(32, R(RSCRATCH3), rn);
(this->*op)(32, R(RSCRATCH3), op2);
MOV(32, rd, R(RSCRATCH3));
}
if (opFlags & opSetsFlags)
Comp_RetriveFlags(opFlags & opInvertCarry, opFlags & opRetriveCV, carryUsed);
}
// Emits rd = op2 <op> rn, i.e. Comp_ArithTriOp with swapped operands (used for RSB and RSC).
// op2 is moved into RSCRATCH if it isn't already there, the operation is done in place
// on it and the result is moved to rd. Only opSyncCarry, opInvertCarry, opRetriveCV
// and opSetsFlags are looked at.
void Compiler::Comp_ArithTriOpReverse(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&),
Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags)
{
if (opFlags & opSyncCarry)
{
// load the host carry flag from bit 29 of RCPSR
BT(32, R(RCPSR), Imm8(29));
if (opFlags & opInvertCarry)
CMC();
}
if (op2 != R(RSCRATCH))
{
MOV(32, R(RSCRATCH), op2);
op2 = R(RSCRATCH);
}
(this->*op)(32, op2, rn);
MOV(32, rd, op2);
if (opFlags & opSetsFlags)
Comp_RetriveFlags(opFlags & opInvertCarry, opFlags & opRetriveCV, carryUsed);
}
// Emits a comparison which only updates the flags.
// op: 0 = TST, 1 = TEQ, 2 = CMP, 3 = CMN
// TEQ and CMN have no non destructive x86 equivalent, they are computed in RSCRATCH3.
void Compiler::Comp_CmpOp(int op, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed)
{
switch (op)
{
case 0: // TST
// the first operand is moved into a register if it is an immediate
if (rn.IsImm())
{
MOV(32, R(RSCRATCH3), rn);
rn = R(RSCRATCH3);
}
TEST(32, rn, op2);
break;
case 1: // TEQ
MOV(32, R(RSCRATCH3), rn);
XOR(32, R(RSCRATCH3), op2);
break;
case 2: // CMP
if (rn.IsImm())
{
MOV(32, R(RSCRATCH3), rn);
rn = R(RSCRATCH3);
}
CMP(32, rn, op2);
break;
case 3: // CMN
MOV(32, R(RSCRATCH3), rn);
ADD(32, R(RSCRATCH3), op2);
break;
}
// the carry is only inverted for CMP, C and V are only retrieved for CMP and CMN
Comp_RetriveFlags(op == 2, op >= 2, carryUsed);
}
// Decodes the second operand of an ARM data processing instruction.
//   S         - the operand computation is asked to provide the carry
//   carryUsed - output, set to false for immediates, passed on to Comp_RegShiftReg/Comp_RegShiftImm otherwise
// also calculates cycles
OpArg Compiler::A_Comp_GetALUOp2(bool S, bool& carryUsed)
{
if (CurInstr.Instr & (1 << 25))
{
// 8 bit immediate rotated right by twice the 4 bit rotate field
Comp_AddCycles_C();
carryUsed = false;
return Imm32(ROR(CurInstr.Instr & 0xFF, (CurInstr.Instr >> 7) & 0x1E));
}
else
{
S = S && (CurInstr.SetFlags & 0x2);
int op = (CurInstr.Instr >> 5) & 0x3;
if (CurInstr.Instr & (1 << 4))
{
// register shifted by register
Comp_AddCycles_CI(1);
OpArg rm = MapReg(CurInstr.A_Reg(0));
// a constant pc used as the operand is 4 bytes further ahead here
if (rm.IsImm() && CurInstr.A_Reg(0) == 15)
rm = Imm32(rm.Imm32() + 4);
return Comp_RegShiftReg(op, MapReg(CurInstr.A_Reg(8)), rm, S, carryUsed);
}
else
{
// register shifted by immediate
Comp_AddCycles_C();
return Comp_RegShiftImm(op, (CurInstr.Instr >> 7) & 0x1F,
MapReg(CurInstr.A_Reg(0)), S, carryUsed);
}
}
}
void Compiler::A_Comp_CmpOp()
{
u32 op = (CurInstr.Instr >> 21) & 0xF;
bool carryUsed;
OpArg rn = MapReg(CurInstr.A_Reg(16));
OpArg op2 = A_Comp_GetALUOp2((1 << op) & 0xF303, carryUsed);
Comp_CmpOp(op - 0x8, rn, op2, carryUsed);
}
// ARM data processing instructions with three operands (AND, EOR, SUB, RSB, ADD, ADC,
// SBC, RSC, ORR, BIC). Writing to r15 ends in a jump.
void Compiler::A_Comp_Arith()
{
    bool S = CurInstr.Instr & (1 << 20);
    u32 op = (CurInstr.Instr >> 21) & 0xF;
    bool carryUsed;
    OpArg rn = MapReg(CurInstr.A_Reg(16));
    OpArg rd = MapReg(CurInstr.A_Reg(12));
    // The mask selects the opcodes 0, 1, 8, 9 and 12 to 15, for which the operand
    // computation is asked for the carry (presumably because the C flag of logical
    // operations comes from the shifter - TODO confirm).
    OpArg op2 = A_Comp_GetALUOp2(S && ((1 << op) & 0xF303), carryUsed);

    u32 sFlag = S ? opSetsFlags : 0;
    switch (op)
    {
    case 0x0: // AND
        Comp_ArithTriOp(&Compiler::AND, rd, rn, op2, carryUsed, opSymmetric|sFlag);
        break;
    case 0x1: // EOR
        Comp_ArithTriOp(&Compiler::XOR, rd, rn, op2, carryUsed, opSymmetric|sFlag);
        break;
    case 0x2: // SUB
        Comp_ArithTriOp(&Compiler::SUB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry);
        break;
    case 0x3: // RSB
        if (op2.IsZero())
        {
            // 0 - rn is a plain negation
            if (rd != rn)
                MOV(32, rd, rn);
            NEG(32, rd);
            if (S)
                Comp_RetriveFlags(true, true, false);
        }
        else
            Comp_ArithTriOpReverse(&Compiler::SUB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry);
        break;
    case 0x4: // ADD
        Comp_ArithTriOp(&Compiler::ADD, rd, rn, op2, carryUsed, opSymmetric|sFlag|opRetriveCV);
        break;
    case 0x5: // ADC
        Comp_ArithTriOp(&Compiler::ADC, rd, rn, op2, carryUsed, opSymmetric|sFlag|opRetriveCV|opSyncCarry);
        break;
    case 0x6: // SBC
        Comp_ArithTriOp(&Compiler::SBB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opSyncCarry|opInvertCarry);
        break;
    case 0x7: // RSC
        Comp_ArithTriOpReverse(&Compiler::SBB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry|opSyncCarry);
        break;
    case 0xC: // ORR
        Comp_ArithTriOp(&Compiler::OR, rd, rn, op2, carryUsed, opSymmetric|sFlag);
        break;
    case 0xE: // BIC
        Comp_ArithTriOp(&Compiler::AND, rd, rn, op2, carryUsed, sFlag|opSymmetric|opInvertOp2);
        break;
    default:
        // The remaining opcodes aren't handled by this function. The original
        // assert("unimplemented") tested a string literal and could never fail.
        assert(false && "unimplemented");
    }

    if (CurInstr.A_Reg(12) == 15)
        Comp_JumpTo(rd.GetSimpleReg(), S);
}
void Compiler::A_Comp_MovOp()
{
bool carryUsed;
bool S = CurInstr.Instr & (1 << 20);
OpArg op2 = A_Comp_GetALUOp2(S, carryUsed);
OpArg rd = MapReg(CurInstr.A_Reg(12));
if (rd != op2)
MOV(32, rd, op2);
if (((CurInstr.Instr >> 21) & 0xF) == 0xF)
{
NOT(32, rd);
if (op2.IsImm() && CurInstr.Cond() == 0xE)
RegCache.PutLiteral(CurInstr.A_Reg(12), ~op2.Imm32());
}
else if (op2.IsImm() && CurInstr.Cond() == 0xE)
RegCache.PutLiteral(CurInstr.A_Reg(12), op2.Imm32());
if (S)
{
if (FlagsNZRequired())
TEST(32, rd, rd);
Comp_RetriveFlags(false, false, carryUsed);
}
if (CurInstr.A_Reg(12) == 15)
Comp_JumpTo(rd.GetSimpleReg(), S);
}
void Compiler::A_Comp_CLZ()
{
OpArg rd = MapReg(CurInstr.A_Reg(12));
OpArg rm = MapReg(CurInstr.A_Reg(0));
MOV(32, R(RSCRATCH), Imm32(32));
TEST(32, rm, rm);
FixupBranch skipZero = J_CC(CC_Z);
BSR(32, RSCRATCH, rm);
XOR(32, R(RSCRATCH), Imm8(0x1F)); // 31 - RSCRATCH
SetJumpTarget(skipZero);
MOV(32, rd, R(RSCRATCH));
}
// Emits a 32-bit multiply (rd = rm * rs) or multiply-accumulate
// (rd = rm * rs + rn).
//   S   - update flags from the result (via Comp_RetriveFlags)
//   add - accumulate rn into the product; rn is only read when this is set
// Writes RSCRATCH; when Num != 0 the cycle computation also writes RSCRATCH2
// and RSCRATCH3.
void Compiler::Comp_MulOp(bool S, bool add, Gen::OpArg rd, Gen::OpArg rm, Gen::OpArg rs, Gen::OpArg rn)
{
// Num == 0: the internal cycle count is a compile time constant
if (Num == 0)
Comp_AddCycles_CI(S ? 3 : 1);
else
{
// Otherwise a value derived from rs is computed into RSCRATCH at run time:
// min(BSR(rs), BSR(~rs)) >> 3, or 0 when rs == 0.
XOR(32, R(RSCRATCH), R(RSCRATCH));
MOV(32, R(RSCRATCH3), rs);
TEST(32, R(RSCRATCH3), R(RSCRATCH3));
FixupBranch zeroBSR = J_CC(CC_Z);
BSR(32, RSCRATCH2, R(RSCRATCH3));
NOT(32, R(RSCRATCH3));
BSR(32, RSCRATCH, R(RSCRATCH3));
// keep the smaller of the two bit indices
CMP(32, R(RSCRATCH2), R(RSCRATCH));
CMOVcc(32, RSCRATCH, R(RSCRATCH2), CC_L);
SHR(32, R(RSCRATCH), Imm8(3));
SetJumpTarget(zeroBSR); // fortunately that's even right
Comp_AddCycles_CI(RSCRATCH, add ? 2 : 1);
}
static_assert(EAX == RSCRATCH, "Someone changed RSCRATCH!");
MOV(32, R(RSCRATCH), rm);
if (add)
{
IMUL(32, RSCRATCH, rs);
// LEA performs the accumulate without touching the x86 flags
LEA(32, rd.GetSimpleReg(), MRegSum(RSCRATCH, rn.GetSimpleReg()));
if (S && FlagsNZRequired())
TEST(32, rd, rd);
}
else
{
IMUL(32, RSCRATCH, rs);
MOV(32, rd, R(RSCRATCH));
if (S && FlagsNZRequired())
TEST(32, R(RSCRATCH), R(RSCRATCH));
}
// only N and Z are taken from the result (no C/V retrieval, no shifter carry)
if (S)
Comp_RetriveFlags(false, false, false);
}
void Compiler::A_Comp_MUL_MLA()
{
bool S = CurInstr.Instr & (1 << 20);
bool add = CurInstr.Instr & (1 << 21);
OpArg rd = MapReg(CurInstr.A_Reg(16));
OpArg rm = MapReg(CurInstr.A_Reg(0));
OpArg rs = MapReg(CurInstr.A_Reg(8));
OpArg rn;
if (add)
rn = MapReg(CurInstr.A_Reg(12));
Comp_MulOp(S, add, rd, rm, rs, rn);
}
// Compiles the ARM long multiplies: a 64-bit product of rm and rs, optionally
// accumulated onto the 64-bit value held in rd:rn.
//   bit 20 - S, update flags
//   bit 21 - accumulate
//   bit 22 - signed multiply (operands are sign-extended) instead of unsigned
// rd (field at bit 16) receives the high word, rn (field at bit 12) the low word.
void Compiler::A_Comp_Mul_Long()
{
bool S = CurInstr.Instr & (1 << 20);
bool add = CurInstr.Instr & (1 << 21);
bool sign = CurInstr.Instr & (1 << 22);
OpArg rd = MapReg(CurInstr.A_Reg(16));
OpArg rm = MapReg(CurInstr.A_Reg(0));
OpArg rs = MapReg(CurInstr.A_Reg(8));
OpArg rn = MapReg(CurInstr.A_Reg(12));
// Num == 0: the internal cycle count is a compile time constant
if (Num == 0)
Comp_AddCycles_CI(S ? 3 : 1);
else
{
// Otherwise a value derived from rs is computed into RSCRATCH at run time
// (0 when rs == 0) and handed to Comp_AddCycles_CI.
XOR(32, R(RSCRATCH), R(RSCRATCH));
MOV(32, R(RSCRATCH3), rs);
TEST(32, R(RSCRATCH3), R(RSCRATCH3));
FixupBranch zeroBSR = J_CC(CC_Z);
if (sign)
{
// signed: min(BSR(rs), BSR(~rs))
BSR(32, RSCRATCH2, R(RSCRATCH3));
NOT(32, R(RSCRATCH3));
BSR(32, RSCRATCH, R(RSCRATCH3));
CMP(32, R(RSCRATCH2), R(RSCRATCH));
CMOVcc(32, RSCRATCH, R(RSCRATCH2), CC_L);
}
else
{
// unsigned: BSR(rs)
BSR(32, RSCRATCH, R(RSCRATCH3));
}
SHR(32, R(RSCRATCH), Imm8(3));
SetJumpTarget(zeroBSR); // fortunately that's even right
Comp_AddCycles_CI(RSCRATCH, 2);
}
// Widen both operands into RSCRATCH2/RSCRATCH3 for the 64-bit multiply.
if (sign)
{
MOVSX(64, 32, RSCRATCH2, rm);
MOVSX(64, 32, RSCRATCH3, rs);
}
else
{
MOV(32, R(RSCRATCH2), rm);
MOV(32, R(RSCRATCH3), rs);
}
if (add)
{
// build the 64-bit accumulator rd:rn in RSCRATCH
MOV(32, R(RSCRATCH), rd);
SHL(64, R(RSCRATCH), Imm8(32));
OR(64, R(RSCRATCH), rn);
IMUL(64, RSCRATCH2, R(RSCRATCH3));
// no explicit TEST on this path: the flags read below are those left by this ADD
ADD(64, R(RSCRATCH2), R(RSCRATCH));
}
else
{
IMUL(64, RSCRATCH2, R(RSCRATCH3));
if (S && FlagsNZRequired())
TEST(64, R(RSCRATCH2), R(RSCRATCH2));
}
if (S)
Comp_RetriveFlags(false, false, false);
// split the 64-bit result: low word to rn, high word to rd
MOV(32, rn, R(RSCRATCH2));
SHR(64, R(RSCRATCH2), Imm8(32));
MOV(32, rd, R(RSCRATCH2));
}
// Copies the current x86 flags into bits 28-31 of the cached CPSR (RCPSR).
//   sign      - when retrieving C, store the inverse of the x86 carry flag
//               (SETNC instead of SETC)
//   retriveCV - take C and V from the x86 carry/overflow flags
//   carryUsed - without retriveCV: the new C value is expected in RSCRATCH2,
//               put there by the caller
// Does nothing when the instruction sets no flags (CurInstr.SetFlags == 0).
// Writes RSCRATCH, RSCRATCH2 and RSCRATCH3 depending on the path taken.
void Compiler::Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed)
{
if (CurInstr.SetFlags == 0)
return;
// drop the C/V retrieval when neither of the low two SetFlags bits is set
if (retriveCV && !(CurInstr.SetFlags & 0x3))
retriveCV = false;
bool carryOnly = !retriveCV && carryUsed;
// likewise drop a carry-only update when SetFlags bit 1 is clear
if (carryOnly && !(CurInstr.SetFlags & 0x2))
{
carryUsed = false;
carryOnly = false;
}
CPSRDirty = true;
if (retriveCV)
{
// RSCRATCH2 = overflow | carry << 1
SETcc(CC_O, R(RSCRATCH));
SETcc(sign ? CC_NC : CC_C, R(RSCRATCH3));
LEA(32, RSCRATCH2, MComplex(RSCRATCH, RSCRATCH3, SCALE_2, 0));
}
if (FlagsNZRequired())
{
// RSCRATCH = zero | sign << 1
SETcc(CC_S, R(RSCRATCH));
SETcc(CC_Z, R(RSCRATCH3));
LEA(32, RSCRATCH, MComplex(RSCRATCH3, RSCRATCH, SCALE_2, 0));
int shiftAmount = 30;
if (retriveCV || carryUsed)
{
// append the one (carry only) or two (C and V) lower flag bits from RSCRATCH2
LEA(32, RSCRATCH, MComplex(RSCRATCH2, RSCRATCH, carryOnly ? SCALE_2 : SCALE_4, 0));
shiftAmount = carryOnly ? 29 : 28;
}
SHL(32, R(RSCRATCH), Imm8(shiftAmount));
// clear N and Z plus whichever of C/V are being replaced, then merge
AND(32, R(RCPSR), Imm32(0x3FFFFFFF & ~(carryUsed << 29) & ~((retriveCV ? 3 : 0) << 28)));
OR(32, R(RCPSR), R(RSCRATCH));
}
else if (carryUsed || retriveCV)
{
// N and Z are not needed: only C (and V) are replaced
SHL(32, R(RSCRATCH2), Imm8(carryOnly ? 29 : 28));
AND(32, R(RCPSR), Imm32(0xFFFFFFFF & ~(carryUsed << 29) & ~((retriveCV ? 3 : 0) << 28)));
OR(32, R(RCPSR), R(RSCRATCH2));
}
}
// Emits a shift of rm by the low byte of register rs and returns the operand
// holding the result (always R(RSCRATCH)).
//   op - 0: SHL, 1: SHR, 2: SAR, 3: ROR
//   S  - also compute the carry out into RSCRATCH2
//   carryUsed - set to S
// always uses RSCRATCH, RSCRATCH2 only if S == true
// ECX (RSCRATCH3) is overwritten with the shift amount.
OpArg Compiler::Comp_RegShiftReg(int op, Gen::OpArg rs, Gen::OpArg rm, bool S, bool& carryUsed)
{
carryUsed = S;
if (S)
{
// start with the current carry (CPSR bit 29); it is kept when the amount is 0
XOR(32, R(RSCRATCH2), R(RSCRATCH2));
TEST(32, R(RCPSR), Imm32(1 << 29));
SETcc(CC_NZ, R(RSCRATCH2));
}
MOV(32, R(RSCRATCH), rm);
static_assert(RSCRATCH3 == ECX, "Someone changed RSCRATCH3");
MOV(32, R(ECX), rs);
AND(32, R(ECX), Imm32(0xFF));
// amount == 0: value and carry stay as they are
FixupBranch zero = J_CC(CC_Z);
if (op < 3)
{
void (Compiler::*shiftOp)(int, const OpArg&, const OpArg&) = NULL;
if (op == 0)
shiftOp = &Compiler::SHL;
else if (op == 1)
shiftOp = &Compiler::SHR;
else if (op == 2)
shiftOp = &Compiler::SAR;
CMP(32, R(ECX), Imm8(32));
FixupBranch lt32 = J_CC(CC_L);
// done1 is only assigned and used for op < 2
FixupBranch done1;
if (op < 2)
{
// SHL/SHR by more than 32: result and carry are both 0
FixupBranch eq32 = J_CC(CC_E);
XOR(32, R(RSCRATCH), R(RSCRATCH));
if (S)
XOR(32, R(RSCRATCH2), R(RSCRATCH2));
done1 = J();
SetJumpTarget(eq32);
}
// amount == 32 (or >= 32 for SAR): shift by 31, then by 1
(this->*shiftOp)(32, R(RSCRATCH), Imm8(31));
(this->*shiftOp)(32, R(RSCRATCH), Imm8(1));
if (S)
SETcc(CC_C, R(RSCRATCH2));
FixupBranch done2 = J();
SetJumpTarget(lt32);
// 0 < amount < 32: shift by CL directly
(this->*shiftOp)(32, R(RSCRATCH), R(ECX));
if (S)
SETcc(CC_C, R(RSCRATCH2));
if (op < 2)
SetJumpTarget(done1);
SetJumpTarget(done2);
}
else if (op == 3)
{
// the BT preloads the carry flag with bit 31 of the value before rotating
if (S)
BT(32, R(RSCRATCH), Imm8(31));
ROR_(32, R(RSCRATCH), R(ECX));
if (S)
SETcc(CC_C, R(RSCRATCH2));
}
SetJumpTarget(zero);
return R(RSCRATCH);
}
// Emits a shift of rm by an immediate amount and returns the operand that
// holds the shifted value. Depending on the case this is R(RSCRATCH), rm
// itself (LSL #0) or the immediate 0 (LSR with amount == 0).
//   op        - 0: LSL, 1: LSR, 2: ASR, 3: ROR
//   amount    - shift amount; 0 selects the special forms handled per case
//   S         - also store the carry out in RSCRATCH2
//   carryUsed - set to false only for LSL #0, true otherwise
// may use RSCRATCH for op2 and RSCRATCH2 for the carryValue
OpArg Compiler::Comp_RegShiftImm(int op, int amount, OpArg rm, bool S, bool& carryUsed)
{
    carryUsed = true;

    switch (op)
    {
    case 0: // LSL
        if (amount > 0)
        {
            MOV(32, R(RSCRATCH), rm);
            SHL(32, R(RSCRATCH), Imm8(amount));
            if (S)
                SETcc(CC_C, R(RSCRATCH2));

            return R(RSCRATCH);
        }
        else
        {
            // LSL #0 leaves value and carry untouched
            carryUsed = false;
            return rm;
        }
    case 1: // LSR
        if (amount > 0)
        {
            MOV(32, R(RSCRATCH), rm);
            SHR(32, R(RSCRATCH), Imm8(amount));
            if (S)
                SETcc(CC_C, R(RSCRATCH2));
            return R(RSCRATCH);
        }
        else
        {
            // amount == 0: result is 0, carry is bit 31 of rm
            if (S)
            {
                MOV(32, R(RSCRATCH2), rm);
                SHR(32, R(RSCRATCH2), Imm8(31));
            }
            return Imm32(0);
        }
    case 2: // ASR
        MOV(32, R(RSCRATCH), rm);
        // amount == 0: shifting by 31 fills the register with the sign bit
        SAR(32, R(RSCRATCH), Imm8(amount ? amount : 31));
        if (S)
        {
            // for amount == 0 the carry is bit 31 of rm, fetched with BT
            if (amount == 0)
                BT(32, rm, Imm8(31));
            SETcc(CC_C, R(RSCRATCH2));
        }
        return R(RSCRATCH);
    case 3: // ROR
        MOV(32, R(RSCRATCH), rm);
        if (amount > 0)
            ROR_(32, R(RSCRATCH), Imm8(amount));
        else
        {
            // amount == 0: rotate right by one through the CPSR carry (bit 29)
            BT(32, R(RCPSR), Imm8(29));
            RCR(32, R(RSCRATCH), Imm8(1));
        }
        if (S)
            SETcc(CC_C, R(RSCRATCH2));
        return R(RSCRATCH);
    }

    // Callers pass a 2-bit shift type, so this is not expected to be reached.
    // The return keeps the function well-defined when assertions are compiled
    // out (flowing off the end of a non-void function is undefined behaviour).
    assert(false);
    return rm;
}
void Compiler::T_Comp_ShiftImm()
{
OpArg rd = MapReg(CurInstr.T_Reg(0));
OpArg rs = MapReg(CurInstr.T_Reg(3));
int op = (CurInstr.Instr >> 11) & 0x3;
int amount = (CurInstr.Instr >> 6) & 0x1F;
Comp_AddCycles_C();
bool carryUsed;
OpArg shifted = Comp_RegShiftImm(op, amount, rs, true, carryUsed);
if (shifted != rd)
MOV(32, rd, shifted);
if (FlagsNZRequired())
TEST(32, rd, rd);
Comp_RetriveFlags(false, false, carryUsed);
}
void Compiler::T_Comp_AddSub_()
{
OpArg rd = MapReg(CurInstr.T_Reg(0));
OpArg rs = MapReg(CurInstr.T_Reg(3));
int op = (CurInstr.Instr >> 9) & 0x3;
OpArg rn = op >= 2 ? Imm32((CurInstr.Instr >> 6) & 0x7) : MapReg(CurInstr.T_Reg(6));
Comp_AddCycles_C();
// special case for thumb mov being alias to add rd, rn, #0
if (CurInstr.SetFlags == 0 && rn.IsImm() && rn.Imm32() == 0)
{
if (rd != rs)
MOV(32, rd, rs);
}
else if (op & 1)
Comp_ArithTriOp(&Compiler::SUB, rd, rs, rn, false, opSetsFlags|opInvertCarry|opRetriveCV);
else
Comp_ArithTriOp(&Compiler::ADD, rd, rs, rn, false, opSetsFlags|opSymmetric|opRetriveCV);
}
void Compiler::T_Comp_ALU_Imm8()
{
OpArg rd = MapReg(CurInstr.T_Reg(8));
u32 op = (CurInstr.Instr >> 11) & 0x3;
OpArg imm = Imm32(CurInstr.Instr & 0xFF);
Comp_AddCycles_C();
switch (op)
{
case 0x0:
MOV(32, rd, imm);
if (FlagsNZRequired())
TEST(32, rd, rd);
Comp_RetriveFlags(false, false, false);
return;
case 0x1:
Comp_CmpOp(2, rd, imm, false);
return;
case 0x2:
Comp_ArithTriOp(&Compiler::ADD, rd, rd, imm, false, opSetsFlags|opSymmetric|opRetriveCV);
return;
case 0x3:
Comp_ArithTriOp(&Compiler::SUB, rd, rd, imm, false, opSetsFlags|opInvertCarry|opRetriveCV);
return;
}
}
void Compiler::T_Comp_MUL()
{
OpArg rd = MapReg(CurInstr.T_Reg(0));
OpArg rs = MapReg(CurInstr.T_Reg(3));
Comp_MulOp(true, false, rd, rd, rs, Imm8(-1));
}
// Compiles the Thumb register-to-register ALU instructions. The 4-bit opcode
// at bit 6 selects the operation; every handled operation updates the flags.
// Opcodes without a case below (e.g. 0xD) emit nothing here beyond the cycle
// accounting.
void Compiler::T_Comp_ALU()
{
    OpArg rd = MapReg(CurInstr.T_Reg(0));
    OpArg rs = MapReg(CurInstr.T_Reg(3));

    u32 op = (CurInstr.Instr >> 6) & 0xF;

    // 0x2, 0x3, 0x4 and 0x7 are the four shift-by-register operations handled
    // by the shared case below, so all of them take the extra internal cycle.
    // (The range previously stopped at 0x3 and left out 0x4.)
    if ((op >= 0x2 && op <= 0x4) || op == 0x7)
        Comp_AddCycles_CI(1); // shift by reg
    else
        Comp_AddCycles_C();

    switch (op)
    {
    case 0x0: // AND
        Comp_ArithTriOp(&Compiler::AND, rd, rd, rs, false, opSetsFlags|opSymmetric);
        return;
    case 0x1: // EOR
        Comp_ArithTriOp(&Compiler::XOR, rd, rd, rs, false, opSetsFlags|opSymmetric);
        return;
    case 0x2:
    case 0x3:
    case 0x4:
    case 0x7:
        {
            // map the opcode onto the shifter's numbering (0..2 shifts, 3 = ROR)
            int shiftOp = op == 0x7 ? 3 : op - 0x2;
            bool carryUsed;
            OpArg shifted = Comp_RegShiftReg(shiftOp, rs, rd, true, carryUsed);
            // the carry already sits in RSCRATCH2, so TEST may clobber the x86 carry
            if (FlagsNZRequired())
                TEST(32, shifted, shifted);
            MOV(32, rd, shifted);
            Comp_RetriveFlags(false, false, true);
        }
        return;
    case 0x5: // ADC
        Comp_ArithTriOp(&Compiler::ADC, rd, rd, rs, false, opSetsFlags|opSymmetric|opSyncCarry|opRetriveCV);
        return;
    case 0x6: // SBC
        Comp_ArithTriOp(&Compiler::SBB, rd, rd, rs, false, opSetsFlags|opSyncCarry|opInvertCarry|opRetriveCV);
        return;
    case 0x8: // TST
        Comp_CmpOp(0, rd, rs, false);
        return;
    case 0x9: // NEG
        if (rd != rs)
            MOV(32, rd, rs);
        NEG(32, rd);
        Comp_RetriveFlags(true, true, false);
        return;
    case 0xA: // CMP
        Comp_CmpOp(2, rd, rs, false);
        return;
    case 0xB: // CMN
        Comp_CmpOp(3, rd, rs, false);
        return;
    case 0xC: // ORR
        Comp_ArithTriOp(&Compiler::OR, rd, rd, rs, false, opSetsFlags|opSymmetric);
        return;
    case 0xE: // BIC
        Comp_ArithTriOp(&Compiler::AND, rd, rd, rs, false, opSetsFlags|opSymmetric|opInvertOp2);
        return;
    case 0xF: // MVN
        if (rd != rs)
            MOV(32, rd, rs);
        NOT(32, rd);
        // x86 NOT does not modify any flags, so N/Z must be produced
        // explicitly before they are read back (as A_Comp_MovOp does).
        if (FlagsNZRequired())
            TEST(32, rd, rd);
        Comp_RetriveFlags(false, false, false);
        return;
    default:
        break;
    }
}
void Compiler::T_Comp_ALU_HiReg()
{
u32 rd = ((CurInstr.Instr & 0x7) | ((CurInstr.Instr >> 4) & 0x8));
OpArg rdMapped = MapReg(rd);
OpArg rs = MapReg((CurInstr.Instr >> 3) & 0xF);
u32 op = (CurInstr.Instr >> 8) & 0x3;
Comp_AddCycles_C();
switch (op)
{
case 0x0: // ADD
Comp_ArithTriOp(&Compiler::ADD, rdMapped, rdMapped, rs, false, opSymmetric);
break;
case 0x1: // CMP
Comp_CmpOp(2, rdMapped, rs, false);
return; // this is on purpose
case 0x2: // MOV
if (rdMapped != rs)
MOV(32, rdMapped, rs);
break;
}
if (rd == 15)
{
OR(32, rdMapped, Imm8(1));
Comp_JumpTo(rdMapped.GetSimpleReg());
}
}
void Compiler::T_Comp_AddSP()
{
Comp_AddCycles_C();
OpArg sp = MapReg(13);
OpArg offset = Imm32((CurInstr.Instr & 0x7F) << 2);
if (CurInstr.Instr & (1 << 7))
SUB(32, sp, offset);
else
ADD(32, sp, offset);
}
void Compiler::T_Comp_RelAddr()
{
Comp_AddCycles_C();
OpArg rd = MapReg(CurInstr.T_Reg(8));
u32 offset = (CurInstr.Instr & 0xFF) << 2;
if (CurInstr.Instr & (1 << 11))
{
OpArg sp = MapReg(13);
LEA(32, rd.GetSimpleReg(), MDisp(sp.GetSimpleReg(), offset));
}
else
MOV(32, rd, Imm32((R15 & ~2) + offset));
}
}

View File

@ -0,0 +1,272 @@
#include "ARMJIT_Compiler.h"
using namespace Gen;
namespace ARMJIT
{
template <typename T>
int squeezePointer(T* ptr)
{
int truncated = (int)((u64)ptr);
assert((T*)((u64)truncated) == ptr);
return truncated;
}
// Compiles a jump to an address known at compile time.
//   addr - target address; bit 0 selects Thumb state at the target
//   forceNonConstantCycles - always emit the cycle subtraction as code instead
//                            of folding it into ConstantCycles
// The pipeline refill cycles are computed here, at compile time, by
// temporarily modifying state on CurCPU and restoring it afterwards. The
// stores to the CPU struct are only emitted when Exit is set.
void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
{
// we can simplify constant branches by a lot
IrregularCycles = true;
u32 newPC;
u32 cycles = 0;
// switch the Thumb bit (0x20) in the cached CPSR if the target state differs
if (addr & 0x1 && !Thumb)
{
CPSRDirty = true;
OR(32, R(RCPSR), Imm8(0x20));
}
else if (!(addr & 0x1) && Thumb)
{
CPSRDirty = true;
AND(32, R(RCPSR), Imm32(~0x20));
}
if (Num == 0)
{
ARMv5* cpu9 = (ARMv5*)CurCPU;
u32 regionCodeCycles = cpu9->MemTimings[addr >> 12][0];
// RegionCodeCycles is swapped for the target region's value while the
// fetches below are performed, and put back at the end of this branch
u32 compileTimeCodeCycles = cpu9->RegionCodeCycles;
cpu9->RegionCodeCycles = regionCodeCycles;
if (Exit)
MOV(32, MDisp(RCPU, offsetof(ARMv5, RegionCodeCycles)), Imm32(regionCodeCycles));
if (addr & 0x1)
{
addr &= ~0x1;
newPC = addr+2;
// two-opcodes-at-once fetch
// doesn't matter if we put garbage in the MSbs there
if (addr & 0x2)
{
cpu9->CodeRead32(addr-2, true);
cycles += cpu9->CodeCycles;
cpu9->CodeRead32(addr+2, false);
// CurCPU is the same object as cpu9 here
cycles += CurCPU->CodeCycles;
}
else
{
cpu9->CodeRead32(addr, true);
cycles += cpu9->CodeCycles;
}
}
else
{
addr &= ~0x3;
newPC = addr+4;
cpu9->CodeRead32(addr, true);
cycles += cpu9->CodeCycles;
cpu9->CodeRead32(addr+4, false);
cycles += cpu9->CodeCycles;
}
cpu9->RegionCodeCycles = compileTimeCodeCycles;
}
else
{
ARMv4* cpu7 = (ARMv4*)CurCPU;
u32 codeRegion = addr >> 24;
u32 codeCycles = addr >> 15; // cheato
cpu7->CodeRegion = codeRegion;
cpu7->CodeCycles = codeCycles;
if (Exit)
{
MOV(32, MDisp(RCPU, offsetof(ARM, CodeRegion)), Imm32(codeRegion));
MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(codeCycles));
}
if (addr & 0x1)
{
addr &= ~0x1;
newPC = addr+2;
// this is necessary because ARM7 bios protection
u32 compileTimePC = CurCPU->R[15];
CurCPU->R[15] = newPC;
// timing table columns 0 and 1 are used for a Thumb target
cycles += NDS::ARM7MemTimings[codeCycles][0] + NDS::ARM7MemTimings[codeCycles][1];
CurCPU->R[15] = compileTimePC;
}
else
{
addr &= ~0x3;
newPC = addr+4;
u32 compileTimePC = CurCPU->R[15];
CurCPU->R[15] = newPC;
// timing table columns 2 and 3 are used for an ARM target
cycles += NDS::ARM7MemTimings[codeCycles][2] + NDS::ARM7MemTimings[codeCycles][3];
CurCPU->R[15] = compileTimePC;
}
cpu7->CodeRegion = R15 >> 24;
// NOTE(review): CodeRegion above is restored from R15 while CodeCycles is
// derived from the jump target (addr) - confirm this is intended and not
// meant to be R15 >> 15.
cpu7->CodeCycles = addr >> 15;
}
if (Exit)
MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(newPC));
// Thumb code and ARM instructions with cond >= 0xE can account the cycles
// as a constant; otherwise they are subtracted by emitted code
if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles)
ConstantCycles += cycles;
else
SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
}
// Compiles a jump to an address held in a host register by emitting a call to
// the CPU's JumpTo method (ARMv5 when Num == 0, ARMv4 otherwise).
//   addr        - host register containing the target address
//   restoreCPSR - passed on as the third call argument; also selects whether
//                 PushRegs/PopRegs handle the loaded registers r8-r14
void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
{
IrregularCycles = true;
// remember the dirty state so it can be reinstated for conditional instructions
bool cpsrDirty = CPSRDirty;
// write the cached CPSR back before the call
SaveCPSR();
PushRegs(restoreCPSR);
// arguments: (cpu, addr, restoreCPSR)
MOV(64, R(ABI_PARAM1), R(RCPU));
MOV(32, R(ABI_PARAM2), R(addr));
if (!restoreCPSR)
XOR(32, R(ABI_PARAM3), R(ABI_PARAM3));
else
MOV(32, R(ABI_PARAM3), Imm32(true)); // what a waste
if (Num == 0)
CALL((void*)&ARMv5::JumpTo);
else
CALL((void*)&ARMv4::JumpTo);
PopRegs(restoreCPSR);
// reload the cached CPSR from the CPU struct after the call
LoadCPSR();
// in case this instruction is skipped
if (CurInstr.Cond() < 0xE)
CPSRDirty = cpsrDirty;
}
void Compiler::A_Comp_BranchImm()
{
int op = (CurInstr.Instr >> 24) & 1;
s32 offset = (s32)(CurInstr.Instr << 8) >> 6;
u32 target = R15 + offset;
bool link = op;
if (CurInstr.Cond() == 0xF) // BLX_imm
{
target += (op << 1) + 1;
link = true;
}
if (link)
MOV(32, MapReg(14), Imm32(R15 - 4));
Comp_JumpTo(target);
}
void Compiler::A_Comp_BranchXchangeReg()
{
OpArg rn = MapReg(CurInstr.A_Reg(0));
MOV(32, R(RSCRATCH), rn);
if ((CurInstr.Instr & 0xF0) == 0x30) // BLX_reg
MOV(32, MapReg(14), Imm32(R15 - 4));
Comp_JumpTo(RSCRATCH);
}
// Compiles the Thumb conditional branch. Two code paths are emitted: one for
// the condition holding (jump) and one for it failing (fall through).
void Compiler::T_Comp_BCOND()
{
u32 cond = (CurInstr.Instr >> 8) & 0xF;
// the returned branch skips the taken path; its target is set further down
FixupBranch skipExecute = CheckCondition(cond);
// sign-extend the 8-bit offset and scale it by two
s32 offset = (s32)(CurInstr.Instr << 24) >> 23;
// +1 keeps the target in Thumb state; the jump's cycles are accounted by
// emitted code (forceNonConstantCycles) since this path only runs when taken
Comp_JumpTo(R15 + offset + 1, true);
Comp_SpecialBranchBehaviour(true);
FixupBranch skipFailed = J();
SetJumpTarget(skipExecute);
// condition failed: not-taken handling plus the plain code cycles
Comp_SpecialBranchBehaviour(false);
Comp_AddCycles_C(true);
SetJumpTarget(skipFailed);
}
void Compiler::T_Comp_B()
{
s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 20;
Comp_JumpTo(R15 + offset + 1);
}
void Compiler::T_Comp_BranchXchangeReg()
{
bool link = CurInstr.Instr & (1 << 7);
if (link)
{
if (Num == 1)
{
printf("BLX unsupported on ARM7!!!\n");
return;
}
MOV(32, R(RSCRATCH), MapReg(CurInstr.A_Reg(3)));
MOV(32, MapReg(14), Imm32(R15 - 1));
Comp_JumpTo(RSCRATCH);
}
else
{
OpArg rn = MapReg(CurInstr.A_Reg(3));
Comp_JumpTo(rn.GetSimpleReg());
}
}
void Compiler::T_Comp_BL_LONG_1()
{
s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 9;
MOV(32, MapReg(14), Imm32(R15 + offset));
Comp_AddCycles_C();
}
void Compiler::T_Comp_BL_LONG_2()
{
OpArg lr = MapReg(14);
s32 offset = (CurInstr.Instr & 0x7FF) << 1;
LEA(32, RSCRATCH, MDisp(lr.GetSimpleReg(), offset));
MOV(32, lr, Imm32((R15 - 2) | 1));
if (Num == 1 || CurInstr.Instr & (1 << 12))
OR(32, R(RSCRATCH), Imm8(1));
Comp_JumpTo(RSCRATCH);
}
void Compiler::T_Comp_BL_Merged()
{
Comp_AddCycles_C();
R15 += 2;
u32 upperPart = CurInstr.Instr >> 16;
u32 target = (R15 - 2) + ((s32)((CurInstr.Instr & 0x7FF) << 21) >> 9);
target += (upperPart & 0x7FF) << 1;
if (Num == 1 || upperPart & (1 << 12))
target |= 1;
MOV(32, MapReg(14), Imm32((R15 - 2) | 1));
Comp_JumpTo(target);
}
}

View File

@ -0,0 +1,899 @@
#include "ARMJIT_Compiler.h"
#include "../ARMInterpreter.h"
#include "../Config.h"
#include <assert.h>
#include "../dolphin/CommonFuncs.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/mman.h>
#include <unistd.h>
#endif
using namespace Gen;
extern "C" void ARM_Ret();
namespace ARMJIT
{
// Host registers handed to the register cache, listed per platform with the
// callee saved registers first. The number of entries must match
// NativeRegsAvailable.
template <>
const X64Reg RegisterCache<Compiler, X64Reg>::NativeRegAllocOrder[] =
{
#ifdef _WIN32
RBX, RSI, RDI, R12, R13, R14, // callee saved
R10, R11, // caller saved
#else
RBX, R12, R13, R14, // callee saved, this is sad
R9, R10, R11, // caller saved
#endif
};
// Number of entries in NativeRegAllocOrder for the current platform
// (8 on Windows, 7 elsewhere).
template <>
const int RegisterCache<Compiler, X64Reg>::NativeRegsAvailable =
#ifdef _WIN32
8
#else
7
#endif
;
// The caller saved registers from NativeRegAllocOrder; the memory access
// thunks generated in the Compiler constructor push and pop this set around
// their calls.
#ifdef _WIN32
const BitSet32 CallerSavedPushRegs({R10, R11});
#else
const BitSet32 CallerSavedPushRegs({R9, R10, R11});
#endif
// Emits stores of guest registers to the CPU struct ahead of a call into C++.
// Guest registers mapped to caller saved host registers are always stored.
//   saveHiRegs - additionally handle every loaded register in r8-r14
//                (mask 0x7F00)
void Compiler::PushRegs(bool saveHiRegs)
{
// work on a copy so entries can be cleared while the cache itself changes
BitSet32 loadedRegs(RegCache.LoadedRegs);
if (saveHiRegs)
{
BitSet32 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00);
for (int reg : hiRegsLoaded)
{
// Thumb or unconditional (cond 0xE) instructions unload the register
// from the cache; otherwise it is only stored
if (Thumb || CurInstr.Cond() == 0xE)
RegCache.UnloadRegister(reg);
else
SaveReg(reg, RegCache.Mapping[reg]);
// prevent saving the register twice
loadedRegs[reg] = false;
}
}
for (int reg : loadedRegs)
if (BitSet32(1 << RegCache.Mapping[reg]) & ABI_ALL_CALLER_SAVED)
SaveReg(reg, RegCache.Mapping[reg]);
}
void Compiler::PopRegs(bool saveHiRegs)
{
BitSet32 loadedRegs(RegCache.LoadedRegs);
for (int reg : loadedRegs)
{
if ((saveHiRegs && reg >= 8 && reg < 15)
|| BitSet32(1 << RegCache.Mapping[reg]) & ABI_ALL_CALLER_SAVED)
{
LoadReg(reg, RegCache.Mapping[reg]);
}
}
}
void Compiler::A_Comp_MRS()
{
Comp_AddCycles_C();
OpArg rd = MapReg(CurInstr.A_Reg(12));
if (CurInstr.Instr & (1 << 22))
{
MOV(32, R(RSCRATCH), R(RCPSR));
AND(32, R(RSCRATCH), Imm8(0x1F));
XOR(32, R(RSCRATCH3), R(RSCRATCH3));
MOV(32, R(RSCRATCH2), Imm32(15 - 8));
CALL(ReadBanked);
MOV(32, rd, R(RSCRATCH3));
}
else
MOV(32, rd, R(RCPSR));
}
// Compiles MSR: writes selected byte fields of the CPSR or, when bit 22 is
// set, of the banked status register of the current mode. The new value is
// either a rotated 8-bit immediate (bit 25 set) or a register.
void Compiler::A_Comp_MSR()
{
Comp_AddCycles_C();
OpArg val = CurInstr.Instr & (1 << 25)
? Imm32(ROR((CurInstr.Instr & 0xFF), ((CurInstr.Instr >> 7) & 0x1E)))
: MapReg(CurInstr.A_Reg(0));
// instruction bits 16-19 each enable one byte of the status register
u32 mask = 0;
if (CurInstr.Instr & (1<<16)) mask |= 0x000000FF;
if (CurInstr.Instr & (1<<17)) mask |= 0x0000FF00;
if (CurInstr.Instr & (1<<18)) mask |= 0x00FF0000;
if (CurInstr.Instr & (1<<19)) mask |= 0xFF000000;
if (CurInstr.Instr & (1 << 22))
{
// fetch the current banked value into RSCRATCH3
MOV(32, R(RSCRATCH), R(RCPSR));
AND(32, R(RSCRATCH), Imm8(0x1F));
XOR(32, R(RSCRATCH3), R(RSCRATCH3));
MOV(32, R(RSCRATCH2), Imm32(15 - 8));
CALL(ReadBanked);
// RSCRATCH2 = mask, with the low byte removed when the mode is 0x10
MOV(32, R(RSCRATCH2), Imm32(mask));
MOV(32, R(RSCRATCH4), R(RSCRATCH2));
AND(32, R(RSCRATCH4), Imm32(0xFFFFFF00));
MOV(32, R(RSCRATCH), R(RCPSR));
AND(32, R(RSCRATCH), Imm8(0x1F));
CMP(32, R(RSCRATCH), Imm8(0x10));
CMOVcc(32, RSCRATCH2, R(RSCRATCH4), CC_E);
// RSCRATCH3 = (old & ~mask) | (val & mask)
MOV(32, R(RSCRATCH4), R(RSCRATCH2));
NOT(32, R(RSCRATCH4));
AND(32, R(RSCRATCH3), R(RSCRATCH4));
AND(32, R(RSCRATCH2), val);
OR(32, R(RSCRATCH3), R(RSCRATCH2));
// write it back; RSCRATCH still holds the mode computed above
MOV(32, R(RSCRATCH2), Imm32(15 - 8));
CALL(WriteBanked);
}
else
{
// bit 5 (0x20) is never written through this path
mask &= 0xFFFFFFDF;
CPSRDirty = true;
if ((mask & 0xFF) == 0)
{
// the low byte is untouched, so the update is done entirely inline
AND(32, R(RCPSR), Imm32(~mask));
if (!val.IsImm())
{
MOV(32, R(RSCRATCH), val);
AND(32, R(RSCRATCH), Imm32(mask));
OR(32, R(RCPSR), R(RSCRATCH));
}
else
{
OR(32, R(RCPSR), Imm32(val.Imm32() & mask));
}
}
else
{
// RSCRATCH2 = mask, with the low byte removed when the mode is 0x10
MOV(32, R(RSCRATCH2), Imm32(mask));
MOV(32, R(RSCRATCH3), R(RSCRATCH2));
AND(32, R(RSCRATCH3), Imm32(0xFFFFFF00));
MOV(32, R(RSCRATCH), R(RCPSR));
AND(32, R(RSCRATCH), Imm8(0x1F));
CMP(32, R(RSCRATCH), Imm8(0x10));
CMOVcc(32, RSCRATCH2, R(RSCRATCH3), CC_E);
// keep the old CPSR for the UpdateMode call below
MOV(32, R(RSCRATCH3), R(RCPSR));
// I need you ANDN
MOV(32, R(RSCRATCH), R(RSCRATCH2));
NOT(32, R(RSCRATCH));
AND(32, R(RCPSR), R(RSCRATCH));
AND(32, R(RSCRATCH2), val);
OR(32, R(RCPSR), R(RSCRATCH2));
PushRegs(true);
// call ARM::UpdateMode with arguments (cpu, old CPSR, new CPSR)
MOV(32, R(ABI_PARAM3), R(RCPSR));
MOV(32, R(ABI_PARAM2), R(RSCRATCH3));
MOV(64, R(ABI_PARAM1), R(RCPU));
CALL((void*)&ARM::UpdateMode);
PopRegs(true);
}
}
}
/*
We'll repurpose this .bss memory

A statically allocated 32 MiB buffer that holds the generated code. The
Compiler constructor makes its page-aligned interior executable.
*/
u8 CodeMemory[1024 * 1024 * 32];
// Prepares the code buffer and generates the helper routines shared by all
// compiled blocks:
//   1. makes the page-aligned part of CodeMemory readable, writable and executable
//   2. emits ReadBanked / WriteBanked (access to mode-banked registers)
//   3. emits the load/store thunks stored in PatchedLoadFuncs / PatchedStoreFuncs
//   4. moves the start of the usable region past the generated helpers and
//      splits the remainder into a near and a far part
Compiler::Compiler()
{
{
#ifdef _WIN32
SYSTEM_INFO sysInfo;
GetSystemInfo(&sysInfo);
u64 pageSize = (u64)sysInfo.dwPageSize;
#else
u64 pageSize = sysconf(_SC_PAGE_SIZE);
#endif
// first page boundary strictly after the start of the buffer
u8* pageAligned = (u8*)(((u64)CodeMemory & ~(pageSize - 1)) + pageSize);
// distance from there to the last page boundary inside the buffer
u64 alignedSize = (((u64)CodeMemory + sizeof(CodeMemory)) & ~(pageSize - 1)) - (u64)pageAligned;
// NOTE(review): the result of the protection call is not checked
#ifdef _WIN32
DWORD dummy;
VirtualProtect(pageAligned, alignedSize, PAGE_EXECUTE_READWRITE, &dummy);
#else
mprotect(pageAligned, alignedSize, PROT_EXEC | PROT_READ | PROT_WRITE);
#endif
ResetStart = pageAligned;
CodeMemSize = alignedSize;
}
Reset();
{
// RSCRATCH mode
// RSCRATCH2 reg number
// RSCRATCH3 value in current mode
// ret - RSCRATCH3
ReadBanked = (void*)GetWritableCodePtr();
// mode 0x11 is checked before the index is rebased
CMP(32, R(RSCRATCH), Imm8(0x11));
FixupBranch fiq = J_CC(CC_E);
// rebase the index by 13 - 8; a negative result returns without a read
SUB(32, R(RSCRATCH2), Imm8(13 - 8));
FixupBranch notEverything = J_CC(CC_L);
CMP(32, R(RSCRATCH), Imm8(0x12));
FixupBranch irq = J_CC(CC_E);
CMP(32, R(RSCRATCH), Imm8(0x13));
FixupBranch svc = J_CC(CC_E);
CMP(32, R(RSCRATCH), Imm8(0x17));
FixupBranch abt = J_CC(CC_E);
CMP(32, R(RSCRATCH), Imm8(0x1B));
FixupBranch und = J_CC(CC_E);
// any other mode falls through: RSCRATCH3 is returned unchanged
SetJumpTarget(notEverything);
RET();
SetJumpTarget(fiq);
MOV(32, R(RSCRATCH3), MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_FIQ)));
RET();
SetJumpTarget(irq);
MOV(32, R(RSCRATCH3), MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_IRQ)));
RET();
SetJumpTarget(svc);
MOV(32, R(RSCRATCH3), MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_SVC)));
RET();
SetJumpTarget(abt);
MOV(32, R(RSCRATCH3), MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_ABT)));
RET();
SetJumpTarget(und);
MOV(32, R(RSCRATCH3), MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_UND)));
RET();
}
{
// RSCRATCH mode
// RSCRATCH2 reg n
// RSCRATCH3 value
// carry flag set if the register isn't banked
WriteBanked = (void*)GetWritableCodePtr();
// same dispatch as ReadBanked, with the transfer direction reversed
CMP(32, R(RSCRATCH), Imm8(0x11));
FixupBranch fiq = J_CC(CC_E);
SUB(32, R(RSCRATCH2), Imm8(13 - 8));
FixupBranch notEverything = J_CC(CC_L);
CMP(32, R(RSCRATCH), Imm8(0x12));
FixupBranch irq = J_CC(CC_E);
CMP(32, R(RSCRATCH), Imm8(0x13));
FixupBranch svc = J_CC(CC_E);
CMP(32, R(RSCRATCH), Imm8(0x17));
FixupBranch abt = J_CC(CC_E);
CMP(32, R(RSCRATCH), Imm8(0x1B));
FixupBranch und = J_CC(CC_E);
SetJumpTarget(notEverything);
STC();
RET();
SetJumpTarget(fiq);
MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_FIQ)), R(RSCRATCH3));
CLC();
RET();
SetJumpTarget(irq);
MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_IRQ)), R(RSCRATCH3));
CLC();
RET();
SetJumpTarget(svc);
MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_SVC)), R(RSCRATCH3));
CLC();
RET();
SetJumpTarget(abt);
MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_ABT)), R(RSCRATCH3));
CLC();
RET();
SetJumpTarget(und);
MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_UND)), R(RSCRATCH3));
CLC();
RET();
}
// One store thunk and two load thunks (zero- and sign-extending) are generated
// for every combination of console type, CPU (num), access size
// (8 << size bits) and host register holding the data.
for (int consoleType = 0; consoleType < 2; consoleType++)
{
for (int num = 0; num < 2; num++)
{
for (int size = 0; size < 3; size++)
{
for (int reg = 0; reg < 16; reg++)
{
// registers the thunks themselves overwrite cannot carry the data
if (reg == RSCRATCH || reg == ABI_PARAM1 || reg == ABI_PARAM2 || reg == ABI_PARAM3)
{
PatchedStoreFuncs[consoleType][num][size][reg] = NULL;
PatchedLoadFuncs[consoleType][num][size][0][reg] = NULL;
PatchedLoadFuncs[consoleType][num][size][1][reg] = NULL;
continue;
}
X64Reg rdMapped = (X64Reg)reg;
PatchedStoreFuncs[consoleType][num][size][reg] = GetWritableCodePtr();
// first argument comes from RSCRATCH3 (presumably the address - confirm against the call sites)
if (RSCRATCH3 != ABI_PARAM1)
MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
if (num == 0)
{
// for num == 0 the CPU pointer is passed as the second argument
MOV(64, R(ABI_PARAM2), R(RCPU));
MOV(32, R(ABI_PARAM3), R(rdMapped));
}
else
{
MOV(32, R(ABI_PARAM2), R(rdMapped));
}
ABI_PushRegistersAndAdjustStack(CallerSavedPushRegs, 8);
// the switch key combines the access width in bits with the CPU number
if (consoleType == 0)
{
switch ((8 << size) | num)
{
case 32: ABI_CallFunction(SlowWrite9<u32, 0>); break;
case 33: ABI_CallFunction(SlowWrite7<u32, 0>); break;
case 16: ABI_CallFunction(SlowWrite9<u16, 0>); break;
case 17: ABI_CallFunction(SlowWrite7<u16, 0>); break;
case 8: ABI_CallFunction(SlowWrite9<u8, 0>); break;
case 9: ABI_CallFunction(SlowWrite7<u8, 0>); break;
}
}
else
{
switch ((8 << size) | num)
{
case 32: ABI_CallFunction(SlowWrite9<u32, 1>); break;
case 33: ABI_CallFunction(SlowWrite7<u32, 1>); break;
case 16: ABI_CallFunction(SlowWrite9<u16, 1>); break;
case 17: ABI_CallFunction(SlowWrite7<u16, 1>); break;
case 8: ABI_CallFunction(SlowWrite9<u8, 1>); break;
case 9: ABI_CallFunction(SlowWrite7<u8, 1>); break;
}
}
ABI_PopRegistersAndAdjustStack(CallerSavedPushRegs, 8);
RET();
for (int signextend = 0; signextend < 2; signextend++)
{
PatchedLoadFuncs[consoleType][num][size][signextend][reg] = GetWritableCodePtr();
if (RSCRATCH3 != ABI_PARAM1)
MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
if (num == 0)
MOV(64, R(ABI_PARAM2), R(RCPU));
ABI_PushRegistersAndAdjustStack(CallerSavedPushRegs, 8);
if (consoleType == 0)
{
switch ((8 << size) | num)
{
case 32: ABI_CallFunction(SlowRead9<u32, 0>); break;
case 33: ABI_CallFunction(SlowRead7<u32, 0>); break;
case 16: ABI_CallFunction(SlowRead9<u16, 0>); break;
case 17: ABI_CallFunction(SlowRead7<u16, 0>); break;
case 8: ABI_CallFunction(SlowRead9<u8, 0>); break;
case 9: ABI_CallFunction(SlowRead7<u8, 0>); break;
}
}
else
{
switch ((8 << size) | num)
{
case 32: ABI_CallFunction(SlowRead9<u32, 1>); break;
case 33: ABI_CallFunction(SlowRead7<u32, 1>); break;
case 16: ABI_CallFunction(SlowRead9<u16, 1>); break;
case 17: ABI_CallFunction(SlowRead7<u16, 1>); break;
case 8: ABI_CallFunction(SlowRead9<u8, 1>); break;
case 9: ABI_CallFunction(SlowRead7<u8, 1>); break;
}
}
ABI_PopRegistersAndAdjustStack(CallerSavedPushRegs, 8);
// the call's result in RSCRATCH is extended into the data register
// NOTE(review): for size == 2 this asks for a 32 -> 32 bit extension;
// confirm the emitter accepts that width pair.
if (signextend)
MOVSX(32, 8 << size, rdMapped, R(RSCRATCH));
else
MOVZX(32, 8 << size, rdMapped, R(RSCRATCH));
RET();
}
}
}
}
}
// move the region forward to prevent overwriting the generated functions
CodeMemSize -= GetWritableCodePtr() - ResetStart;
ResetStart = GetWritableCodePtr();
// the far part starts 24 MiB after the new region start
NearStart = ResetStart;
FarStart = ResetStart + 1024*1024*24;
NearSize = FarStart - ResetStart;
FarSize = (ResetStart + CodeMemSize) - FarStart;
}
void Compiler::LoadCPSR()
{
assert(!CPSRDirty);
MOV(32, R(RCPSR), MDisp(RCPU, offsetof(ARM, CPSR)));
}
void Compiler::SaveCPSR(bool flagClean)
{
if (CPSRDirty)
{
MOV(32, MDisp(RCPU, offsetof(ARM, CPSR)), R(RCPSR));
if (flagClean)
CPSRDirty = false;
}
}
// Emits a load of guest register `reg` into host register `nativeReg`.
// r15 is not read from memory: its compile time value is loaded as an
// immediate instead.
void Compiler::LoadReg(int reg, X64Reg nativeReg)
{
    if (reg == 15)
    {
        MOV(32, R(nativeReg), Imm32(R15));
        return;
    }

    MOV(32, R(nativeReg), MDisp(RCPU, offsetof(ARM, R[reg])));
}
void Compiler::SaveReg(int reg, X64Reg nativeReg)
{
MOV(32, MDisp(RCPU, offsetof(ARM, R[reg])), R(nativeReg));
}
// Emits a test of ARM condition code `cond` against the cached CPSR and
// returns a forward branch that is taken when the condition does not hold.
// The caller places the code to skip after this call and resolves the branch
// with SetJumpTarget.
// invalidates RSCRATCH and RSCRATCH3
Gen::FixupBranch Compiler::CheckCondition(u32 cond)
{
    // hack, ldm/stm can get really big TODO: make this better
    // (the flag is handed to J_CC as its second argument)
    bool ldmStm = !Thumb &&
        (CurInstr.Info.Kind == ARMInstrInfo::ak_LDM || CurInstr.Info.Kind == ARMInstrInfo::ak_STM);
    if (cond >= 0x8)
    {
        // The variable shift below takes its count from CL, so it is
        // RSCRATCH3 (not RSCRATCH) that has to alias ECX.
        static_assert(RSCRATCH3 == ECX, "RSCRATCH3 has to be equal to ECX!");
        // RSCRATCH = 1 << (CPSR >> 28), i.e. one bit per flag combination,
        // which is then looked up in the condition's row of the table
        MOV(32, R(RSCRATCH3), R(RCPSR));
        SHR(32, R(RSCRATCH3), Imm8(28));
        MOV(32, R(RSCRATCH), Imm32(1));
        SHL(32, R(RSCRATCH), R(RSCRATCH3));
        TEST(32, R(RSCRATCH), Imm32(ARM::ConditionTable[cond]));

        return J_CC(CC_Z, ldmStm);
    }
    else
    {
        // could have used a LUT, but then where would be the fun?
        // Conditions 0-7 each depend on a single flag. The expression maps
        // cond >> 1 = 0, 1, 2, 3 to CPSR bit 30, 29, 31, 28 respectively;
        // bit 0 of cond selects whether that flag has to be clear or set.
        TEST(32, R(RCPSR), Imm32(1 << (28 + ((~(cond >> 1) & 1) << 1 | (cond >> 2 & 1) ^ (cond >> 1 & 1)))));

        return J_CC(cond & 1 ? CC_NZ : CC_Z, ldmStm);
    }
}
#define F(x) &Compiler::x
const Compiler::CompileFunc A_Comp[ARMInstrInfo::ak_Count] =
{
// AND
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
// EOR
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
// SUB
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
// RSB
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
// ADD
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
// ADC
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
// SBC
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
// RSC
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
// ORR
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
// MOV
F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp),
F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp),
// BIC
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
// MVN
F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp),
F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp),
// TST
F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp),
// TEQ
F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp),
// CMP
F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp),
// CMN
F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp),
// Mul
F(A_Comp_MUL_MLA), F(A_Comp_MUL_MLA), F(A_Comp_Mul_Long), F(A_Comp_Mul_Long), F(A_Comp_Mul_Long), F(A_Comp_Mul_Long), NULL, NULL, NULL, NULL, NULL,
// ARMv5 stuff
F(A_Comp_CLZ), NULL, NULL, NULL, NULL,
// STR
F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB),
// STRB
F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB),
// LDR
F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB),
// LDRB
F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB),
// STRH
F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf),
// LDRD, STRD never used by anything so they stay interpreted (by anything I mean the 5 games I checked)
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
// LDRH
F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf),
// LDRSB
F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf),
// LDRSH
F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf),
// swap
NULL, NULL,
// LDM/STM
F(A_Comp_LDM_STM), F(A_Comp_LDM_STM),
// Branch
F(A_Comp_BranchImm), F(A_Comp_BranchImm), F(A_Comp_BranchImm), F(A_Comp_BranchXchangeReg), F(A_Comp_BranchXchangeReg),
// system stuff
NULL, F(A_Comp_MSR), F(A_Comp_MSR), F(A_Comp_MRS), NULL, NULL, NULL,
F(Nop)
};
// Thumb compile-function table, indexed by the Thumb instruction kind
// (ARMInstrInfo::tk_*). A NULL entry means there is no compile function for
// that kind: CanCompile() reports it as not compilable and CompileBlock()
// emits a call to the interpreter handler instead.
// NOTE(review): the entry order presumably mirrors the declaration order of the
// tk_* enum in ARMInstrInfo — confirm before adding or moving entries.
const Compiler::CompileFunc T_Comp[ARMInstrInfo::tk_Count] = {
// Shift imm
F(T_Comp_ShiftImm), F(T_Comp_ShiftImm), F(T_Comp_ShiftImm),
// Three operand ADD/SUB
F(T_Comp_AddSub_), F(T_Comp_AddSub_), F(T_Comp_AddSub_), F(T_Comp_AddSub_),
// 8 bit imm
F(T_Comp_ALU_Imm8), F(T_Comp_ALU_Imm8), F(T_Comp_ALU_Imm8), F(T_Comp_ALU_Imm8),
// general ALU
F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU),
F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU),
F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU),
F(T_Comp_ALU), F(T_Comp_MUL), F(T_Comp_ALU), F(T_Comp_ALU),
// hi reg
F(T_Comp_ALU_HiReg), F(T_Comp_ALU_HiReg), F(T_Comp_ALU_HiReg),
// pc/sp relative
F(T_Comp_RelAddr), F(T_Comp_RelAddr), F(T_Comp_AddSP),
// LDR pcrel
F(T_Comp_LoadPCRel),
// LDR/STR reg offset
F(T_Comp_MemReg), F(T_Comp_MemReg), F(T_Comp_MemReg), F(T_Comp_MemReg),
// LDR/STR sign extended, half
F(T_Comp_MemRegHalf), F(T_Comp_MemRegHalf), F(T_Comp_MemRegHalf), F(T_Comp_MemRegHalf),
// LDR/STR imm offset
F(T_Comp_MemImm), F(T_Comp_MemImm), F(T_Comp_MemImm), F(T_Comp_MemImm),
// LDR/STR half imm offset
F(T_Comp_MemImmHalf), F(T_Comp_MemImmHalf),
// LDR/STR sp rel
F(T_Comp_MemSPRel), F(T_Comp_MemSPRel),
// PUSH/POP
F(T_Comp_PUSH_POP), F(T_Comp_PUSH_POP),
// LDMIA, STMIA
F(T_Comp_LDMIA_STMIA), F(T_Comp_LDMIA_STMIA),
// Branch
F(T_Comp_BCOND), F(T_Comp_BranchXchangeReg), F(T_Comp_BranchXchangeReg), F(T_Comp_B), F(T_Comp_BL_LONG_1), F(T_Comp_BL_LONG_2),
// Unk, SVC
NULL, NULL,
// Merged BL pair (tk_BL_LONG_1 + tk_BL_LONG_2 handled as one instruction — presumably; confirm against the fetcher)
F(T_Comp_BL_Merged)
};
#undef F
bool Compiler::CanCompile(bool thumb, u16 kind)
{
return (thumb ? T_Comp[kind] : A_Comp[kind]) != NULL;
}
// Discards all generated code and returns the emitter to its initial state.
void Compiler::Reset()
{
    // Overwrite the whole resettable code area with 0xcc (the x86 int3 opcode)
    // and restart emission at its beginning.
    memset(ResetStart, 0xcc, CodeMemSize);
    SetCodePtr(ResetStart);

    // Forget every fastmem patch site and rewind the near/far write cursors.
    LoadStorePatches.clear();
    FarCode = FarStart;
    NearCode = NearStart;
}
// Returns true when addr lies inside the JIT code buffer (CodeMemory),
// i.e. in the half-open range [CodeMemory, CodeMemory + sizeof(CodeMemory)).
bool Compiler::IsJITFault(u64 addr)
{
    const u64 codeBegin = (u64)CodeMemory;
    const u64 codeEnd = codeBegin + sizeof(CodeMemory);
    return codeBegin <= addr && addr < codeEnd;
}
// Emits the extra code a branch needs depending on the flags the block fetcher
// attached to it.
//   taken: whether code is being emitted for the branch-taken path
// - A taken idle branch sets bit 0 of ARM::IdleLoop.
// - When the path being emitted is the one that leaves the block, the register
//   cache is prepared for exit, the accumulated constant cycles are subtracted
//   and control jumps to ARM_Ret.
void Compiler::Comp_SpecialBranchBehaviour(bool taken)
{
    const auto branchFlags = CurInstr.BranchFlags;

    if (taken && (branchFlags & branch_IdleBranch))
        OR(8, MDisp(RCPU, offsetof(ARM, IdleLoop)), Imm8(0x1));

    const bool exitWhenTaken = taken && (branchFlags & branch_FollowCondNotTaken);
    const bool exitWhenNotTaken = !taken && (branchFlags & branch_FollowCondTaken);
    if (!exitWhenTaken && !exitWhenNotTaken)
        return;

    RegCache.PrepareExit();
    SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));
    JMP((u8*)&ARM_Ret, true);
}
// Compiles a sequence of fetched instructions into one block of host code.
//   cpu:         the CPU the block belongs to (its Num selects ARM9/ARM7 behaviour)
//   thumb:       true when the instructions are Thumb, false for ARM
//   instrs:      the fetched instructions, in execution order
//   instrsCount: number of entries in instrs
// Returns the entry point of the generated code. The block ends by flushing the
// register cache, subtracting the accumulated constant cycles and jumping to ARM_Ret.
JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
{
// Reset the block cache when either code area has less than 32 KiB left.
// NOTE(review): both checks can fire in the same call, resetting twice — harmless but redundant; confirm.
if (NearSize - (NearCode - NearStart) < 1024 * 32) // guess...
{
printf("near reset\n");
ResetBlockCache();
}
if (FarSize - (FarCode - FarStart) < 1024 * 32) // guess...
{
printf("far reset\n");
ResetBlockCache();
}
ConstantCycles = 0;
Thumb = thumb;
Num = cpu->Num;
CodeRegion = instrs[0].Addr >> 24;
CurCPU = cpu;
// CPSR might have been modified in a previous block
CPSRDirty = false;
JitBlockEntry res = (JitBlockEntry)GetWritableCodePtr();
RegCache = RegisterCache<Compiler, X64Reg>(this, instrs, instrsCount);
for (int i = 0; i < instrsCount; i++)
{
CurInstr = instrs[i];
// R15 as seen by the instruction: its address plus 4 (Thumb) or 8 (ARM).
R15 = CurInstr.Addr + (Thumb ? 4 : 8);
CodeRegion = R15 >> 24;
Exit = i == instrsCount - 1 || (CurInstr.BranchFlags & branch_FollowCondNotTaken);
// NULL means this kind has no compile function and is run through the interpreter.
CompileFunc comp = Thumb
? T_Comp[CurInstr.Info.Kind]
: A_Comp[CurInstr.Info.Kind];
bool isConditional = Thumb ? CurInstr.Info.Kind == ARMInstrInfo::tk_BCOND : CurInstr.Cond() < 0xE;
// Store R15 to the CPU state when the interpreter will run, when a followed
// conditional branch may leave the block here, or when the block may fall off its end.
if (comp == NULL || (CurInstr.BranchFlags & branch_FollowCondTaken) || (i == instrsCount - 1 && (!CurInstr.Info.Branches() || isConditional)))
{
MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(R15));
if (comp == NULL)
{
// The interpreter handler reads these fields from the CPU state.
MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(CurInstr.CodeCycles));
MOV(32, MDisp(RCPU, offsetof(ARM, CurInstr)), Imm32(CurInstr.Instr));
SaveCPSR();
}
}
// Compiled instructions get their registers mapped; interpreted ones need
// every cached register written back first.
if (comp != NULL)
RegCache.Prepare(Thumb, i);
else
RegCache.Flush();
if (Thumb)
{
if (comp == NULL)
{
MOV(64, R(ABI_PARAM1), R(RCPU));
ABI_CallFunction(InterpretTHUMB[CurInstr.Info.Kind]);
}
else
(this->*comp)();
}
else
{
u32 cond = CurInstr.Cond();
// BLX (immediate) is handled before the cond == 0xF check below.
if (CurInstr.Info.Kind == ARMInstrInfo::ak_BLX_IMM)
{
if (comp)
(this->*comp)();
else
{
MOV(64, R(ABI_PARAM1), R(RCPU));
ABI_CallFunction(ARMInterpreter::A_BLX_IMM);
}
}
else if (cond == 0xF)
{
// Any other instruction with condition 0xF only costs its code cycles.
Comp_AddCycles_C();
}
else
{
IrregularCycles = false;
FixupBranch skipExecute;
// Conditional instruction: emit a branch over the body for the failed case.
if (cond < 0xE)
skipExecute = CheckCondition(cond);
if (comp == NULL)
{
MOV(64, R(ABI_PARAM1), R(RCPU));
ABI_CallFunction(InterpretARM[CurInstr.Info.Kind]);
}
else
(this->*comp)();
Comp_SpecialBranchBehaviour(true);
if (CurInstr.Cond() < 0xE)
{
// The condition-failed path needs its own code when the body charged a
// cycle count other than the plain code cycles, or when it must leave the block.
if (IrregularCycles || (CurInstr.BranchFlags & branch_FollowCondTaken))
{
FixupBranch skipFailed = J();
SetJumpTarget(skipExecute);
Comp_AddCycles_C(true);
Comp_SpecialBranchBehaviour(false);
SetJumpTarget(skipFailed);
}
else
SetJumpTarget(skipExecute);
}
}
}
// Reload the cached CPSR after an interpreter call.
if (comp == NULL)
LoadCPSR();
}
// Block epilogue: write back registers, charge the constant cycles, return to the dispatcher.
RegCache.Flush();
SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));
JMP((u8*)ARM_Ret, true);
/*FILE* codeout = fopen("codeout", "a");
fprintf(codeout, "beginning block argargarg__ %x!!!", instrs[0].Addr);
fwrite((u8*)res, GetWritableCodePtr() - (u8*)res, 1, codeout);
fclose(codeout);*/
return res;
}
void Compiler::Comp_AddCycles_C(bool forceNonConstant)
{
s32 cycles = Num ?
NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 1 : 3]
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles);
if ((!Thumb && CurInstr.Cond() < 0xE) || forceNonConstant)
SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
else
ConstantCycles += cycles;
}
// Charges the code-fetch cycles of the current instruction plus i extra cycles.
// Conditional ARM instructions subtract the total from ARM::Cycles in generated
// code; otherwise the total is folded into ConstantCycles.
void Compiler::Comp_AddCycles_CI(u32 i)
{
    s32 codeCycles;
    if (Num)
        codeCycles = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
    else
        codeCycles = (R15 & 0x2) ? 0 : CurInstr.CodeCycles;

    const s32 cycles = codeCycles + i;

    const bool conditionalARM = !Thumb && CurInstr.Cond() < 0xE;
    if (conditionalARM)
        SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
    else
        ConstantCycles += cycles;
}
// Charges the code-fetch cycles of the current instruction plus a run-time
// cycle count held in host register i plus the compile-time constant add.
//   i:   host register holding the variable part of the cycle count
//   add: constant number of cycles charged on top of i
// Conditional ARM instructions subtract (i + add + code cycles) in one go at
// run time. For unconditional/Thumb instructions only i is subtracted at run
// time and the constant part goes into ConstantCycles.
// Fix: the constant path previously dropped `add`, so unconditional
// instructions were charged fewer cycles than conditional ones.
void Compiler::Comp_AddCycles_CI(Gen::X64Reg i, int add)
{
    s32 cycles = Num ?
        NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
        : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles);

    if (!Thumb && CurInstr.Cond() < 0xE)
    {
        // RSCRATCH = i + add + cycles, computed without touching i.
        LEA(32, RSCRATCH, MDisp(i, add + cycles));
        SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(RSCRATCH));
    }
    else
    {
        ConstantCycles += cycles + add;
        SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(i));
    }
}
void Compiler::Comp_AddCycles_CDI()
{
if (Num == 0)
Comp_AddCycles_CD();
else
{
IrregularCycles = true;
s32 cycles;
s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
s32 numD = CurInstr.DataCycles;
if ((CurInstr.DataRegion >> 24) == 0x02) // mainRAM
{
if (CodeRegion == 0x02)
cycles = numC + numD;
else
{
numC++;
cycles = std::max(numC + numD - 3, std::max(numC, numD));
}
}
else if (CodeRegion == 0x02)
{
numD++;
cycles = std::max(numC + numD - 3, std::max(numC, numD));
}
else
{
cycles = numC + numD + 1;
}
if (!Thumb && CurInstr.Cond() < 0xE)
SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
else
ConstantCycles += cycles;
}
}
// Charges code + data cycles for the current instruction.
// ARM9 (Num == 0): code and data cycles may overlap; IrregularCycles is set
// only when the result differs from the plain code cycles.
// ARM7: the total depends on whether the code and/or data access hit region
// 0x02 (main RAM); IrregularCycles is always set.
// Conditional ARM instructions with irregular cycles subtract the total from
// ARM::Cycles in generated code; otherwise it is folded into ConstantCycles.
// Fix: the ARM7 main-RAM test shifted DataRegion by 4 instead of 24. DataRegion
// is a full address (it is handed to ClassifyAddress9/7 and shifted by 24 in
// Comp_AddCycles_CDI), so the old test practically never matched.
void Compiler::Comp_AddCycles_CD()
{
    u32 cycles = 0;
    if (Num == 0)
    {
        s32 numC = (R15 & 0x2) ? 0 : CurInstr.CodeCycles;
        s32 numD = CurInstr.DataCycles;

        //if (DataRegion != CodeRegion)
        cycles = std::max(numC + numD - 6, std::max(numC, numD));

        IrregularCycles = cycles != numC;
    }
    else
    {
        s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
        s32 numD = CurInstr.DataCycles;

        if ((CurInstr.DataRegion >> 24) == 0x02)
        {
            if (CodeRegion == 0x02)
                cycles += numC + numD;
            else
                cycles += std::max(numC + numD - 3, std::max(numC, numD));
        }
        else if (CodeRegion == 0x02)
        {
            cycles += std::max(numC + numD - 3, std::max(numC, numD));
        }
        else
        {
            cycles += numC + numD;
        }

        IrregularCycles = true;
    }

    if (IrregularCycles && !Thumb && CurInstr.Cond() < 0xE)
        SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
    else
        ConstantCycles += cycles;
}
}

View File

@ -0,0 +1,255 @@
#ifndef ARMJIT_COMPILER_H
#define ARMJIT_COMPILER_H
#include "../dolphin/x64Emitter.h"
#include "../ARMJIT.h"
#include "../ARMJIT_Internal.h"
#include "../ARMJIT_RegisterCache.h"
#include <unordered_map>
namespace ARMJIT
{
// Fixed host register assignments used by the generated code.
// RCPU is the base register for all offsetof(ARM, ...) accesses.
const Gen::X64Reg RCPU = Gen::RBP;
// RCPSR holds the CPSR value while generated code runs (see LoadCPSR/SaveCPSR).
const Gen::X64Reg RCPSR = Gen::R15;
// Scratch registers for temporaries inside compiled instructions.
const Gen::X64Reg RSCRATCH = Gen::EAX;
const Gen::X64Reg RSCRATCH2 = Gen::EDX;
const Gen::X64Reg RSCRATCH3 = Gen::ECX;
const Gen::X64Reg RSCRATCH4 = Gen::R8;
// Describes one fastmem access sequence that RewriteMemAccess() can replace
// with a call. Entries are stored in Compiler::LoadStorePatches, keyed by the
// address of the load/store instruction itself.
struct LoadStorePatch
{
// Target of the CALL that is written over the sequence.
void* PatchFunc;
// Offset from the keyed load/store instruction to the start of the
// sequence to overwrite (start minus instruction address).
s16 Offset;
// Length in bytes of the sequence; the space left after the CALL is filled with NOPs.
u16 Size;
};
// Offset operand of a memory access: either an immediate value or a register
// combined with a shift operation and shift amount.
struct Op2
{
    // Leaves all members uninitialised; assign a constructed Op2 before use.
    Op2() {}

    // Immediate operand.
    Op2(u32 imm) : IsImm(true), Imm(imm) {}

    // Register operand: register index, shift operation and shift amount.
    Op2(int reg, int op, int amount) : IsImm(false), Reg{reg, op, amount} {}

    // Selects which union member is valid.
    bool IsImm;
    union
    {
        struct
        {
            int Reg, Op, Amount;
        } Reg;
        u32 Imm;
    };
};
// x64 backend of the ARM JIT: translates fetched ARM/Thumb instructions into
// host code through the inherited Gen::XEmitter.
class Compiler : public Gen::XEmitter
{
public:
Compiler();
// Discards all generated code and fastmem patch records.
void Reset();
// Compiles instrsCount fetched instructions into one block and returns its entry point.
JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount);
// Move a guest register between the CPU state and a host register.
void LoadReg(int reg, Gen::X64Reg nativeReg);
void SaveReg(int reg, Gen::X64Reg nativeReg);
// True when the given instruction kind has a compile function.
bool CanCompile(bool thumb, u16 kind);
// Signature shared by all per-instruction compile functions.
typedef void (Compiler::*CompileFunc)();
void Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR = false);
void Comp_JumpTo(u32 addr, bool forceNonConstantCycles = false);
// Cycle accounting helpers: C = code, D = data, I = internal cycles.
void Comp_AddCycles_C(bool forceNonConstant = false);
void Comp_AddCycles_CI(u32 i);
void Comp_AddCycles_CI(Gen::X64Reg i, int add);
void Comp_AddCycles_CDI();
void Comp_AddCycles_CD();
// Flags passed as opFlags to the arithmetic helpers below.
enum
{
opSetsFlags = 1 << 0,
opSymmetric = 1 << 1,
opRetriveCV = 1 << 2,
opInvertCarry = 1 << 3,
opSyncCarry = 1 << 4,
opInvertOp2 = 1 << 5,
};
// Compile function that emits nothing.
void Nop() {}
// ARM compile functions (referenced from the A_Comp table).
void A_Comp_Arith();
void A_Comp_MovOp();
void A_Comp_CmpOp();
void A_Comp_MUL_MLA();
void A_Comp_Mul_Long();
void A_Comp_CLZ();
void A_Comp_MemWB();
void A_Comp_MemHalf();
void A_Comp_LDM_STM();
void A_Comp_BranchImm();
void A_Comp_BranchXchangeReg();
void A_Comp_MRS();
void A_Comp_MSR();
// Thumb compile functions (referenced from the T_Comp table).
void T_Comp_ShiftImm();
void T_Comp_AddSub_();
void T_Comp_ALU_Imm8();
void T_Comp_ALU();
void T_Comp_ALU_HiReg();
void T_Comp_MUL();
void T_Comp_RelAddr();
void T_Comp_AddSP();
void T_Comp_MemReg();
void T_Comp_MemImm();
void T_Comp_MemRegHalf();
void T_Comp_MemImmHalf();
void T_Comp_LoadPCRel();
void T_Comp_MemSPRel();
void T_Comp_PUSH_POP();
void T_Comp_LDMIA_STMIA();
void T_Comp_BCOND();
void T_Comp_B();
void T_Comp_BranchXchangeReg();
void T_Comp_BL_LONG_1();
void T_Comp_BL_LONG_2();
void T_Comp_BL_Merged();
// Flags accepted by Comp_MemAccess().
enum
{
memop_Writeback = 1 << 0,
memop_Post = 1 << 1,
memop_SignExtend = 1 << 2,
memop_Store = 1 << 3,
memop_SubtractOffset = 1 << 4
};
// Emits a single load/store of `size` bits (8, 16 or 32) of rd at rn +/- op2.
void Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flags);
// Emits a block transfer (LDM/STM style); returns the signed byte offset to
// add to the base register for writeback.
s32 Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode);
// Tries to turn a load from a fixed address into a constant move; returns
// false when the address is listed as an invalid literal.
bool Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr);
void Comp_ArithTriOp(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&),
Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags);
void Comp_ArithTriOpReverse(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&),
Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags);
void Comp_CmpOp(int op, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed);
void Comp_MulOp(bool S, bool add, Gen::OpArg rd, Gen::OpArg rm, Gen::OpArg rs, Gen::OpArg rn);
void Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed);
// Emits idle-loop marking and early block exits for followed branches.
void Comp_SpecialBranchBehaviour(bool taken);
Gen::OpArg Comp_RegShiftImm(int op, int amount, Gen::OpArg rm, bool S, bool& carryUsed);
Gen::OpArg Comp_RegShiftReg(int op, Gen::OpArg rs, Gen::OpArg rm, bool S, bool& carryUsed);
Gen::OpArg A_Comp_GetALUOp2(bool S, bool& carryUsed);
void LoadCPSR();
void SaveCPSR(bool flagClean = true);
// True when the current instruction's SetFlags has either of bits 2-3 (mask 0xC) set.
bool FlagsNZRequired()
{ return CurInstr.SetFlags & 0xC; }
Gen::FixupBranch CheckCondition(u32 cond);
void PushRegs(bool saveHiRegs);
void PopRegs(bool saveHiRegs);
// Returns the host operand for a guest register. An unmapped r15 is returned
// as the immediate R15; any other unmapped register trips the assert.
Gen::OpArg MapReg(int reg)
{
if (reg == 15 && RegCache.Mapping[reg] == Gen::INVALID_REG)
return Gen::Imm32(R15);
assert(RegCache.Mapping[reg] != Gen::INVALID_REG);
return Gen::R(RegCache.Mapping[reg]);
}
// Convert between block entry points and byte offsets relative to ResetStart.
JitBlockEntry AddEntryOffset(u32 offset)
{
return (JitBlockEntry)(ResetStart + offset);
}
u32 SubEntryOffset(JitBlockEntry entry)
{
return (u8*)entry - ResetStart;
}
// Saves the current write position as the far cursor and resumes emitting at the near cursor.
void SwitchToNearCode()
{
FarCode = GetWritableCodePtr();
SetCodePtr(NearCode);
}
// Saves the current write position as the near cursor and resumes emitting at the far cursor.
void SwitchToFarCode()
{
NearCode = GetWritableCodePtr();
SetCodePtr(FarCode);
}
// True when addr lies inside the JIT code buffer.
bool IsJITFault(u64 addr);
// Replaces the fastmem sequence around the instruction at pc with a call;
// returns the patch's Offset.
s32 RewriteMemAccess(u64 pc);
// Write cursors, sizes and start addresses of the near and far code areas.
u8* FarCode;
u8* NearCode;
u32 FarSize;
u32 NearSize;
u8* NearStart;
u8* FarStart;
// Call targets used when patching fastmem accesses, indexed as
// [NDS::ConsoleType][Num][log2(size) - 3][host register] for stores and
// [NDS::ConsoleType][Num][log2(size) - 3][sign extend][host register] for loads.
void* PatchedStoreFuncs[2][2][3][16];
void* PatchedLoadFuncs[2][2][3][2][16];
// Patch records keyed by the address of the load/store instruction.
std::unordered_map<u8*, LoadStorePatch> LoadStorePatches;
// Start and size of the code area that Reset() clears.
u8* ResetStart;
u32 CodeMemSize;
// Set by CompileBlock for the last instruction of a block and for
// instructions flagged branch_FollowCondNotTaken.
bool Exit;
// Set by the cycle helpers when the charged cycles differ from the plain code cycles.
bool IrregularCycles;
// Helper routines called for banked register access in user-mode LDM/STM.
void* ReadBanked;
void* WriteBanked;
bool CPSRDirty = false;
// State of the instruction currently being compiled.
FetchedInstr CurInstr;
RegisterCache<Compiler, Gen::X64Reg> RegCache;
bool Thumb;
// CPU index: 0 = ARM9, 1 = ARM7.
u32 Num;
// Value of r15 as seen by the current instruction (address + 4 for Thumb, + 8 for ARM).
u32 R15;
// Top byte of R15.
u32 CodeRegion;
// Cycles accumulated at compile time and subtracted once when the block exits.
u32 ConstantCycles;
ARM* CurCPU;
};
}
#endif

View File

@ -0,0 +1,15 @@
#include "../ARM.h"
// Generates ARMJIT_Offsets.h, which exposes the byte offsets of selected ARM
// members as preprocessor constants for the assembly dispatcher.
// Returns 0 on success and 1 when the header cannot be created or written.
int main(int argc, char* argv[])
{
    FILE* f = fopen("ARMJIT_Offsets.h", "w");
    if (!f)
    {
        // Without this check every fprintf below would dereference a null stream.
        perror("ARMJIT_Offsets.h");
        return 1;
    }

    // offsetof yields a size_t; cast it so the argument matches the %x conversion.
#define writeOffset(field) \
    fprintf(f, "#define ARM_" #field "_offset 0x%x\n", (unsigned)offsetof(ARM, field))

    writeOffset(CPSR);
    writeOffset(Cycles);
    writeOffset(StopExecution);

    // fclose flushes the buffered output; a failure here means the header is incomplete.
    if (fclose(f) != 0)
    {
        perror("ARMJIT_Offsets.h");
        return 1;
    }
    return 0;
}

View File

@ -0,0 +1,78 @@
.intel_syntax noprefix
#include "ARMJIT_Offsets.h"
.text
#define RCPU rbp
#define RCPSR r15d
#ifdef WIN64
#define ARG1_REG ecx
#define ARG2_REG edx
#define ARG3_REG r8d
#define ARG4_REG r9d
#define ARG1_REG64 rcx
#define ARG2_REG64 rdx
#define ARG3_REG64 r8
#define ARG4_REG64 r9
#else
#define ARG1_REG edi
#define ARG2_REG esi
#define ARG3_REG edx
#define ARG4_REG ecx
#define ARG1_REG64 rdi
#define ARG2_REG64 rsi
#define ARG3_REG64 rdx
#define ARG4_REG64 rcx
#endif
/*
 * ARM_Dispatch: entry point from C++ into generated code.
 *   first argument  (ARG1_REG64): loaded into RCPU; CPSR is read relative to it
 *   second argument (ARG2_REG64): address of the code to jump to
 * Saves rbx, r12-r15 and rbp (plus rdi and rsi under WIN64), loads RCPSR from
 * [RCPU + ARM_CPSR_offset] and jumps to the target. The matching epilogue is ARM_Ret.
 */
.p2align 4,,15
.global ARM_Dispatch
ARM_Dispatch:
#ifdef WIN64
push rdi
push rsi
#endif
push rbx
push r12
push r13
push r14
push r15
push rbp
/* Extra stack space; presumably 0x20 bytes of shadow space plus 8 for alignment
   on WIN64, and 8 bytes for 16-byte alignment elsewhere - confirm against the ABI. */
#ifdef WIN64
sub rsp, 0x28
#else
sub rsp, 0x8
#endif
mov RCPU, ARG1_REG64
mov RCPSR, [RCPU + ARM_CPSR_offset]
jmp ARG2_REG64
/*
 * ARM_Ret: exit point jumped to by generated code.
 * Writes RCPSR back to [RCPU + ARM_CPSR_offset], releases the stack space
 * reserved by ARM_Dispatch, restores the registers it pushed (in reverse
 * order) and returns to ARM_Dispatch's caller.
 */
.p2align 4,,15
.global ARM_Ret
ARM_Ret:
mov [RCPU + ARM_CPSR_offset], RCPSR
#ifdef WIN64
add rsp, 0x28
#else
add rsp, 0x8
#endif
pop rbp
pop r15
pop r14
pop r13
pop r12
pop rbx
#ifdef WIN64
pop rsi
pop rdi
#endif
ret

View File

@ -0,0 +1,773 @@
#include "ARMJIT_Compiler.h"
#include "../Config.h"
using namespace Gen;
namespace ARMJIT
{
template <typename T>
int squeezePointer(T* ptr)
{
int truncated = (int)((u64)ptr);
assert((T*)((u64)truncated) == ptr);
return truncated;
}
// Replaces the fastmem access sequence around a faulting load/store with a
// call to its patch function.
//   pc: host address of the faulting load/store instruction
// Returns the patch's Offset, i.e. the distance from pc to the start of the
// rewritten sequence. Aborts the process when no patch is registered for pc.
// Fix: the diagnostic printed the 64-bit pc with %x, which is undefined
// behaviour and truncates the address; it now uses %llx.
s32 Compiler::RewriteMemAccess(u64 pc)
{
    auto it = LoadStorePatches.find((u8*)pc);
    if (it == LoadStorePatches.end())
    {
        printf("this is a JIT bug %llx\n", (unsigned long long)pc);
        abort();
    }

    // Copy the record before erasing it; each site is patched only once.
    LoadStorePatch patch = it->second;
    LoadStorePatches.erase(it);

    // Temporarily move the emitter onto the sequence to overwrite.
    u8* curCodePtr = GetWritableCodePtr();
    u8* rewritePtr = (u8*)pc + (ptrdiff_t)patch.Offset;
    SetCodePtr(rewritePtr);

    CALL(patch.PatchFunc);
    // Pad whatever is left of the old sequence with NOPs.
    u32 remainingSize = patch.Size - (GetWritableCodePtr() - rewritePtr);
    if (remainingSize > 0)
        NOP(remainingSize);

    //printf("rewriting memory access %p %d %d\n", patch.PatchFunc, patch.Offset, patch.Size);

    SetCodePtr(curCodePtr);

    return patch.Offset;
}
/*
According to DeSmuME and my own research, approx. 99% (seriously, that's an empirical number)
of all memory load and store instructions always access addresses in the same region as
during their first execution.
I tried multiple optimisations which would benefit from this behaviour
(such as having fast paths for the first region), though none of them yielded a measurable
improvement.
*/
bool Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr)
{
u32 localAddr = LocaliseCodeAddress(Num, addr);
int invalidLiteralIdx = InvalidLiterals.Find(localAddr);
if (invalidLiteralIdx != -1)
{
InvalidLiterals.Remove(invalidLiteralIdx);
return false;
}
Comp_AddCycles_CDI();
u32 val;
// make sure arm7 bios is accessible
u32 tmpR15 = CurCPU->R[15];
CurCPU->R[15] = R15;
if (size == 32)
{
CurCPU->DataRead32(addr & ~0x3, &val);
val = ROR(val, (addr & 0x3) << 3);
}
else if (size == 16)
{
CurCPU->DataRead16(addr & ~0x1, &val);
if (signExtend)
val = ((s32)val << 16) >> 16;
}
else
{
CurCPU->DataRead8(addr, &val);
if (signExtend)
val = ((s32)val << 24) >> 24;
}
CurCPU->R[15] = tmpR15;
MOV(32, MapReg(rd), Imm32(val));
if (Thumb || CurInstr.Cond() == 0xE)
RegCache.PutLiteral(rd, val);
return true;
}
// Emits a single load or store.
//   rd:    guest register that is loaded or stored
//   rn:    guest base register
//   op2:   offset, either an immediate or a shifted register
//   size:  access width in bits (8, 16 or 32)
//   flags: combination of memop_* flags
// Depending on configuration and the expected target region the access is
// emitted either as a patchable fastmem sequence or as a call to a slow-path
// read/write function. A load into r15 ends with a jump to the loaded address.
void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flags)
{
// Mask that aligns the address to the access size.
u32 addressMask = ~0;
if (size == 32)
addressMask = ~3;
if (size == 16)
addressMask = ~1;
// PC-relative load with an immediate offset: try to fold it into a constant.
if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && op2.IsImm && !(flags & (memop_Post|memop_Store|memop_Writeback)))
{
u32 addr = R15 + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
if (Comp_MemLoadLiteral(size, flags & memop_SignExtend, rd, addr))
return;
}
if (flags & memop_Store)
{
Comp_AddCycles_CD();
}
else
{
Comp_AddCycles_CDI();
}
// NOTE(review): addrIsStatic/staticAddress are computed but not used anywhere below.
bool addrIsStatic = Config::JIT_LiteralOptimisations
&& RegCache.IsLiteral(rn) && op2.IsImm && !(flags & (memop_Writeback|memop_Post));
u32 staticAddress;
if (addrIsStatic)
staticAddress = RegCache.LiteralValues[rn] + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
OpArg rdMapped = MapReg(rd);
OpArg rnMapped = MapReg(rn);
// Thumb PC-relative accesses use R15 with bit 1 cleared.
if (Thumb && rn == 15)
rnMapped = Imm32(R15 & ~0x2);
// RSCRATCH3 always ends up holding the address that is accessed.
X64Reg finalAddr = RSCRATCH3;
if (flags & memop_Post)
{
// Post-indexed: access the unmodified base, compute the new base directly into rn.
MOV(32, R(RSCRATCH3), rnMapped);
finalAddr = rnMapped.GetSimpleReg();
}
if (op2.IsImm)
{
MOV_sum(32, finalAddr, rnMapped, Imm32(op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1)));
}
else
{
OpArg rm = MapReg(op2.Reg.Reg);
// rn + (rm << 1..3) fits into a single LEA.
if (!(flags & memop_SubtractOffset) && rm.IsSimpleReg() && rnMapped.IsSimpleReg()
&& op2.Reg.Op == 0 && op2.Reg.Amount > 0 && op2.Reg.Amount <= 3)
{
LEA(32, finalAddr,
MComplex(rnMapped.GetSimpleReg(), rm.GetSimpleReg(), 1 << op2.Reg.Amount, 0));
}
else
{
bool throwAway;
OpArg offset =
Comp_RegShiftImm(op2.Reg.Op, op2.Reg.Amount, rm, false, throwAway);
if (flags & memop_SubtractOffset)
{
if (R(finalAddr) != rnMapped)
MOV(32, R(finalAddr), rnMapped);
if (!offset.IsZero())
SUB(32, R(finalAddr), offset);
}
else
MOV_sum(32, finalAddr, rnMapped, offset);
}
}
// Pre-indexed writeback: copy the computed address back into rn.
if ((flags & memop_Writeback) && !(flags & memop_Post))
MOV(32, rnMapped, R(finalAddr));
u32 expectedTarget = Num == 0
? ARMJIT_Memory::ClassifyAddress9(CurInstr.DataRegion)
: ARMJIT_Memory::ClassifyAddress7(CurInstr.DataRegion);
// Fastmem path: direct host access relative to the fastmem base, registered
// in LoadStorePatches so RewriteMemAccess() can replace it with a call.
if (Config::JIT_FastMemory && ((!Thumb && CurInstr.Cond() != 0xE) || ARMJIT_Memory::IsFastmemCompatible(expectedTarget)))
{
u8* memopStart = GetWritableCodePtr();
LoadStorePatch patch;
patch.PatchFunc = flags & memop_Store
? PatchedStoreFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][rdMapped.GetSimpleReg()]
: PatchedLoadFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped.GetSimpleReg()];
assert(patch.PatchFunc != NULL);
MOV(64, R(RSCRATCH), ImmPtr(Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start));
X64Reg maskedAddr = RSCRATCH3;
if (size > 8)
{
// Keep the unaligned address in RSCRATCH3 and access through an aligned copy.
maskedAddr = RSCRATCH2;
MOV(32, R(RSCRATCH2), R(RSCRATCH3));
AND(32, R(RSCRATCH2), Imm8(addressMask));
}
// Address of the instruction that performs the actual access; this is the patch key.
u8* memopLoadStoreLocation = GetWritableCodePtr();
if (flags & memop_Store)
{
MOV(size, MRegSum(RSCRATCH, maskedAddr), rdMapped);
}
else
{
if (flags & memop_SignExtend)
MOVSX(32, size, rdMapped.GetSimpleReg(), MRegSum(RSCRATCH, maskedAddr));
else
MOVZX(32, size, rdMapped.GetSimpleReg(), MRegSum(RSCRATCH, maskedAddr));
if (size == 32)
{
// Rotate the loaded word right by 8 * (address & 3).
AND(32, R(RSCRATCH3), Imm8(0x3));
SHL(32, R(RSCRATCH3), Imm8(3));
ROR_(32, rdMapped, R(RSCRATCH3));
}
}
patch.Offset = memopStart - memopLoadStoreLocation;
patch.Size = GetWritableCodePtr() - memopStart;
// The sequence must be large enough to hold the CALL written by RewriteMemAccess().
assert(patch.Size >= 5);
LoadStorePatches[memopLoadStoreLocation] = patch;
}
else
{
// Slow path: call the read/write function matching CPU, size and console type.
PushRegs(false);
if (Num == 0)
{
MOV(64, R(ABI_PARAM2), R(RCPU));
if (ABI_PARAM1 != RSCRATCH3)
MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
if (flags & memop_Store)
{
MOV(32, R(ABI_PARAM3), rdMapped);
// size is 8/16/32, so OR-ing in ConsoleType (0 or 1) gives a unique case label.
switch (size | NDS::ConsoleType)
{
case 32: CALL((void*)&SlowWrite9<u32, 0>); break;
case 16: CALL((void*)&SlowWrite9<u16, 0>); break;
case 8: CALL((void*)&SlowWrite9<u8, 0>); break;
case 33: CALL((void*)&SlowWrite9<u32, 1>); break;
case 17: CALL((void*)&SlowWrite9<u16, 1>); break;
case 9: CALL((void*)&SlowWrite9<u8, 1>); break;
}
}
else
{
switch (size | NDS::ConsoleType)
{
case 32: CALL((void*)&SlowRead9<u32, 0>); break;
case 16: CALL((void*)&SlowRead9<u16, 0>); break;
case 8: CALL((void*)&SlowRead9<u8, 0>); break;
case 33: CALL((void*)&SlowRead9<u32, 1>); break;
case 17: CALL((void*)&SlowRead9<u16, 1>); break;
case 9: CALL((void*)&SlowRead9<u8, 1>); break;
}
}
}
else
{
if (ABI_PARAM1 != RSCRATCH3)
MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
if (flags & memop_Store)
{
MOV(32, R(ABI_PARAM2), rdMapped);
switch (size | NDS::ConsoleType)
{
case 32: CALL((void*)&SlowWrite7<u32, 0>); break;
case 16: CALL((void*)&SlowWrite7<u16, 0>); break;
case 8: CALL((void*)&SlowWrite7<u8, 0>); break;
case 33: CALL((void*)&SlowWrite7<u32, 1>); break;
case 17: CALL((void*)&SlowWrite7<u16, 1>); break;
case 9: CALL((void*)&SlowWrite7<u8, 1>); break;
}
}
else
{
switch (size | NDS::ConsoleType)
{
case 32: CALL((void*)&SlowRead7<u32, 0>); break;
case 16: CALL((void*)&SlowRead7<u16, 0>); break;
case 8: CALL((void*)&SlowRead7<u8, 0>); break;
case 33: CALL((void*)&SlowRead7<u32, 1>); break;
case 17: CALL((void*)&SlowRead7<u16, 1>); break;
case 9: CALL((void*)&SlowRead7<u8, 1>); break;
}
}
}
PopRegs(false);
// The slow read returns its result in RSCRATCH; extend it into rd.
if (!(flags & memop_Store))
{
if (flags & memop_SignExtend)
MOVSX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
else
MOVZX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
}
}
// A load into r15 is a branch.
if (!(flags & memop_Store) && rd == 15)
{
if (size < 32)
printf("!!! LDR <32 bit PC %08X %x\n", R15, CurInstr.Instr);
{
// On the ARM7 the loaded value's bit 0 is forced to match the current
// instruction set: set in Thumb, cleared in ARM.
if (Num == 1)
{
if (Thumb)
OR(32, rdMapped, Imm8(0x1));
else
AND(32, rdMapped, Imm8(0xFE));
}
Comp_JumpTo(rdMapped.GetSimpleReg());
}
}
}
// Emits a block transfer of several registers (LDM/STM, PUSH/POP).
//   rn:        guest base register
//   regs:      set of guest registers to transfer
//   store:     true for a store, false for a load
//   preinc:    the address is adjusted before each transfer
//   decrement: the transfer moves towards lower addresses
//   usermode:  registers 8-14 go through the ReadBanked/WriteBanked helpers
// Returns the signed byte offset the caller adds to the base register for
// writeback (4 bytes per register, negative when decrementing); 0 for an empty list.
// A transfer of one already-loaded register is forwarded to Comp_MemAccess().
// Otherwise the registers are exchanged with SlowBlockTransfer* through a stack
// buffer; when the fast path is compiled, that code is placed in the far code
// area and registered as the patch function of the inline fastmem sequence.
s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode)
{
int regsCount = regs.Count();
if (regsCount == 0)
return 0; // actually not the right behaviour TODO: fix me
if (regsCount == 1 && !usermode && RegCache.LoadedRegs & (1 << *regs.begin()))
{
int flags = 0;
if (store)
flags |= memop_Store;
if (decrement && preinc)
flags |= memop_SubtractOffset;
Op2 offset = preinc ? Op2(4) : Op2(0);
Comp_MemAccess(*regs.begin(), rn, offset, 32, flags);
return decrement ? -4 : 4;
}
s32 offset = (regsCount * 4) * (decrement ? -1 : 1);
int expectedTarget = Num == 0
? ARMJIT_Memory::ClassifyAddress9(CurInstr.DataRegion)
: ARMJIT_Memory::ClassifyAddress7(CurInstr.DataRegion);
if (!store)
Comp_AddCycles_CDI();
else
Comp_AddCycles_CD();
// NOTE(review): unlike Comp_MemAccess this test has no !Thumb guard in front of Cond() — confirm that is intended.
bool compileFastPath = Config::JIT_FastMemory
&& !usermode && (CurInstr.Cond() < 0xE || ARMJIT_Memory::IsFastmemCompatible(expectedTarget));
// we need to make sure that the stack stays aligned to 16 bytes
#ifdef _WIN32
// include shadow
u32 stackAlloc = (((regsCount + 4 + 1) & ~1) + (compileFastPath ? 1 : 0)) * 8;
#else
u32 stackAlloc = (((regsCount + 1) & ~1) + (compileFastPath ? 1 : 0)) * 8;
#endif
// Stack bytes reserved beyond the 8 bytes used per transferred register.
u32 allocOffset = stackAlloc - regsCount * 8;
// RSCRATCH4 = lowest address touched by the transfer.
if (decrement)
MOV_sum(32, RSCRATCH4, MapReg(rn), Imm32(-regsCount * 4 + (preinc ? 0 : 4)));
else
MOV_sum(32, RSCRATCH4, MapReg(rn), Imm32(preinc ? 4 : 0));
if (compileFastPath)
{
AND(32, R(RSCRATCH4), Imm8(~3));
u8* fastPathStart = GetWritableCodePtr();
// Address of the first instruction that touches guest memory; used as the patch key.
u8* firstLoadStoreAddr;
bool firstLoadStore = true;
MOV(64, R(RSCRATCH2), ImmPtr(Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start));
ADD(64, R(RSCRATCH2), R(RSCRATCH4));
MOV(32, R(RSCRATCH3), R(RSCRATCH4));
// Byte offset of the current register within the block (shadows the outer offset).
u32 offset = 0;
for (int reg : regs)
{
if (firstLoadStore)
firstLoadStoreAddr = GetWritableCodePtr();
OpArg mem = MDisp(RSCRATCH2, offset);
if (store)
{
if (RegCache.LoadedRegs & (1 << reg))
{
MOV(32, mem, MapReg(reg));
}
else
{
LoadReg(reg, RSCRATCH);
// The register load precedes the memory access, so move the key past it.
if (firstLoadStore)
firstLoadStoreAddr = GetWritableCodePtr();
MOV(32, mem, R(RSCRATCH));
}
}
else
{
if (RegCache.LoadedRegs & (1 << reg))
{
MOV(32, MapReg(reg), mem);
}
else
{
MOV(32, R(RSCRATCH), mem);
SaveReg(reg, RSCRATCH);
}
}
offset += 4;
firstLoadStore = false;
}
LoadStorePatch patch;
patch.Size = GetWritableCodePtr() - fastPathStart;
patch.Offset = fastPathStart - firstLoadStoreAddr;
// The slow path emitted below lives in far code and becomes the patch function.
SwitchToFarCode();
patch.PatchFunc = GetWritableCodePtr();
LoadStorePatches[firstLoadStoreAddr] = patch;
}
if (!store)
{
// Load: reserve a stack buffer, let the transfer function fill it, then pop
// the values into their registers.
PushRegs(false);
MOV(32, R(ABI_PARAM1), R(RSCRATCH4));
MOV(32, R(ABI_PARAM3), Imm32(regsCount));
SUB(64, R(RSP), stackAlloc <= INT8_MAX ? Imm8(stackAlloc) : Imm32(stackAlloc));
if (allocOffset == 0)
MOV(64, R(ABI_PARAM2), R(RSP));
else
LEA(64, ABI_PARAM2, MDisp(RSP, allocOffset));
if (Num == 0)
MOV(64, R(ABI_PARAM4), R(RCPU));
// Num * 2 | ConsoleType selects the CPU and console variant.
switch (Num * 2 | NDS::ConsoleType)
{
case 0: CALL((void*)&SlowBlockTransfer9<false, 0>); break;
case 1: CALL((void*)&SlowBlockTransfer9<false, 1>); break;
case 2: CALL((void*)&SlowBlockTransfer7<false, 0>); break;
case 3: CALL((void*)&SlowBlockTransfer7<false, 1>); break;
}
PopRegs(false);
// Skip the padding so RSP points at the first loaded value.
if (allocOffset)
ADD(64, R(RSP), Imm8(allocOffset));
bool firstUserMode = true;
for (int reg : regs)
{
if (usermode && !regs[15] && reg >= 8 && reg < 15)
{
if (firstUserMode)
{
// RSCRATCH = CPSR & 0x1F, set up once for the banked helper.
MOV(32, R(RSCRATCH), R(RCPSR));
AND(32, R(RSCRATCH), Imm8(0x1F));
firstUserMode = false;
}
MOV(32, R(RSCRATCH2), Imm32(reg - 8));
POP(RSCRATCH3);
CALL(WriteBanked);
// Carry clear: skip the write to the current register below.
FixupBranch sucessfulWritten = J_CC(CC_NC);
if (RegCache.LoadedRegs & (1 << reg))
MOV(32, R(RegCache.Mapping[reg]), R(RSCRATCH3));
else
SaveReg(reg, RSCRATCH3);
SetJumpTarget(sucessfulWritten);
}
else if (!(RegCache.LoadedRegs & (1 << reg)))
{
assert(reg != 15);
POP(RSCRATCH);
SaveReg(reg, RSCRATCH);
}
else
{
POP(MapReg(reg).GetSimpleReg());
}
}
}
else
{
// Store: push the registers from highest to lowest so the lowest register
// ends up at the lowest stack address, then hand the buffer to the transfer function.
bool firstUserMode = true;
for (int reg = 15; reg >= 0; reg--)
{
if (regs[reg])
{
if (usermode && reg >= 8 && reg < 15)
{
if (firstUserMode)
{
MOV(32, R(RSCRATCH), R(RCPSR));
AND(32, R(RSCRATCH), Imm8(0x1F));
firstUserMode = false;
}
if (RegCache.Mapping[reg] == INVALID_REG)
LoadReg(reg, RSCRATCH3);
else
MOV(32, R(RSCRATCH3), R(RegCache.Mapping[reg]));
MOV(32, R(RSCRATCH2), Imm32(reg - 8));
CALL(ReadBanked);
PUSH(RSCRATCH3);
}
else if (!(RegCache.LoadedRegs & (1 << reg)))
{
LoadReg(reg, RSCRATCH);
PUSH(RSCRATCH);
}
else
{
PUSH(MapReg(reg).GetSimpleReg());
}
}
}
if (allocOffset)
SUB(64, R(RSP), Imm8(allocOffset));
PushRegs(false);
MOV(32, R(ABI_PARAM1), R(RSCRATCH4));
if (allocOffset)
LEA(64, ABI_PARAM2, MDisp(RSP, allocOffset));
else
MOV(64, R(ABI_PARAM2), R(RSP));
MOV(32, R(ABI_PARAM3), Imm32(regsCount));
if (Num == 0)
MOV(64, R(ABI_PARAM4), R(RCPU));
switch (Num * 2 | NDS::ConsoleType)
{
case 0: CALL((void*)&SlowBlockTransfer9<true, 0>); break;
case 1: CALL((void*)&SlowBlockTransfer9<true, 1>); break;
case 2: CALL((void*)&SlowBlockTransfer7<true, 0>); break;
case 3: CALL((void*)&SlowBlockTransfer7<true, 1>); break;
}
ADD(64, R(RSP), stackAlloc <= INT8_MAX ? Imm8(stackAlloc) : Imm32(stackAlloc));
PopRegs(false);
}
if (compileFastPath)
{
// The far-code slow path is entered through the patched CALL, so it ends with a RET.
RET();
SwitchToNearCode();
}
// Loading r15 is a branch; on the ARM7 bit 0 is forced to match the current instruction set.
if (!store && regs[15])
{
if (Num == 1)
{
if (Thumb)
OR(32, MapReg(15), Imm8(1));
else
AND(32, MapReg(15), Imm8(0xFE));
}
Comp_JumpTo(MapReg(15).GetSimpleReg(), usermode);
}
return offset;
}
// Compiles ARM word/byte loads and stores (the STR/STRB/LDR/LDRB table entries).
// Decodes the addressing flags and the offset from the instruction word and
// forwards everything to Comp_MemAccess().
void Compiler::A_Comp_MemWB()
{
    const auto instr = CurInstr.Instr;

    // bit 20 = load, 24 = pre-indexed, 21 = writeback, 23 = add offset
    int flags = 0;
    if (!(instr & (1 << 20)))
        flags |= memop_Store;
    if (!(instr & (1 << 24)))
        flags |= memop_Post;
    if (instr & (1 << 21))
        flags |= memop_Writeback;
    if (!(instr & (1 << 23)))
        flags |= memop_SubtractOffset;

    // bit 25 selects a shifted register offset over a 12 bit immediate
    Op2 offset;
    if (instr & (1 << 25))
    {
        const int shiftOp = (instr >> 5) & 0x3;
        const int shiftAmount = (instr >> 7) & 0x1F;
        offset = Op2(CurInstr.A_Reg(0), shiftOp, shiftAmount);
    }
    else
    {
        offset = Op2(instr & 0xFFF);
    }

    // bit 22 = byte access
    const int size = (instr & (1 << 22)) ? 8 : 32;

    Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, size, flags);
}
// Compiles ARM halfword and signed loads/stores (the STRH/LDRH/LDRSB/LDRSH table
// entries). Decodes size, sign extension, addressing flags and the offset, then
// forwards to Comp_MemAccess(). A 32 bit size on the ARM7 emits nothing.
void Compiler::A_Comp_MemHalf()
{
    const auto instr = CurInstr.Instr;

    // bit 22 selects a split 8 bit immediate over a plain register offset
    Op2 offset;
    if (instr & (1 << 22))
        offset = Op2((instr & 0xF) | ((instr >> 4) & 0xF0));
    else
        offset = Op2(CurInstr.A_Reg(0), 0, 0);

    const int op = (instr >> 5) & 0x3;
    bool load = instr & (1 << 20);
    bool signExtend = false;
    int size;
    if (load)
    {
        size = op == 2 ? 8 : 16;
        signExtend = op > 1;
    }
    else
    {
        size = op == 1 ? 16 : 32;
        load = op == 2;
    }

    if (Num == 1 && size == 32)
        return; // NOP

    int flags = 0;
    if (signExtend)
        flags |= memop_SignExtend;
    if (!load)
        flags |= memop_Store;
    if (!(instr & (1 << 24)))
        flags |= memop_Post;
    if (!(instr & (1 << 23)))
        flags |= memop_SubtractOffset;
    if (instr & (1 << 21))
        flags |= memop_Writeback;

    Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, size, flags);
}
// Compiles Thumb word/byte loads and stores with a register offset.
// Bits 10-11 of the instruction: bit 0 of the field = byte access, bit 1 = load.
void Compiler::T_Comp_MemReg()
{
    const int op = (CurInstr.Instr >> 10) & 0x3;
    const bool isByte = op & 0x1;
    const bool isLoad = op & 0x2;

    const int size = isByte ? 8 : 32;
    const int flags = isLoad ? 0 : memop_Store;

    Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(CurInstr.T_Reg(6), 0, 0),
        size, flags);
}
// Compiles ARM LDM/STM. Decodes the register list and addressing bits, emits
// the transfer through Comp_MemAccessBlock() and then applies base writeback.
// When a load's register list contains the base register, writeback is never
// done on the ARM7; on the ARM9 it is done only if the base is the sole register
// in the list or a higher-numbered register is listed as well.
void Compiler::A_Comp_LDM_STM()
{
    const auto instr = CurInstr.Instr;
    BitSet16 regs(instr & 0xFFFF);

    const bool load = instr & (1 << 20);
    const bool pre = instr & (1 << 24);
    const bool add = instr & (1 << 23);
    bool writeback = instr & (1 << 21);
    const bool usermode = instr & (1 << 22);

    const int baseReg = CurInstr.A_Reg(16);
    OpArg rn = MapReg(baseReg);

    s32 offset = Comp_MemAccessBlock(baseReg, regs, !load, pre, !add, usermode);

    if (load && writeback && regs[baseReg])
    {
        if (Num == 0)
            writeback = (!(regs & ~BitSet16(1 << baseReg))) || (regs & ~BitSet16((2 << baseReg) - 1));
        else
            writeback = false;
    }

    if (writeback)
        ADD(32, rn, offset >= INT8_MIN && offset < INT8_MAX ? Imm8(offset) : Imm32(offset));
}
void Compiler::T_Comp_MemImm()
{
int op = (CurInstr.Instr >> 11) & 0x3;
bool load = op & 0x1;
bool byte = op & 0x2;
u32 offset = ((CurInstr.Instr >> 6) & 0x1F) * (byte ? 1 : 4);
Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(offset),
byte ? 8 : 32, load ? 0 : memop_Store);
}
// Compiles Thumb halfword and sign-extended accesses with a register offset.
// Bits 10-11 of the instruction select the operation:
//   0 = 16 bit store, 1 = sign-extended 8 bit load,
//   2 = 16 bit load,  3 = sign-extended 16 bit load.
void Compiler::T_Comp_MemRegHalf()
{
    const int op = (CurInstr.Instr >> 10) & 0x3;

    const int size = (op == 1) ? 8 : 16;

    int flags = 0;
    if (op & 1)
        flags |= memop_SignExtend;
    if (op == 0)
        flags |= memop_Store;

    Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(CurInstr.T_Reg(6), 0, 0),
        size, flags);
}
void Compiler::T_Comp_MemImmHalf()
{
u32 offset = (CurInstr.Instr >> 5) & 0x3E;
bool load = CurInstr.Instr & (1 << 11);
Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(offset), 16,
load ? 0 : memop_Store);
}
void Compiler::T_Comp_LoadPCRel()
{
u32 offset = (CurInstr.Instr & 0xFF) << 2;
u32 addr = (R15 & ~0x2) + offset;
if (!Config::JIT_LiteralOptimisations || !Comp_MemLoadLiteral(32, false, CurInstr.T_Reg(8), addr))
Comp_MemAccess(CurInstr.T_Reg(8), 15, Op2(offset), 32, 0);
}
void Compiler::T_Comp_MemSPRel()
{
u32 offset = (CurInstr.Instr & 0xFF) * 4;
bool load = CurInstr.Instr & (1 << 11);
Comp_MemAccess(CurInstr.T_Reg(8), 13, Op2(offset), 32,
load ? 0 : memop_Store);
}
// THUMB PUSH/POP.
// The low 8 bits list R0-R7. When bit 8 is set, one extra register joins the
// list: R15 for a load (bit 11 set), R14 for a store.
void Compiler::T_Comp_PUSH_POP()
{
    const bool load = CurInstr.Instr & (1 << 11);
    const bool extraReg = CurInstr.Instr & (1 << 8);

    BitSet16 regs(CurInstr.Instr & 0xFF);
    if (extraReg)
        regs[load ? 15 : 14] = true;

    // R13 is mapped before the block access is emitted.
    OpArg sp = MapReg(13);

    // A store is performed "pre" and "decrementing"; a load is the opposite.
    s32 offset = Comp_MemAccessBlock(13, regs, !load, !load, !load, false);

    // The offset always fits in 8 bits since PUSH accesses at most 9 registers.
    ADD(32, sp, Imm8(offset));
}
// THUMB LDMIA/STMIA.
// The base register (bits 8-10) is adjusted afterwards, except when this is a
// load whose register list contains the base register itself.
void Compiler::T_Comp_LDMIA_STMIA()
{
    const auto baseReg = CurInstr.T_Reg(8);

    BitSet16 regs(CurInstr.Instr & 0xFF);
    // The base register is mapped before the block access is emitted.
    OpArg rb = MapReg(baseReg);
    const bool load = CurInstr.Instr & (1 << 11);

    s32 offset = Comp_MemAccessBlock(baseReg, regs, !load, false, false, false);

    const bool baseIsLoaded = load && regs[baseReg];
    if (!baseIsLoaded)
        ADD(32, rb, Imm8(offset));
}
}

View File

@ -0,0 +1,3 @@
// Hard-coded byte offsets. NOTE(review): judging by the names these are the
// offsets of the CPSR, Cycles and StopExecution members inside the ARM class,
// meant for code that cannot use offsetof (e.g. assembly) - confirm against
// the class layout and keep in sync with it.
#define ARM_CPSR_offset 0x64
#define ARM_Cycles_offset 0xc
#define ARM_StopExecution_offset 0x10

539
src/ARM_InstrInfo.cpp Normal file
View File

@ -0,0 +1,539 @@
#include "ARM_InstrInfo.h"
#include <stdio.h>
#include "Config.h"
namespace ARMInstrInfo
{
// Places an ARM instruction kind (ak_*) into the upper bits of an attribute
// word; Decode() extracts it again with (data >> 22) & 0x1FF.
#define ak(x) ((x) << 22)
// Attribute bits for ARM instructions, occupying the bits below the kind
// field. Decode() translates them into the fields of Info.
enum {
A_Read0 = 1 << 0, // register in bits 0-3 is a source
A_Read16 = 1 << 1, // register in bits 16-19 is a source
A_Read8 = 1 << 2, // register in bits 8-11 is a source
A_Read12 = 1 << 3, // register in bits 12-15 is a source
A_Write12 = 1 << 4, // register in bits 12-15 is a destination
A_Write16 = 1 << 5, // register in bits 16-19 is a destination
A_MemWriteback = 1 << 6, // register in bits 16-19 is a destination if bit 21 is set
A_BranchAlways = 1 << 7, // R15 is a destination
// for STRD/LDRD
A_Read12Double = 1 << 8, // register in bits 12-15 and the following one are sources
A_Write12Double = 1 << 9, // register in bits 12-15 and the following one are destinations
A_Link = 1 << 10, // R14 is a destination
A_UnkOnARM7 = 1 << 11, // decoded as A_UNK when num == 1
A_SetNZ = 1 << 12, // writes N and Z
A_SetCV = 1 << 13, // writes C and V
A_SetMaybeC = 1 << 14, // might write C (recorded in the upper nibble of WriteFlags)
A_MulFlags = 1 << 15, // writes N and Z if bit 20 of the opcode is set
A_ReadC = 1 << 16, // reads C
A_RRXReadC = 1 << 17, // reads C if the shift amount in bits 7-11 is zero
A_StaticShiftSetC = 1 << 18, // writes C if the shift amount in bits 7-11 is non-zero
A_SetC = 1 << 19, // writes C
A_WriteMem = 1 << 20, // marked as special_WriteMem
A_LoadMem = 1 << 21 // marked as special_LoadMem or special_LoadLiteral
};
// Operand shape of an ALU operation: binary operations additionally read the
// register in bits 16-19, unary ones (MOV/MVN) do not.
#define A_BIOP A_Read16
#define A_MONOOP 0
// Carry/overflow behaviour of the flag-setting (_S) variants, selected by
// class (ARITH or LOGIC) and by operand form. Arithmetic operations always
// write C and V; for logical operations the effect on C depends on the shift.
#define A_ARITH_LSL_IMM A_SetCV
#define A_LOGIC_LSL_IMM A_StaticShiftSetC
#define A_ARITH_SHIFT_IMM A_SetCV
#define A_LOGIC_SHIFT_IMM A_SetC
#define A_ARITH_SHIFT_REG A_SetCV
#define A_LOGIC_SHIFT_REG A_SetMaybeC
#define A_ARITH_IMM A_SetCV
#define A_LOGIC_IMM 0
// Defines the 18 attribute constants of one ALU operation.
//   x - mnemonic, used to build the constant names and the ak_* kinds
//   k - BIOP or MONOOP
//   a - ARITH or LOGIC (only used by the _S variants)
//   c - extra attribute bits common to all variants (e.g. A_ReadC)
// The first nine constants are the non flag-setting forms, the last nine the
// flag-setting (_S) forms. ROR-by-immediate forms carry A_RRXReadC.
#define A_IMPLEMENT_ALU_OP(x,k,a,c) \
const u32 A_##x##_IMM = A_Write12 | c | A_##k | ak(ak_##x##_IMM); \
const u32 A_##x##_REG_LSL_IMM = A_Write12 | c | A_##k | A_Read0 | ak(ak_##x##_REG_LSL_IMM); \
const u32 A_##x##_REG_LSR_IMM = A_Write12 | c | A_##k | A_Read0 | ak(ak_##x##_REG_LSR_IMM); \
const u32 A_##x##_REG_ASR_IMM = A_Write12 | c | A_##k | A_Read0 | ak(ak_##x##_REG_ASR_IMM); \
const u32 A_##x##_REG_ROR_IMM = A_RRXReadC | A_Write12 | c | A_##k | A_Read0 | ak(ak_##x##_REG_ROR_IMM); \
const u32 A_##x##_REG_LSL_REG = A_Write12 | c | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSL_REG); \
const u32 A_##x##_REG_LSR_REG = A_Write12 | c | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSR_REG); \
const u32 A_##x##_REG_ASR_REG = A_Write12 | c | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ASR_REG); \
const u32 A_##x##_REG_ROR_REG = A_Write12 | c | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ROR_REG); \
\
const u32 A_##x##_IMM_S = A_SetNZ | c | A_##a##_IMM | A_Write12 | A_##k | ak(ak_##x##_IMM_S); \
const u32 A_##x##_REG_LSL_IMM_S = A_SetNZ | c | A_##a##_LSL_IMM | A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_LSL_IMM_S); \
const u32 A_##x##_REG_LSR_IMM_S = A_SetNZ | c | A_##a##_SHIFT_IMM | A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_LSR_IMM_S); \
const u32 A_##x##_REG_ASR_IMM_S = A_SetNZ | c | A_##a##_SHIFT_IMM | A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_ASR_IMM_S); \
const u32 A_##x##_REG_ROR_IMM_S = A_RRXReadC | A_SetNZ | c | A_##a##_SHIFT_IMM | A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_ROR_IMM_S); \
const u32 A_##x##_REG_LSL_REG_S = A_SetNZ | c | A_##a##_SHIFT_REG | A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSL_REG_S); \
const u32 A_##x##_REG_LSR_REG_S = A_SetNZ | c | A_##a##_SHIFT_REG | A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSR_REG_S); \
const u32 A_##x##_REG_ASR_REG_S = A_SetNZ | c | A_##a##_SHIFT_REG | A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ASR_REG_S); \
const u32 A_##x##_REG_ROR_REG_S = A_SetNZ | c | A_##a##_SHIFT_REG | A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ROR_REG_S);
// One instantiation per ALU operation; ADC/SBC/RSC additionally read C.
A_IMPLEMENT_ALU_OP(AND,BIOP,LOGIC,0)
A_IMPLEMENT_ALU_OP(EOR,BIOP,LOGIC,0)
A_IMPLEMENT_ALU_OP(SUB,BIOP,ARITH,0)
A_IMPLEMENT_ALU_OP(RSB,BIOP,ARITH,0)
A_IMPLEMENT_ALU_OP(ADD,BIOP,ARITH,0)
A_IMPLEMENT_ALU_OP(ADC,BIOP,ARITH,A_ReadC)
A_IMPLEMENT_ALU_OP(SBC,BIOP,ARITH,A_ReadC)
A_IMPLEMENT_ALU_OP(RSC,BIOP,ARITH,A_ReadC)
A_IMPLEMENT_ALU_OP(ORR,BIOP,LOGIC,0)
A_IMPLEMENT_ALU_OP(MOV,MONOOP,LOGIC,0)
A_IMPLEMENT_ALU_OP(BIC,BIOP,LOGIC,0)
A_IMPLEMENT_ALU_OP(MVN,MONOOP,LOGIC,0)
// The _DBG name shares the attributes (and kind) of the plain MOV form.
const u32 A_MOV_REG_LSL_IMM_DBG = A_MOV_REG_LSL_IMM;
// Defines the 9 attribute constants of one comparison/test operation.
//   x - mnemonic, used to build the constant names and the ak_* kinds
//   a - ARITH or LOGIC, selecting the effect on C and V
// These operations write no register: they always read the register in bits
// 16-19 and always write N and Z.
#define A_IMPLEMENT_ALU_TEST(x,a) \
const u32 A_##x##_IMM = A_SetNZ | A_Read16 | A_##a##_IMM | ak(ak_##x##_IMM); \
const u32 A_##x##_REG_LSL_IMM = A_SetNZ | A_Read16 | A_##a##_LSL_IMM | A_Read0 | ak(ak_##x##_REG_LSL_IMM); \
const u32 A_##x##_REG_LSR_IMM = A_SetNZ | A_Read16 | A_##a##_SHIFT_IMM | A_Read0 | ak(ak_##x##_REG_LSR_IMM); \
const u32 A_##x##_REG_ASR_IMM = A_SetNZ | A_Read16 | A_##a##_SHIFT_IMM | A_Read0 | ak(ak_##x##_REG_ASR_IMM); \
const u32 A_##x##_REG_ROR_IMM = A_RRXReadC | A_SetNZ | A_Read16 | A_##a##_SHIFT_IMM | A_Read0 | ak(ak_##x##_REG_ROR_IMM); \
const u32 A_##x##_REG_LSL_REG = A_SetNZ | A_Read16 | A_##a##_SHIFT_REG | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSL_REG); \
const u32 A_##x##_REG_LSR_REG = A_SetNZ | A_Read16 | A_##a##_SHIFT_REG | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSR_REG); \
const u32 A_##x##_REG_ASR_REG = A_SetNZ | A_Read16 | A_##a##_SHIFT_REG | A_Read0 | A_Read8 | ak(ak_##x##_REG_ASR_REG); \
const u32 A_##x##_REG_ROR_REG = A_SetNZ | A_Read16 | A_##a##_SHIFT_REG | A_Read0 | A_Read8 | ak(ak_##x##_REG_ROR_REG);
A_IMPLEMENT_ALU_TEST(TST,LOGIC)
A_IMPLEMENT_ALU_TEST(TEQ,LOGIC)
A_IMPLEMENT_ALU_TEST(CMP,ARITH)
A_IMPLEMENT_ALU_TEST(CMN,ARITH)
// Multiplies. The destination is the register in bits 16-19; the long forms
// also write the register in bits 12-15, and the accumulating forms read the
// registers they add to. A_MulFlags makes N/Z writes depend on bit 20.
const u32 A_MUL = A_MulFlags | A_Write16 | A_Read0 | A_Read8 | ak(ak_MUL);
const u32 A_MLA = A_MulFlags | A_Write16 | A_Read0 | A_Read8 | A_Read12 | ak(ak_MLA);
const u32 A_UMULL = A_MulFlags | A_Write16 | A_Write12 | A_Read0 | A_Read8 | ak(ak_UMULL);
const u32 A_UMLAL = A_MulFlags | A_Write16 | A_Write12 | A_Read16 | A_Read12 | A_Read0 | A_Read8 | ak(ak_UMLAL);
const u32 A_SMULL = A_MulFlags | A_Write16 | A_Write12 | A_Read0 | A_Read8 | ak(ak_SMULL);
const u32 A_SMLAL = A_MulFlags | A_Write16 | A_Write12 | A_Read16 | A_Read12 | A_Read0 | A_Read8 | ak(ak_SMLAL);
// Halfword multiplies; Decode() turns the kinds ak_SMLAxy..ak_SMULxy into
// ak_Nop when num == 1.
const u32 A_SMLAxy = A_Write16 | A_Read0 | A_Read8 | A_Read12 | ak(ak_SMLAxy);
const u32 A_SMLAWy = A_Write16 | A_Read0 | A_Read8 | A_Read12 | ak(ak_SMLAWy);
const u32 A_SMULWy = A_Write16 | A_Read0 | A_Read8 | ak(ak_SMULWy);
const u32 A_SMLALxy = A_Write16 | A_Write12 | A_Read16 | A_Read12 | A_Read0 | A_Read8 | ak(ak_SMLALxy);
const u32 A_SMULxy = A_Write16 | A_Read0 | A_Read8 | ak(ak_SMULxy);
// Count-leading-zeros and saturating arithmetic; decoded as A_UNK when
// num == 1 (A_UnkOnARM7).
const u32 A_CLZ = A_Write12 | A_Read0 | A_UnkOnARM7 | ak(ak_CLZ);
const u32 A_QADD = A_Write12 | A_Read0 | A_Read16 | A_UnkOnARM7 | ak(ak_QADD);
const u32 A_QSUB = A_Write12 | A_Read0 | A_Read16 | A_UnkOnARM7 | ak(ak_QSUB);
const u32 A_QDADD = A_Write12 | A_Read0 | A_Read16 | A_UnkOnARM7 | ak(ak_QDADD);
const u32 A_QDSUB = A_Write12 | A_Read0 | A_Read16 | A_UnkOnARM7 | ak(ak_QDSUB);
// Attribute bits common to every word/byte load resp. store: a load writes the
// register in bits 12-15 and reads memory, a store reads that register and
// writes memory. The replacement lists are parenthesised so that they remain a
// single operand regardless of the expression they are pasted into.
#define A_LDR (A_Write12 | A_LoadMem)
#define A_STR (A_Read12 | A_WriteMem)
// Defines the 10 attribute constants of one word/byte load or store.
//   x - mnemonic, used to build the constant names and the ak_* kinds
//   k - LDR or STR, selecting the common attribute bits above
// Pre-indexed forms carry A_MemWriteback (the base register is only written
// when bit 21 of the opcode is set); post-indexed forms always write the base
// register. ROR forms carry A_RRXReadC.
#define A_IMPLEMENT_WB_LDRSTR(x,k) \
const u32 A_##x##_IMM = A_##k | A_Read16 | A_MemWriteback | ak(ak_##x##_IMM); \
const u32 A_##x##_REG_LSL = A_##k | A_Read16 | A_MemWriteback | A_Read0 | ak(ak_##x##_REG_LSL); \
const u32 A_##x##_REG_LSR = A_##k | A_Read16 | A_MemWriteback | A_Read0 | ak(ak_##x##_REG_LSR); \
const u32 A_##x##_REG_ASR = A_##k | A_Read16 | A_MemWriteback | A_Read0 | ak(ak_##x##_REG_ASR); \
const u32 A_##x##_REG_ROR = A_##k | A_RRXReadC | A_Read16 | A_MemWriteback | A_Read0 | ak(ak_##x##_REG_ROR); \
\
const u32 A_##x##_POST_IMM = A_##k | A_Read16 | A_Write16 | ak(ak_##x##_POST_IMM); \
const u32 A_##x##_POST_REG_LSL = A_##k | A_Read16 | A_Write16 | A_Read0 | ak(ak_##x##_POST_REG_LSL); \
const u32 A_##x##_POST_REG_LSR = A_##k | A_Read16 | A_Write16 | A_Read0 | ak(ak_##x##_POST_REG_LSR); \
const u32 A_##x##_POST_REG_ASR = A_##k | A_Read16 | A_Write16 | A_Read0 | ak(ak_##x##_POST_REG_ASR); \
const u32 A_##x##_POST_REG_ROR = A_##k | A_RRXReadC | A_Read16 | A_Write16 | A_Read0 | ak(ak_##x##_POST_REG_ROR);
A_IMPLEMENT_WB_LDRSTR(STR,STR)
A_IMPLEMENT_WB_LDRSTR(STRB,STR)
A_IMPLEMENT_WB_LDRSTR(LDR,LDR)
A_IMPLEMENT_WB_LDRSTR(LDRB,LDR)
// Attribute bits common to the doubleword load resp. store: both registers of
// the pair starting at bits 12-15 are written (load) or read (store). The
// replacement lists are parenthesised so that they remain a single operand
// regardless of the expression they are pasted into.
#define A_LDRD (A_Write12Double | A_LoadMem)
#define A_STRD (A_Read12Double | A_WriteMem)
// Defines the 4 attribute constants of one halfword/doubleword/sign-extending
// load or store.
//   x - mnemonic, used to build the constant names and the ak_* kinds
//   k - name of the common attribute macro (A_<k>) to use for this mnemonic
// Pre-indexed forms carry A_MemWriteback; post-indexed forms always write the
// base register.
#define A_IMPLEMENT_HD_LDRSTR(x,k) \
const u32 A_##x##_IMM = A_##k | A_Read16 | A_MemWriteback | ak(ak_##x##_IMM); \
const u32 A_##x##_REG = A_##k | A_Read16 | A_MemWriteback | A_Read0 | ak(ak_##x##_REG); \
const u32 A_##x##_POST_IMM = A_##k | A_Read16 | A_Write16 | ak(ak_##x##_POST_IMM); \
const u32 A_##x##_POST_REG = A_##k | A_Read16 | A_Write16 | A_Read0 | ak(ak_##x##_POST_REG);
A_IMPLEMENT_HD_LDRSTR(STRH,STR)
A_IMPLEMENT_HD_LDRSTR(LDRD,LDRD)
A_IMPLEMENT_HD_LDRSTR(STRD,STRD)
A_IMPLEMENT_HD_LDRSTR(LDRH,LDR)
A_IMPLEMENT_HD_LDRSTR(LDRSB,LDR)
A_IMPLEMENT_HD_LDRSTR(LDRSH,LDR)
// Swap: both loads from and stores to memory.
const u32 A_SWP = A_Write12 | A_Read16 | A_Read0 | A_LoadMem | A_WriteMem | ak(ak_SWP);
const u32 A_SWPB = A_Write12 | A_Read16 | A_Read0 | A_LoadMem | A_WriteMem | ak(ak_SWPB);
// Block transfers: the register list itself is handled in Decode().
const u32 A_LDM = A_Read16 | A_MemWriteback | A_LoadMem | ak(ak_LDM);
const u32 A_STM = A_Read16 | A_MemWriteback | A_WriteMem | ak(ak_STM);
// Branches: A_BranchAlways marks R15 as written, A_Link marks R14 as written.
const u32 A_B = A_BranchAlways | ak(ak_B);
const u32 A_BL = A_BranchAlways | A_Link | ak(ak_BL);
const u32 A_BLX_IMM = A_BranchAlways | A_Link | ak(ak_BLX_IMM);
const u32 A_BX = A_BranchAlways | A_Read0 | ak(ak_BX);
const u32 A_BLX_REG = A_BranchAlways | A_Link | A_Read0 | ak(ak_BLX_REG);
// Unknown instructions are described like a linking branch.
const u32 A_UNK = A_BranchAlways | A_Link | ak(ak_UNK);
// Status register and coprocessor transfers; their flag effects are added in
// Decode().
const u32 A_MSR_IMM = ak(ak_MSR_IMM);
const u32 A_MSR_REG = A_Read0 | ak(ak_MSR_REG);
const u32 A_MRS = A_Write12 | ak(ak_MRS);
const u32 A_MCR = A_Read12 | ak(ak_MCR);
const u32 A_MRC = A_Write12 | ak(ak_MRC);
const u32 A_SVC = A_BranchAlways | A_Link | ak(ak_SVC);
// THUMB
// Places a THUMB instruction kind (tk_*) into the upper bits of an attribute
// word; Decode() extracts it again with (data >> 22) & 0x3F.
#define tk(x) ((x) << 22)
// Attribute bits for THUMB instructions, occupying the bits below the kind
// field. Decode() translates them into the fields of Info. Bit 11 is unused.
enum {
T_Read0 = 1 << 0, // register in bits 0-2 is a source
T_Read3 = 1 << 1, // register in bits 3-5 is a source
T_Read6 = 1 << 2, // register in bits 6-8 is a source
T_Read8 = 1 << 3, // register in bits 8-10 is a source
T_Write0 = 1 << 4, // register in bits 0-2 is a destination
T_Write8 = 1 << 5, // register in bits 8-10 is a destination
T_ReadHi0 = 1 << 6, // 4-bit register (bits 0-2 plus bit 7) is a source
T_ReadHi3 = 1 << 7, // 4-bit register in bits 3-6 is a source
T_WriteHi0 = 1 << 8, // 4-bit register (bits 0-2 plus bit 7) is a destination
T_ReadR13 = 1 << 9, // R13 is a source
T_WriteR13 = 1 << 10, // R13 is a destination
T_BranchAlways = 1 << 12, // R15 is a destination
T_ReadR14 = 1 << 13, // R14 is a source
T_WriteR14 = 1 << 14, // R14 is a destination
T_SetNZ = 1 << 15, // writes N and Z
T_SetCV = 1 << 16, // writes C and V
T_SetMaybeC = 1 << 17, // might write C (recorded in the upper nibble of WriteFlags)
T_ReadC = 1 << 18, // reads C
T_SetC = 1 << 19, // writes C
T_WriteMem = 1 << 20, // marked as special_WriteMem
T_LoadMem = 1 << 21, // marked as special_LoadMem or special_LoadLiteral
};
// Attribute words of the THUMB instructions: the T_* bits describing register
// and flag usage, combined with the instruction kind via tk().

// Shifts by immediate and three-operand add/subtract.
const u32 T_LSL_IMM = T_SetNZ | T_SetMaybeC | T_Write0 | T_Read3 | tk(tk_LSL_IMM);
const u32 T_LSR_IMM = T_SetNZ | T_SetC | T_Write0 | T_Read3 | tk(tk_LSR_IMM);
const u32 T_ASR_IMM = T_SetNZ | T_SetC | T_Write0 | T_Read3 | tk(tk_ASR_IMM);
const u32 T_ADD_REG_ = T_SetNZ | T_SetCV | T_Write0 | T_Read3 | T_Read6 | tk(tk_ADD_REG_);
const u32 T_SUB_REG_ = T_SetNZ | T_SetCV | T_Write0 | T_Read3 | T_Read6 | tk(tk_SUB_REG_);
const u32 T_ADD_IMM_ = T_SetNZ | T_SetCV | T_Write0 | T_Read3 | tk(tk_ADD_IMM_);
const u32 T_SUB_IMM_ = T_SetNZ | T_SetCV | T_Write0 | T_Read3 | tk(tk_SUB_IMM_);
// Operations with an 8-bit immediate on the register in bits 8-10.
const u32 T_MOV_IMM = T_SetNZ | T_Write8 | tk(tk_MOV_IMM);
const u32 T_CMP_IMM = T_SetNZ | T_SetCV | T_Read8 | tk(tk_CMP_IMM);
const u32 T_ADD_IMM = T_SetNZ | T_SetCV | T_Write8 | T_Read8 | tk(tk_ADD_IMM);
const u32 T_SUB_IMM = T_SetNZ | T_SetCV | T_Write8 | T_Read8 | tk(tk_SUB_IMM);
// Two-register ALU operations.
const u32 T_AND_REG = T_SetNZ | T_Write0 | T_Read0 | T_Read3 | tk(tk_AND_REG);
const u32 T_EOR_REG = T_SetNZ | T_Write0 | T_Read0 | T_Read3 | tk(tk_EOR_REG);
const u32 T_LSL_REG = T_SetNZ | T_SetMaybeC | T_Write0 | T_Read0 | T_Read3 | tk(tk_LSL_REG);
const u32 T_LSR_REG = T_SetNZ | T_SetMaybeC | T_Write0 | T_Read0 | T_Read3 | tk(tk_LSR_REG);
const u32 T_ASR_REG = T_SetNZ | T_SetMaybeC | T_Write0 | T_Read0 | T_Read3 | tk(tk_ASR_REG);
const u32 T_ADC_REG = T_ReadC | T_SetNZ | T_SetCV | T_Write0 | T_Read0 | T_Read3 | tk(tk_ADC_REG);
const u32 T_SBC_REG = T_ReadC | T_SetNZ | T_SetCV | T_Write0 | T_Read0 | T_Read3 | tk(tk_SBC_REG);
const u32 T_ROR_REG = T_SetNZ | T_SetMaybeC | T_Write0 | T_Read0 | T_Read3 | tk(tk_ROR_REG);
const u32 T_TST_REG = T_SetNZ | T_Read0 | T_Read3 | tk(tk_TST_REG);
const u32 T_NEG_REG = T_SetNZ | T_SetCV | T_Write0 | T_Read3 | tk(tk_NEG_REG);
const u32 T_CMP_REG = T_SetNZ | T_SetCV | T_Read0 | T_Read3 | tk(tk_CMP_REG);
const u32 T_CMN_REG = T_SetNZ | T_SetCV | T_Read0 | T_Read3 | tk(tk_CMN_REG);
const u32 T_ORR_REG = T_SetNZ | T_Write0 | T_Read0 | T_Read3 | tk(tk_ORR_REG);
const u32 T_MUL_REG = T_SetNZ | T_Write0 | T_Read0 | T_Read3 | tk(tk_MUL_REG);
const u32 T_BIC_REG = T_SetNZ | T_Write0 | T_Read0 | T_Read3 | tk(tk_BIC_REG);
const u32 T_MVN_REG = T_SetNZ | T_Write0 | T_Read3 | tk(tk_MVN_REG);
// Operations on the full 4-bit register numbers.
const u32 T_ADD_HIREG = T_WriteHi0 | T_ReadHi0 | T_ReadHi3 | tk(tk_ADD_HIREG);
const u32 T_CMP_HIREG = T_SetNZ | T_SetCV | T_ReadHi0 | T_ReadHi3 | tk(tk_CMP_HIREG);
const u32 T_MOV_HIREG = T_WriteHi0 | T_ReadHi3 | tk(tk_MOV_HIREG);
// Address generation relative to PC/SP and SP adjustment.
const u32 T_ADD_PCREL = T_Write8 | tk(tk_ADD_PCREL);
const u32 T_ADD_SPREL = T_Write8 | T_ReadR13 | tk(tk_ADD_SPREL);
const u32 T_ADD_SP = T_WriteR13 | T_ReadR13 | tk(tk_ADD_SP);
// Loads and stores.
const u32 T_LDR_PCREL = T_Write8 | T_LoadMem | tk(tk_LDR_PCREL);
const u32 T_STR_REG = T_Read0 | T_Read3 | T_Read6 | T_WriteMem | tk(tk_STR_REG);
const u32 T_STRB_REG = T_Read0 | T_Read3 | T_Read6 | T_WriteMem | tk(tk_STRB_REG);
const u32 T_LDR_REG = T_Write0 | T_Read3 | T_Read6 | T_LoadMem | tk(tk_LDR_REG);
const u32 T_LDRB_REG = T_Write0 | T_Read3 | T_Read6 | T_LoadMem | tk(tk_LDRB_REG);
const u32 T_STRH_REG = T_Read0 | T_Read3 | T_Read6 | T_WriteMem | tk(tk_STRH_REG);
const u32 T_LDRSB_REG = T_Write0 | T_Read3 | T_Read6 | T_LoadMem | tk(tk_LDRSB_REG);
const u32 T_LDRH_REG = T_Write0 | T_Read3 | T_Read6 | T_LoadMem | tk(tk_LDRH_REG);
const u32 T_LDRSH_REG = T_Write0 | T_Read3 | T_Read6 | T_LoadMem | tk(tk_LDRSH_REG);
const u32 T_STR_IMM = T_Read0 | T_Read3 | T_WriteMem | tk(tk_STR_IMM);
const u32 T_LDR_IMM = T_Write0 | T_Read3 | T_LoadMem | tk(tk_LDR_IMM);
const u32 T_STRB_IMM = T_Read0 | T_Read3 | T_WriteMem | tk(tk_STRB_IMM);
const u32 T_LDRB_IMM = T_Write0 | T_Read3 | T_LoadMem | tk(tk_LDRB_IMM);
const u32 T_STRH_IMM = T_Read0 | T_Read3 | T_WriteMem | tk(tk_STRH_IMM);
const u32 T_LDRH_IMM = T_Write0 | T_Read3 | T_LoadMem | tk(tk_LDRH_IMM);
const u32 T_STR_SPREL = T_Read8 | T_ReadR13 | T_WriteMem | tk(tk_STR_SPREL);
const u32 T_LDR_SPREL = T_Write8 | T_ReadR13 | T_LoadMem | tk(tk_LDR_SPREL);
// Block transfers: the register list itself is handled in Decode().
const u32 T_PUSH = T_ReadR13 | T_WriteR13 | T_WriteMem | tk(tk_PUSH);
const u32 T_POP = T_ReadR13 | T_WriteR13 | T_LoadMem | tk(tk_POP);
const u32 T_LDMIA = T_Read8 | T_Write8 | T_LoadMem | tk(tk_LDMIA);
const u32 T_STMIA = T_Read8 | T_Write8 | T_WriteMem | tk(tk_STMIA);
// Branches and exceptions: T_BranchAlways marks R15 as written.
const u32 T_BCOND = T_BranchAlways | tk(tk_BCOND);
const u32 T_BX = T_BranchAlways | T_ReadHi3 | tk(tk_BX);
const u32 T_BLX_REG = T_BranchAlways | T_WriteR14 | T_ReadHi3 | tk(tk_BLX_REG);
const u32 T_B = T_BranchAlways | tk(tk_B);
const u32 T_BL_LONG_1 = T_WriteR14 | tk(tk_BL_LONG_1);
const u32 T_BL_LONG_2 = T_BranchAlways | T_ReadR14 | T_WriteR14 | tk(tk_BL_LONG_2);
const u32 T_UNK = T_BranchAlways | T_WriteR14 | tk(tk_UNK);
const u32 T_SVC = T_BranchAlways | T_WriteR14 | tk(tk_SVC);
// ARM_InstrTable.h is included with INSTRFUNC_PROTO(x) expanding to `u32 x`.
// NOTE(review): presumably this is what turns the header's table declarations
// into the u32 lookup tables (ARMInstrTable / THUMBInstrTable) that Decode()
// indexes, filled with the A_* / T_* attribute words above - confirm against
// the header.
#define INSTRFUNC_PROTO(x) u32 x
#include "ARM_InstrTable.h"
#undef INSTRFUNC_PROTO
// Decodes a single opcode into an Info record describing its kind, the
// registers and flags it reads and writes, whether it accesses memory and
// whether it ends a block.
//
//   thumb - true to decode instr as a THUMB opcode, false for an ARM opcode
//   num   - CPU the opcode runs on; 0 enables BLX (immediate) and coprocessor
//           15, 1 enables coprocessor 14 and disables the A_UnkOnARM7 and
//           halfword-multiply instructions
//   instr - the opcode itself
//
// Returns the filled-in Info; fields not touched by the opcode stay zero.
Info Decode(bool thumb, u32 num, u32 instr)
{
    // Flags read by a condition code, indexed by (condition >> 1); the two
    // conditions of each pair test the same flags. The table has an entry for
    // every value a 3-bit index can take: the last pair (the "always"
    // condition) reads no flags. The THUMB path below indexes with
    // (instr >> 9) & 0x7, so a 7-entry table could be read out of bounds.
    const u8 FlagsReadPerCond[8] = {
        flag_Z,
        flag_C,
        flag_N,
        flag_V,
        flag_C | flag_Z,
        flag_N | flag_V,
        flag_Z | flag_N | flag_V,
        0};

    Info res = {0};
    if (thumb)
    {
        // The table is indexed by the top 10 bits of the 16-bit opcode.
        u32 data = THUMBInstrTable[(instr >> 6) & 0x3FF];
        res.Kind = (data >> 22) & 0x3F;

        // Translate the register attribute bits into source/destination masks.
        if (data & T_Read0)
            res.SrcRegs |= 1 << (instr & 0x7);
        if (data & T_Read3)
            res.SrcRegs |= 1 << ((instr >> 3) & 0x7);
        if (data & T_Read6)
            res.SrcRegs |= 1 << ((instr >> 6) & 0x7);
        if (data & T_Read8)
            res.SrcRegs |= 1 << ((instr >> 8) & 0x7);

        if (data & T_Write0)
            res.DstRegs |= 1 << (instr & 0x7);
        if (data & T_Write8)
            res.DstRegs |= 1 << ((instr >> 8) & 0x7);

        // "Hi" forms: bit 7 of the opcode supplies bit 3 of the register number.
        if (data & T_ReadHi0)
            res.SrcRegs |= 1 << ((instr & 0x7) | ((instr >> 4) & 0x8));
        if (data & T_ReadHi3)
            res.SrcRegs |= 1 << ((instr >> 3) & 0xF);
        if (data & T_WriteHi0)
            res.DstRegs |= 1 << ((instr & 0x7) | ((instr >> 4) & 0x8));

        if (data & T_ReadR13)
            res.SrcRegs |= (1 << 13);
        if (data & T_WriteR13)
            res.DstRegs |= (1 << 13);
        if (data & T_WriteR14)
            res.DstRegs |= (1 << 14);
        if (data & T_ReadR14)
            res.SrcRegs |= (1 << 14);

        if (data & T_BranchAlways)
            res.DstRegs |= (1 << 15);

        // POP with bit 8 set also loads R15.
        if (res.Kind == tk_POP && instr & (1 << 8))
            res.DstRegs |= 1 << 15;

        // Flags: the lower nibble of WriteFlags holds flags that are always
        // written, the upper nibble flags that might be written.
        if (data & T_SetNZ)
            res.WriteFlags |= flag_N | flag_Z;
        if (data & T_SetCV)
            res.WriteFlags |= flag_C | flag_V;
        if (data & T_SetMaybeC)
            res.WriteFlags |= flag_C << 4;
        if (data & T_ReadC)
            res.ReadFlags |= flag_C;
        if (data & T_SetC)
            res.WriteFlags |= flag_C;

        if (data & T_WriteMem)
            res.SpecialKind = special_WriteMem;

        if (data & T_LoadMem)
        {
            if (res.Kind == tk_LDR_PCREL)
            {
                // Without literal optimisations the load goes through R15.
                if (!Config::JIT_LiteralOptimisations)
                    res.SrcRegs |= 1 << 15;
                res.SpecialKind = special_LoadLiteral;
            }
            else
            {
                res.SpecialKind = special_LoadMem;
            }
        }

        // Block transfers: add the register list. Registers that appear only
        // in the list are additionally recorded in NotStrictlyNeeded.
        if (res.Kind == tk_LDMIA || res.Kind == tk_POP)
        {
            u32 set = (instr & 0xFF);
            res.NotStrictlyNeeded |= set & ~(res.DstRegs|res.SrcRegs);
            res.DstRegs |= set;
        }
        if (res.Kind == tk_STMIA || res.Kind == tk_PUSH)
        {
            u32 set = (instr & 0xFF);
            // PUSH with bit 8 set also stores R14.
            if (res.Kind == tk_PUSH && instr & (1 << 8))
                set |= (1 << 14);
            res.NotStrictlyNeeded |= set & ~(res.DstRegs|res.SrcRegs);
            res.SrcRegs |= set;
        }

        res.EndBlock |= res.Branches();

        // Conditional branch: the condition lives in bits 8-11.
        if (res.Kind == tk_BCOND)
            res.ReadFlags |= FlagsReadPerCond[(instr >> 9) & 0x7];

        return res;
    }
    else
    {
        // The table is indexed by bits 4-7 and bits 20-27 of the opcode.
        u32 data = ARMInstrTable[((instr >> 4) & 0xF) | ((instr >> 16) & 0xFF0)];

        // Opcodes with condition field 0xF: BLX (immediate) on CPU 0,
        // otherwise treated as a no-op.
        if (num == 0 && (instr & 0xFE000000) == 0xFA000000)
            data = A_BLX_IMM;
        else if ((instr >> 28) == 0xF)
            data = ak(ak_Nop);

        if (data & A_UnkOnARM7 && num == 1)
            data = A_UNK;

        res.Kind = (data >> 22) & 0x1FF;

        // The halfword multiplies are treated as no-ops on CPU 1.
        if (res.Kind >= ak_SMLAxy && res.Kind <= ak_SMULxy && num == 1)
        {
            data = ak(ak_Nop);
            res.Kind = ak_Nop;
        }

        if (res.Kind == ak_MCR)
        {
            // Coprocessor register id: CRn, CRm and opcode2 packed into 12 bits.
            u32 cn = (instr >> 16) & 0xF;
            u32 cm = instr & 0xF;
            u32 cpinfo = (instr >> 5) & 0x7;
            u32 id = (cn<<8)|(cm<<4)|cpinfo;
            // These writes end the current block; the first two are
            // additionally flagged as wait-for-interrupt.
            if (id == 0x704 || id == 0x782 || id == 0x750 || id == 0x751 || id == 0x752)
                res.EndBlock |= true;

            if (id == 0x704 || id == 0x782)
                res.SpecialKind = special_WaitForInterrupt;
        }
        if (res.Kind == ak_MCR || res.Kind == ak_MRC)
        {
            // Only coprocessor 15 on CPU 0 and coprocessor 14 on CPU 1 are
            // accepted; everything else is decoded as unknown.
            u32 cp = ((instr >> 8) & 0xF);
            if ((num == 0 && cp != 15) || (num == 1 && cp != 14))
            {
                data = A_UNK;
                res.Kind = ak_UNK;
            }
        }
        // MRS with bit 22 clear reads all four flags.
        if (res.Kind == ak_MRS && !(instr & (1 << 22)))
            res.ReadFlags |= flag_N | flag_Z | flag_C | flag_V;
        // MSR with bit 19 set writes all four flags.
        if ((res.Kind == ak_MSR_IMM || res.Kind == ak_MSR_REG) && instr & (1 << 19))
            res.WriteFlags |= flag_N | flag_Z | flag_C | flag_V;

        // Translate the register attribute bits into source/destination masks.
        if (data & A_Read0)
            res.SrcRegs |= 1 << (instr & 0xF);
        if (data & A_Read16)
            res.SrcRegs |= 1 << ((instr >> 16) & 0xF);
        if (data & A_Read8)
            res.SrcRegs |= 1 << ((instr >> 8) & 0xF);
        if (data & A_Read12)
            res.SrcRegs |= 1 << ((instr >> 12) & 0xF);

        if (data & A_Write12)
            res.DstRegs |= 1 << ((instr >> 12) & 0xF);
        if (data & A_Write16)
            res.DstRegs |= 1 << ((instr >> 16) & 0xF);

        // Base register writeback only happens when bit 21 is set.
        if (data & A_MemWriteback && instr & (1 << 21))
            res.DstRegs |= 1 << ((instr >> 16) & 0xF);

        if (data & A_BranchAlways)
            res.DstRegs |= 1 << 15;

        // Doubleword transfers use the register in bits 12-15 and its successor.
        if (data & A_Read12Double)
        {
            res.SrcRegs |= 1 << ((instr >> 12) & 0xF);
            res.SrcRegs |= 1 << (((instr >> 12) & 0xF) + 1);
        }
        if (data & A_Write12Double)
        {
            res.DstRegs |= 1 << ((instr >> 12) & 0xF);
            res.DstRegs |= 1 << (((instr >> 12) & 0xF) + 1);
        }

        if (data & A_Link)
            res.DstRegs |= 1 << 14;

        // Bit 15 of the register list is R15, which has the same position in
        // the register masks, so it can be copied over directly.
        if (res.Kind == ak_LDM)
            res.DstRegs |= instr & (1 << 15); // this is right
        if (res.Kind == ak_STM)
            res.SrcRegs |= instr & (1 << 15);

        // Flags: the lower nibble of WriteFlags holds flags that are always
        // written, the upper nibble flags that might be written.
        if (data & A_SetNZ)
            res.WriteFlags |= flag_N | flag_Z;
        if (data & A_SetCV)
            res.WriteFlags |= flag_C | flag_V;
        if (data & A_SetMaybeC)
            res.WriteFlags |= flag_C << 4;
        if ((data & A_MulFlags) && (instr & (1 << 20)))
            res.WriteFlags |= flag_N | flag_Z;
        if (data & A_ReadC)
            res.ReadFlags |= flag_C;
        // A rotate with a zero shift amount reads C.
        if ((data & A_RRXReadC) && !((instr >> 7) & 0x1F))
            res.ReadFlags |= flag_C;
        // An immediate shift only writes C when the shift amount is non-zero.
        if ((data & A_SetC) || ((data & A_StaticShiftSetC) && ((instr >> 7) & 0x1F)))
            res.WriteFlags |= flag_C;

        if (data & A_WriteMem)
            res.SpecialKind = special_WriteMem;

        if (data & A_LoadMem)
        {
            // A load whose only source register is R15 is a literal load.
            if (res.SrcRegs == (1 << 15))
                res.SpecialKind = special_LoadLiteral;
            else
                res.SpecialKind = special_LoadMem;
        }

        // Block transfers: add the register list. Registers (other than R15)
        // that appear only in the list are additionally recorded in
        // NotStrictlyNeeded.
        if (res.Kind == ak_LDM)
        {
            u16 set = (instr & 0xFFFF);
            res.NotStrictlyNeeded |= set & ~(res.SrcRegs|res.DstRegs|(1<<15));
            res.DstRegs |= set;
        }
        if (res.Kind == ak_STM)
        {
            u16 set = (instr & 0xFFFF);
            res.NotStrictlyNeeded |= set & ~(res.SrcRegs|res.DstRegs|(1<<15));
            res.SrcRegs |= set;
        }

        if ((instr >> 28) < 0xE)
        {
            // make non conditional flag sets conditional
            res.WriteFlags = (res.WriteFlags | (res.WriteFlags << 4)) & 0xF0;
            // instr >> 29 is at most 6 here, the condition being below 0xE.
            res.ReadFlags |= FlagsReadPerCond[instr >> 29];
        }

        res.EndBlock |= res.Branches();

        return res;
    }
}
}

263
src/ARM_InstrInfo.h Normal file
View File

@ -0,0 +1,263 @@
#ifndef ARMINSTRINFO_H
#define ARMINSTRINFO_H
#include "types.h"
namespace ARMInstrInfo
{
// Instruction kinds, for faster dispatch
// Expands to the 18 comma-separated kind enumerators of ALU operation n, in
// this order: register operand shifted by immediate (LSL/LSR/ASR/ROR),
// register operand shifted by register (LSL/LSR/ASR/ROR), immediate operand,
// followed by the same nine forms with the _S suffix.
// The last line deliberately carries no line continuation, so the macro ends
// here no matter what follows it.
#define ak_ALU(n) \
    ak_##n##_REG_LSL_IMM, \
    ak_##n##_REG_LSR_IMM, \
    ak_##n##_REG_ASR_IMM, \
    ak_##n##_REG_ROR_IMM, \
    ak_##n##_REG_LSL_REG, \
    ak_##n##_REG_LSR_REG, \
    ak_##n##_REG_ASR_REG, \
    ak_##n##_REG_ROR_REG, \
    ak_##n##_IMM, \
    ak_##n##_REG_LSL_IMM_S, \
    ak_##n##_REG_LSR_IMM_S, \
    ak_##n##_REG_ASR_IMM_S, \
    ak_##n##_REG_ROR_IMM_S, \
    ak_##n##_REG_LSL_REG_S, \
    ak_##n##_REG_LSR_REG_S, \
    ak_##n##_REG_ASR_REG_S, \
    ak_##n##_REG_ROR_REG_S, \
    ak_##n##_IMM_S
// Expands to the 9 comma-separated kind enumerators of test operation n:
// register operand shifted by immediate (LSL/LSR/ASR/ROR), register operand
// shifted by register (LSL/LSR/ASR/ROR), immediate operand.
#define ak_Test(n) \
ak_##n##_REG_LSL_IMM, \
ak_##n##_REG_LSR_IMM, \
ak_##n##_REG_ASR_IMM, \
ak_##n##_REG_ROR_IMM, \
\
ak_##n##_REG_LSL_REG, \
ak_##n##_REG_LSR_REG, \
ak_##n##_REG_ASR_REG, \
ak_##n##_REG_ROR_REG, \
\
ak_##n##_IMM
// Expands to the 10 comma-separated kind enumerators of load/store n: the
// four shifted-register offset forms and the immediate offset form, followed
// by the same five forms with the POST prefix.
#define ak_WB_LDRSTR(n) \
ak_##n##_REG_LSL, \
ak_##n##_REG_LSR, \
ak_##n##_REG_ASR, \
ak_##n##_REG_ROR, \
\
ak_##n##_IMM, \
\
ak_##n##_POST_REG_LSL, \
ak_##n##_POST_REG_LSR, \
ak_##n##_POST_REG_ASR, \
ak_##n##_POST_REG_ROR, \
\
ak_##n##_POST_IMM
// Expands to the 4 comma-separated kind enumerators of load/store n: register
// and immediate offset forms, followed by their POST counterparts.
#define ak_HD_LDRSTR(n) \
ak_##n##_REG, \
ak_##n##_IMM, \
\
ak_##n##_POST_REG, \
ak_##n##_POST_IMM
// Instruction kinds. ARM kinds (ak_*) and THUMB kinds (tk_*) are both numbered
// from zero, so a kind value is only meaningful together with the instruction
// set it was decoded from. The values are implied by the enumerator order;
// Decode() relies on it (e.g. for the ak_SMLAxy..ak_SMULxy range check).
enum
{
// ARM data processing
ak_ALU(AND),
ak_ALU(EOR),
ak_ALU(SUB),
ak_ALU(RSB),
ak_ALU(ADD),
ak_ALU(ADC),
ak_ALU(SBC),
ak_ALU(RSC),
ak_ALU(ORR),
ak_ALU(MOV),
ak_ALU(BIC),
ak_ALU(MVN),
ak_Test(TST),
ak_Test(TEQ),
ak_Test(CMP),
ak_Test(CMN),
// ARM multiplies
ak_MUL,
ak_MLA,
ak_UMULL,
ak_UMLAL,
ak_SMULL,
ak_SMLAL,
ak_SMLAxy,
ak_SMLAWy,
ak_SMULWy,
ak_SMLALxy,
ak_SMULxy,
ak_CLZ,
ak_QADD,
ak_QSUB,
ak_QDADD,
ak_QDSUB,
// ARM loads and stores
ak_WB_LDRSTR(STR),
ak_WB_LDRSTR(STRB),
ak_WB_LDRSTR(LDR),
ak_WB_LDRSTR(LDRB),
ak_HD_LDRSTR(STRH),
ak_HD_LDRSTR(LDRD),
ak_HD_LDRSTR(STRD),
ak_HD_LDRSTR(LDRH),
ak_HD_LDRSTR(LDRSB),
ak_HD_LDRSTR(LDRSH),
ak_SWP,
ak_SWPB,
ak_LDM,
ak_STM,
// ARM branches, status register / coprocessor access, exceptions
ak_B,
ak_BL,
ak_BLX_IMM,
ak_BX,
ak_BLX_REG,
ak_UNK,
ak_MSR_IMM,
ak_MSR_REG,
ak_MRS,
ak_MCR,
ak_MRC,
ak_SVC,
ak_Nop,
// number of ARM kinds
ak_Count,
// THUMB kinds restart at zero
tk_LSL_IMM = 0,
tk_LSR_IMM,
tk_ASR_IMM,
tk_ADD_REG_,
tk_SUB_REG_,
tk_ADD_IMM_,
tk_SUB_IMM_,
tk_MOV_IMM,
tk_CMP_IMM,
tk_ADD_IMM,
tk_SUB_IMM,
tk_AND_REG,
tk_EOR_REG,
tk_LSL_REG,
tk_LSR_REG,
tk_ASR_REG,
tk_ADC_REG,
tk_SBC_REG,
tk_ROR_REG,
tk_TST_REG,
tk_NEG_REG,
tk_CMP_REG,
tk_CMN_REG,
tk_ORR_REG,
tk_MUL_REG,
tk_BIC_REG,
tk_MVN_REG,
tk_ADD_HIREG,
tk_CMP_HIREG,
tk_MOV_HIREG,
tk_ADD_PCREL,
tk_ADD_SPREL,
tk_ADD_SP,
tk_LDR_PCREL,
tk_STR_REG,
tk_STRB_REG,
tk_LDR_REG,
tk_LDRB_REG,
tk_STRH_REG,
tk_LDRSB_REG,
tk_LDRH_REG,
tk_LDRSH_REG,
tk_STR_IMM,
tk_LDR_IMM,
tk_STRB_IMM,
tk_LDRB_IMM,
tk_STRH_IMM,
tk_LDRH_IMM,
tk_STR_SPREL,
tk_LDR_SPREL,
tk_PUSH,
tk_POP,
tk_LDMIA,
tk_STMIA,
tk_BCOND,
tk_BX,
tk_BLX_REG,
tk_B,
tk_BL_LONG_1,
tk_BL_LONG_2,
tk_UNK,
tk_SVC,
// not a real instruction
tk_BL_LONG,
// number of THUMB kinds
tk_Count
};
// Condition flag bits as used in Info::ReadFlags and Info::WriteFlags,
// listed from the lowest bit upwards.
enum
{
    flag_V = 0x1,
    flag_C = 0x2,
    flag_Z = 0x4,
    flag_N = 0x8,
};
// Values of Info::SpecialKind: instructions that need special treatment
// beyond their register and flag usage.
enum
{
    special_NotSpecialAtAll  = 0,
    special_WriteMem         = 1,
    special_LoadMem          = 2,
    special_WaitForInterrupt = 3,
    special_LoadLiteral      = 4
};
// Result of decoding one instruction.
struct Info
{
    // Bit masks of registers (bit n = Rn).
    u16 DstRegs;           // registers written
    u16 SrcRegs;           // registers read
    u16 NotStrictlyNeeded; // registers that appear only in a block-transfer list

    u16 Kind;              // ak_* or tk_* value, depending on the instruction set

    u8 SpecialKind;        // one of the special_* values
    u8 ReadFlags;          // flag_* bits read

    // lower 4 bits - set always
    // upper 4 bits - might set flag
    u8 WriteFlags;

    bool EndBlock;         // the instruction terminates the current block

    // True when the instruction writes R15.
    bool Branches() const
    {
        return (DstRegs & (1 << 15)) != 0;
    }
};
// Decodes the opcode instr into an Info record.
//   thumb - true to decode as a THUMB opcode, false for an ARM opcode
//   num   - CPU the opcode runs on (0 or 1); some instructions decode
//           differently depending on it
Info Decode(bool thumb, u32 num, u32 instr);
}
#endif

View File

@ -1,5 +1,7 @@
project(core)
set (CMAKE_CXX_STANDARD 14)
add_library(core STATIC
ARCodeList.cpp
AREngine.cpp
@ -44,10 +46,53 @@ add_library(core STATIC
version.h
Wifi.cpp
WifiAP.cpp
tiny-AES-c/aes.c
xxhash/xxhash.c
)
if (ENABLE_JIT)
enable_language(ASM)
target_sources(core PRIVATE
ARM_InstrInfo.cpp
ARMJIT.cpp
ARMJIT_Memory.cpp
dolphin/CommonFuncs.cpp
)
if (ARCHITECTURE STREQUAL x86_64)
target_sources(core PRIVATE
dolphin/x64ABI.cpp
dolphin/x64CPUDetect.cpp
dolphin/x64Emitter.cpp
ARMJIT_x64/ARMJIT_Compiler.cpp
ARMJIT_x64/ARMJIT_ALU.cpp
ARMJIT_x64/ARMJIT_LoadStore.cpp
ARMJIT_x64/ARMJIT_Branch.cpp
ARMJIT_x64/ARMJIT_Linkage.s
)
set_source_files_properties(ARMJIT_x64/ARMJIT_Linkage.s PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp")
endif()
if (ARCHITECTURE STREQUAL ARM64)
target_sources(core PRIVATE
dolphin/Arm64Emitter.cpp
dolphin/MathUtil.cpp
ARMJIT_A64/ARMJIT_Compiler.cpp
ARMJIT_A64/ARMJIT_ALU.cpp
ARMJIT_A64/ARMJIT_LoadStore.cpp
ARMJIT_A64/ARMJIT_Branch.cpp
ARMJIT_A64/ARMJIT_Linkage.s
)
endif()
endif()
if (WIN32)
target_link_libraries(core ole32 comctl32 ws2_32 opengl32)
else()

View File

@ -21,6 +21,8 @@
#include "NDS.h"
#include "DSi.h"
#include "ARM.h"
#include "ARMJIT.h"
#include "ARMJIT_Memory.h"
// access timing for cached regions
@ -41,8 +43,8 @@ void ARMv5::CP15Reset()
DTCMSetting = 0;
ITCMSetting = 0;
memset(ITCM, 0, 0x8000);
memset(DTCM, 0, 0x4000);
memset(ITCM, 0, ITCMPhysicalSize);
memset(DTCM, 0, DTCMPhysicalSize);
ITCMSize = 0;
DTCMBase = 0xFFFFFFFF;
@ -74,8 +76,8 @@ void ARMv5::CP15DoSavestate(Savestate* file)
file->Var32(&DTCMSetting);
file->Var32(&ITCMSetting);
file->VarArray(ITCM, 0x8000);
file->VarArray(DTCM, 0x4000);
file->VarArray(ITCM, ITCMPhysicalSize);
file->VarArray(DTCM, DTCMPhysicalSize);
file->Var32(&PU_CodeCacheable);
file->Var32(&PU_DataCacheable);
@ -97,18 +99,26 @@ void ARMv5::CP15DoSavestate(Savestate* file)
void ARMv5::UpdateDTCMSetting()
{
u32 newDTCMBase;
u32 newDTCMSize;
if (CP15Control & (1<<16))
{
DTCMBase = DTCMSetting & 0xFFFFF000;
DTCMSize = 0x200 << ((DTCMSetting >> 1) & 0x1F);
newDTCMBase = DTCMSetting & 0xFFFFF000;
newDTCMSize = 0x200 << ((DTCMSetting >> 1) & 0x1F);
//printf("DTCM [%08X] enabled at %08X, size %X\n", DTCMSetting, DTCMBase, DTCMSize);
}
else
{
DTCMBase = 0xFFFFFFFF;
DTCMSize = 0;
newDTCMBase = 0xFFFFFFFF;
newDTCMSize = 0;
//printf("DTCM disabled\n");
}
if (newDTCMBase != DTCMBase || newDTCMSize != DTCMSize)
{
ARMJIT_Memory::RemapDTCM(newDTCMBase, newDTCMSize);
DTCMBase = newDTCMBase;
DTCMSize = newDTCMSize;
}
}
void ARMv5::UpdateITCMSetting()
@ -562,12 +572,15 @@ void ARMv5::CP15Write(u32 id, u32 val)
case 0x750:
ICacheInvalidateAll();
//Halt(255);
return;
case 0x751:
ICacheInvalidateByAddr(val);
//Halt(255);
return;
case 0x752:
printf("CP15: ICACHE INVALIDATE WEIRD. %08X\n", val);
//Halt(255);
return;
@ -595,6 +608,27 @@ void ARMv5::CP15Write(u32 id, u32 val)
ITCMSetting = val;
UpdateITCMSetting();
return;
case 0xF00:
//printf("cache debug index register %08X\n", val);
return;
case 0xF10:
//printf("cache debug instruction tag %08X\n", val);
return;
case 0xF20:
//printf("cache debug data tag %08X\n", val);
return;
case 0xF30:
//printf("cache debug instruction cache %08X\n", val);
return;
case 0xF40:
//printf("cache debug data cache %08X\n", val);
return;
}
if ((id&0xF00)!=0x700)
@ -704,7 +738,7 @@ u32 ARMv5::CodeRead32(u32 addr, bool branch)
if (addr < ITCMSize)
{
CodeCycles = 1;
return *(u32*)&ITCM[addr & 0x7FFF];
return *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)];
}
CodeCycles = RegionCodeCycles;
@ -726,16 +760,18 @@ u32 ARMv5::CodeRead32(u32 addr, bool branch)
void ARMv5::DataRead8(u32 addr, u32* val)
{
DataRegion = addr;
if (addr < ITCMSize)
{
DataCycles = 1;
*val = *(u8*)&ITCM[addr & 0x7FFF];
*val = *(u8*)&ITCM[addr & (ITCMPhysicalSize - 1)];
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
{
DataCycles = 1;
*val = *(u8*)&DTCM[(addr - DTCMBase) & 0x3FFF];
*val = *(u8*)&DTCM[(addr - DTCMBase) & (DTCMPhysicalSize - 1)];
return;
}
@ -745,18 +781,20 @@ void ARMv5::DataRead8(u32 addr, u32* val)
void ARMv5::DataRead16(u32 addr, u32* val)
{
DataRegion = addr;
addr &= ~1;
if (addr < ITCMSize)
{
DataCycles = 1;
*val = *(u16*)&ITCM[addr & 0x7FFF];
*val = *(u16*)&ITCM[addr & (ITCMPhysicalSize - 1)];
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
{
DataCycles = 1;
*val = *(u16*)&DTCM[(addr - DTCMBase) & 0x3FFF];
*val = *(u16*)&DTCM[(addr - DTCMBase) & (DTCMPhysicalSize - 1)];
return;
}
@ -766,18 +804,20 @@ void ARMv5::DataRead16(u32 addr, u32* val)
void ARMv5::DataRead32(u32 addr, u32* val)
{
DataRegion = addr;
addr &= ~3;
if (addr < ITCMSize)
{
DataCycles = 1;
*val = *(u32*)&ITCM[addr & 0x7FFF];
*val = *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)];
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
{
DataCycles = 1;
*val = *(u32*)&DTCM[(addr - DTCMBase) & 0x3FFF];
*val = *(u32*)&DTCM[(addr - DTCMBase) & (DTCMPhysicalSize - 1)];
return;
}
@ -792,13 +832,13 @@ void ARMv5::DataRead32S(u32 addr, u32* val)
if (addr < ITCMSize)
{
DataCycles += 1;
*val = *(u32*)&ITCM[addr & 0x7FFF];
*val = *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)];
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
{
DataCycles += 1;
*val = *(u32*)&DTCM[(addr - DTCMBase) & 0x3FFF];
*val = *(u32*)&DTCM[(addr - DTCMBase) & (DTCMPhysicalSize - 1)];
return;
}
@ -808,16 +848,21 @@ void ARMv5::DataRead32S(u32 addr, u32* val)
void ARMv5::DataWrite8(u32 addr, u8 val)
{
DataRegion = addr;
if (addr < ITCMSize)
{
DataCycles = 1;
*(u8*)&ITCM[addr & 0x7FFF] = val;
*(u8*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr);
#endif
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
{
DataCycles = 1;
*(u8*)&DTCM[(addr - DTCMBase) & 0x3FFF] = val;
*(u8*)&DTCM[(addr - DTCMBase) & (DTCMPhysicalSize - 1)] = val;
return;
}
@ -827,18 +872,23 @@ void ARMv5::DataWrite8(u32 addr, u8 val)
void ARMv5::DataWrite16(u32 addr, u16 val)
{
DataRegion = addr;
addr &= ~1;
if (addr < ITCMSize)
{
DataCycles = 1;
*(u16*)&ITCM[addr & 0x7FFF] = val;
*(u16*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr);
#endif
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
{
DataCycles = 1;
*(u16*)&DTCM[(addr - DTCMBase) & 0x3FFF] = val;
*(u16*)&DTCM[(addr - DTCMBase) & (DTCMPhysicalSize - 1)] = val;
return;
}
@ -848,18 +898,23 @@ void ARMv5::DataWrite16(u32 addr, u16 val)
void ARMv5::DataWrite32(u32 addr, u32 val)
{
DataRegion = addr;
addr &= ~3;
if (addr < ITCMSize)
{
DataCycles = 1;
*(u32*)&ITCM[addr & 0x7FFF] = val;
*(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr);
#endif
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
{
DataCycles = 1;
*(u32*)&DTCM[(addr - DTCMBase) & 0x3FFF] = val;
*(u32*)&DTCM[(addr - DTCMBase) & (DTCMPhysicalSize - 1)] = val;
return;
}
@ -874,13 +929,16 @@ void ARMv5::DataWrite32S(u32 addr, u32 val)
if (addr < ITCMSize)
{
DataCycles += 1;
*(u32*)&ITCM[addr & 0x7FFF] = val;
*(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr);
#endif
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
{
DataCycles += 1;
*(u32*)&DTCM[(addr - DTCMBase) & 0x3FFF] = val;
*(u32*)&DTCM[(addr - DTCMBase) & (DTCMPhysicalSize - 1)] = val;
return;
}

View File

@ -37,6 +37,14 @@ char DSiBIOS7Path[1024];
char DSiFirmwarePath[1024];
char DSiNANDPath[1024];
#ifdef JIT_ENABLED
int JIT_Enable = false;
int JIT_MaxBlockSize = 32;
int JIT_BranchOptimisations = 2;
int JIT_LiteralOptimisations = true;
int JIT_FastMemory = true;
#endif
ConfigEntry ConfigFile[] =
{
{"BIOS9Path", 1, BIOS9Path, 0, "", 1023},
@ -48,6 +56,14 @@ ConfigEntry ConfigFile[] =
{"DSiFirmwarePath", 1, DSiFirmwarePath, 0, "", 1023},
{"DSiNANDPath", 1, DSiNANDPath, 0, "", 1023},
#ifdef JIT_ENABLED
{"JIT_Enable", 0, &JIT_Enable, 0, NULL, 0},
{"JIT_MaxBlockSize", 0, &JIT_MaxBlockSize, 32, NULL, 0},
{"JIT_BranchOptimisations", 0, &JIT_BranchOptimisations, 2, NULL, 0},
{"JIT_LiteralOptimisations", 0, &JIT_LiteralOptimisations, 1, NULL, 0},
{"JIT_FastMemory", 0, &JIT_FastMemory, 1, NULL, 0},
#endif
{"", -1, NULL, 0, NULL, 0}
};

View File

@ -51,6 +51,14 @@ extern char DSiBIOS7Path[1024];
extern char DSiFirmwarePath[1024];
extern char DSiNANDPath[1024];
#ifdef JIT_ENABLED
extern int JIT_Enable;
extern int JIT_MaxBlockSize;
extern int JIT_BranchOptimisations;
extern int JIT_LiteralOptimisations;
extern int JIT_FastMemory;
#endif
}
#endif // CONFIG_H

View File

@ -26,6 +26,11 @@
#include "NDSCart.h"
#include "Platform.h"
#ifdef JIT_ENABLED
#include "ARMJIT.h"
#include "ARMJIT_Memory.h"
#endif
#include "DSi_NDMA.h"
#include "DSi_I2C.h"
#include "DSi_SD.h"
@ -34,15 +39,6 @@
#include "tiny-AES-c/aes.hpp"
namespace NDS
{
extern ARMv5* ARM9;
extern ARMv4* ARM7;
}
namespace DSi
{
@ -59,9 +55,9 @@ u8 ARM7iBIOS[0x10000];
u32 MBK[2][9];
u8 NWRAM_A[0x40000];
u8 NWRAM_B[0x40000];
u8 NWRAM_C[0x40000];
u8* NWRAM_A;
u8* NWRAM_B;
u8* NWRAM_C;
u8* NWRAMMap_A[2][4];
u8* NWRAMMap_B[3][8];
@ -86,6 +82,12 @@ u8 ARM7Init[0x3C00];
bool Init()
{
#ifndef JIT_ENABLED
NWRAM_A = new u8[NWRAMSize];
NWRAM_B = new u8[NWRAMSize];
NWRAM_C = new u8[NWRAMSize];
#endif
if (!DSi_I2C::Init()) return false;
if (!DSi_AES::Init()) return false;
@ -106,6 +108,12 @@ bool Init()
void DeInit()
{
#ifndef JIT_ENABLED
delete[] NWRAM_A;
delete[] NWRAM_B;
delete[] NWRAM_C;
#endif
DSi_I2C::DeInit();
DSi_AES::DeInit();
@ -176,7 +184,12 @@ void SoftReset()
NDS::ARM9->Reset();
NDS::ARM7->Reset();
NDS::ARM9->CP15Reset();
memcpy(NDS::ARM9->ITCM, ITCMInit, 0x8000);
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidateITCM();
#endif
DSi_AES::Reset();
@ -274,9 +287,9 @@ bool LoadNAND()
{
printf("Loading DSi NAND\n");
memset(NWRAM_A, 0, 0x40000);
memset(NWRAM_B, 0, 0x40000);
memset(NWRAM_C, 0, 0x40000);
memset(NWRAM_A, 0, NWRAMSize);
memset(NWRAM_B, 0, NWRAMSize);
memset(NWRAM_C, 0, NWRAMSize);
memset(MBK, 0, sizeof(MBK));
memset(NWRAMMap_A, 0, sizeof(NWRAMMap_A));
@ -527,6 +540,8 @@ void MapNWRAM_A(u32 num, u8 val)
return;
}
#ifdef JIT_ENABLED
// ARMJIT_Memory.h is only included when JIT_ENABLED is defined, so the call needs the same guard.
ARMJIT_Memory::RemapNWRAM(0);
#endif
int mbkn = 0, mbks = 8*num;
u8 oldval = (MBK[0][mbkn] >> mbks) & 0xFF;
@ -558,6 +573,8 @@ void MapNWRAM_B(u32 num, u8 val)
return;
}
#ifdef JIT_ENABLED
// ARMJIT_Memory.h is only included when JIT_ENABLED is defined, so the call needs the same guard.
ARMJIT_Memory::RemapNWRAM(1);
#endif
int mbkn = 1+(num>>2), mbks = 8*(num&3);
u8 oldval = (MBK[0][mbkn] >> mbks) & 0xFF;
@ -593,6 +610,8 @@ void MapNWRAM_C(u32 num, u8 val)
return;
}
#ifdef JIT_ENABLED
// ARMJIT_Memory.h is only included when JIT_ENABLED is defined, so the call needs the same guard.
ARMJIT_Memory::RemapNWRAM(2);
#endif
int mbkn = 3+(num>>2), mbks = 8*(num&3);
u8 oldval = (MBK[0][mbkn] >> mbks) & 0xFF;
@ -625,6 +644,8 @@ void MapNWRAMRange(u32 cpu, u32 num, u32 val)
u32 oldval = MBK[cpu][5+num];
if (oldval == val) return;
#ifdef JIT_ENABLED
// ARMJIT_Memory.h is only included when JIT_ENABLED is defined, so the call needs the same guard.
ARMJIT_Memory::RemapNWRAM(num);
#endif
MBK[cpu][5+num] = val;
// TODO: what happens when the ranges are 'out of range'????
@ -826,19 +847,31 @@ void ARM9Write8(u32 addr, u8 val)
if (addr >= NWRAMStart[0][0] && addr < NWRAMEnd[0][0])
{
u8* ptr = NWRAMMap_A[0][(addr >> 16) & NWRAMMask[0][0]];
if (ptr) *(u8*)&ptr[addr & 0xFFFF] = val;
if (ptr)
{
*(u8*)&ptr[addr & 0xFFFF] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_A>(addr);
#endif
}
return;
}
if (addr >= NWRAMStart[0][1] && addr < NWRAMEnd[0][1])
{
u8* ptr = NWRAMMap_B[0][(addr >> 15) & NWRAMMask[0][1]];
if (ptr) *(u8*)&ptr[addr & 0x7FFF] = val;
if (ptr)
{
*(u8*)&ptr[addr & 0x7FFF] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_B>(addr);
#endif
}
return;
}
if (addr >= NWRAMStart[0][2] && addr < NWRAMEnd[0][2])
{
u8* ptr = NWRAMMap_C[0][(addr >> 15) & NWRAMMask[0][2]];
if (ptr) *(u8*)&ptr[addr & 0x7FFF] = val;
if (ptr)
{
*(u8*)&ptr[addr & 0x7FFF] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_C>(addr);
#endif
}
return;
}
return NDS::ARM9Write8(addr, val);
@ -859,19 +892,31 @@ void ARM9Write16(u32 addr, u16 val)
if (addr >= NWRAMStart[0][0] && addr < NWRAMEnd[0][0])
{
u8* ptr = NWRAMMap_A[0][(addr >> 16) & NWRAMMask[0][0]];
if (ptr) *(u16*)&ptr[addr & 0xFFFF] = val;
if (ptr)
{
*(u16*)&ptr[addr & 0xFFFF] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_A>(addr);
#endif
}
return;
}
if (addr >= NWRAMStart[0][1] && addr < NWRAMEnd[0][1])
{
u8* ptr = NWRAMMap_B[0][(addr >> 15) & NWRAMMask[0][1]];
if (ptr) *(u16*)&ptr[addr & 0x7FFF] = val;
if (ptr)
{
*(u16*)&ptr[addr & 0x7FFF] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_B>(addr);
#endif
}
return;
}
if (addr >= NWRAMStart[0][2] && addr < NWRAMEnd[0][2])
{
u8* ptr = NWRAMMap_C[0][(addr >> 15) & NWRAMMask[0][2]];
if (ptr) *(u16*)&ptr[addr & 0x7FFF] = val;
if (ptr)
{
*(u16*)&ptr[addr & 0x7FFF] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_C>(addr);
#endif
}
return;
}
return NDS::ARM9Write16(addr, val);
@ -892,19 +937,31 @@ void ARM9Write32(u32 addr, u32 val)
if (addr >= NWRAMStart[0][0] && addr < NWRAMEnd[0][0])
{
u8* ptr = NWRAMMap_A[0][(addr >> 16) & NWRAMMask[0][0]];
if (ptr) *(u32*)&ptr[addr & 0xFFFF] = val;
if (ptr)
{
*(u32*)&ptr[addr & 0xFFFF] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_A>(addr);
#endif
}
return;
}
if (addr >= NWRAMStart[0][1] && addr < NWRAMEnd[0][1])
{
u8* ptr = NWRAMMap_B[0][(addr >> 15) & NWRAMMask[0][1]];
if (ptr) *(u32*)&ptr[addr & 0x7FFF] = val;
if (ptr)
{
*(u32*)&ptr[addr & 0x7FFF] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_B>(addr);
#endif
}
return;
}
if (addr >= NWRAMStart[0][2] && addr < NWRAMEnd[0][2])
{
u8* ptr = NWRAMMap_C[0][(addr >> 15) & NWRAMMask[0][2]];
if (ptr) *(u32*)&ptr[addr & 0x7FFF] = val;
if (ptr)
{
*(u32*)&ptr[addr & 0x7FFF] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_C>(addr);
#endif
}
return;
}
return NDS::ARM9Write32(addr, val);
@ -1085,19 +1142,37 @@ void ARM7Write8(u32 addr, u8 val)
if (addr >= NWRAMStart[1][0] && addr < NWRAMEnd[1][0])
{
u8* ptr = NWRAMMap_A[1][(addr >> 16) & NWRAMMask[1][0]];
if (ptr) *(u8*)&ptr[addr & 0xFFFF] = val;
if (ptr)
{
*(u8*)&ptr[addr & 0xFFFF] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_A>(addr);
#endif
}
return;
}
if (addr >= NWRAMStart[1][1] && addr < NWRAMEnd[1][1])
{
u8* ptr = NWRAMMap_B[1][(addr >> 15) & NWRAMMask[1][1]];
if (ptr) *(u8*)&ptr[addr & 0x7FFF] = val;
if (ptr)
{
*(u8*)&ptr[addr & 0x7FFF] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_B>(addr);
#endif
}
return;
}
if (addr >= NWRAMStart[1][2] && addr < NWRAMEnd[1][2])
{
u8* ptr = NWRAMMap_C[1][(addr >> 15) & NWRAMMask[1][2]];
if (ptr) *(u8*)&ptr[addr & 0x7FFF] = val;
if (ptr)
{
*(u8*)&ptr[addr & 0x7FFF] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_C>(addr);
#endif
}
return;
}
return NDS::ARM7Write8(addr, val);
@ -1118,19 +1193,31 @@ void ARM7Write16(u32 addr, u16 val)
if (addr >= NWRAMStart[1][0] && addr < NWRAMEnd[1][0])
{
u8* ptr = NWRAMMap_A[1][(addr >> 16) & NWRAMMask[1][0]];
if (ptr) *(u16*)&ptr[addr & 0xFFFF] = val;
if (ptr)
{
*(u16*)&ptr[addr & 0xFFFF] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_A>(addr);
#endif
}
return;
}
if (addr >= NWRAMStart[1][1] && addr < NWRAMEnd[1][1])
{
u8* ptr = NWRAMMap_B[1][(addr >> 15) & NWRAMMask[1][1]];
if (ptr) *(u16*)&ptr[addr & 0x7FFF] = val;
if (ptr)
{
*(u16*)&ptr[addr & 0x7FFF] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_B>(addr);
#endif
}
return;
}
if (addr >= NWRAMStart[1][2] && addr < NWRAMEnd[1][2])
{
u8* ptr = NWRAMMap_C[1][(addr >> 15) & NWRAMMask[1][2]];
if (ptr) *(u16*)&ptr[addr & 0x7FFF] = val;
if (ptr)
{
*(u16*)&ptr[addr & 0x7FFF] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_C>(addr);
#endif
}
return;
}
return NDS::ARM7Write16(addr, val);
@ -1151,19 +1238,31 @@ void ARM7Write32(u32 addr, u32 val)
if (addr >= NWRAMStart[1][0] && addr < NWRAMEnd[1][0])
{
u8* ptr = NWRAMMap_A[1][(addr >> 16) & NWRAMMask[1][0]];
if (ptr) *(u32*)&ptr[addr & 0xFFFF] = val;
if (ptr)
{
*(u32*)&ptr[addr & 0xFFFF] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_A>(addr);
#endif
}
return;
}
if (addr >= NWRAMStart[1][1] && addr < NWRAMEnd[1][1])
{
u8* ptr = NWRAMMap_B[1][(addr >> 15) & NWRAMMask[1][1]];
if (ptr) *(u32*)&ptr[addr & 0x7FFF] = val;
if (ptr)
{
*(u32*)&ptr[addr & 0x7FFF] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_B>(addr);
#endif
}
return;
}
if (addr >= NWRAMStart[1][2] && addr < NWRAMEnd[1][2])
{
u8* ptr = NWRAMMap_C[1][(addr >> 15) & NWRAMMask[1][2]];
if (ptr) *(u32*)&ptr[addr & 0x7FFF] = val;
if (ptr)
{
*(u32*)&ptr[addr & 0x7FFF] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_C>(addr);
#endif
}
return;
}
return NDS::ARM7Write32(addr, val);
@ -1521,7 +1620,7 @@ u8 ARM7IORead8(u32 addr)
case 0x04004501: return DSi_I2C::Cnt;
case 0x04004D00: if (SCFG_BIOS & (1<<10)) return 0; return ConsoleID & 0xFF;
case 0x04004D01: if (SCFG_BIOS & (1<<10)) return 0; return (ConsoleID >> 8) & 0xFF;
case 0x04004D01: if (SCFG_BIOS & (1<<10)) return 0; return (ConsoleID >> 8) & 0xFF;
case 0x04004D02: if (SCFG_BIOS & (1<<10)) return 0; return (ConsoleID >> 16) & 0xFF;
case 0x04004D03: if (SCFG_BIOS & (1<<10)) return 0; return (ConsoleID >> 24) & 0xFF;
case 0x04004D04: if (SCFG_BIOS & (1<<10)) return 0; return (ConsoleID >> 32) & 0xFF;

View File

@ -25,6 +25,8 @@
namespace DSi
{
extern u16 SCFG_BIOS;
extern u8 ARM9iBIOS[0x10000];
extern u8 ARM7iBIOS[0x10000];
@ -34,6 +36,19 @@ extern u64 ConsoleID;
extern DSi_SDHost* SDMMC;
extern DSi_SDHost* SDIO;
const u32 NWRAMSize = 0x40000;
extern u8* NWRAM_A;
extern u8* NWRAM_B;
extern u8* NWRAM_C;
extern u8* NWRAMMap_A[2][4];
extern u8* NWRAMMap_B[3][8];
extern u8* NWRAMMap_C[3][8];
extern u32 NWRAMStart[2][3];
extern u32 NWRAMEnd[2][3];
extern u32 NWRAMMask[2][3];
bool Init();
void DeInit();

View File

@ -21,6 +21,7 @@
#include "DSi.h"
#include "DSi_I2C.h"
#include "DSi_Camera.h"
#include "ARM.h"
namespace DSi_BPTWL
@ -108,7 +109,8 @@ void Write(u8 val, bool last)
printf("BPTWL: soft-reset\n");
val = 0; // checkme
// TODO: soft-reset might need to be scheduled later!
DSi::SoftReset();
// TODO: this has been moved for the JIT to work, nothing is confirmed here
NDS::ARM7->Halt(4);
CurPos = -1;
return;
}

View File

@ -33,6 +33,11 @@
#include "AREngine.h"
#include "Platform.h"
#ifdef JIT_ENABLED
#include "ARMJIT.h"
#include "ARMJIT_Memory.h"
#endif
#include "DSi.h"
#include "DSi_SPI_TSC.h"
@ -93,17 +98,17 @@ u32 CPUStop;
u8 ARM9BIOS[0x1000];
u8 ARM7BIOS[0x4000];
u8 MainRAM[0x1000000];
u8* MainRAM;
u32 MainRAMMask;
u8 SharedWRAM[0x8000];
u8* SharedWRAM;
u8 WRAMCnt;
u8* SWRAM_ARM9;
u8* SWRAM_ARM7;
u32 SWRAM_ARM9Mask;
u32 SWRAM_ARM7Mask;
u8 ARM7WRAM[0x10000];
// putting them together so they're always next to each other
MemRegion SWRAM_ARM9;
MemRegion SWRAM_ARM7;
u8* ARM7WRAM;
u16 ExMemCnt[2];
@ -168,6 +173,14 @@ bool Init()
ARM9 = new ARMv5();
ARM7 = new ARMv4();
#ifdef JIT_ENABLED
ARMJIT::Init();
#else
MainRAM = new u8[0x1000000];
ARM7WRAM = new u8[ARM7WRAMSize];
SharedWRAM = new u8[SharedWRAMSize];
#endif
DMAs[0] = new DMA(0, 0);
DMAs[1] = new DMA(0, 1);
DMAs[2] = new DMA(0, 2);
@ -200,6 +213,10 @@ void DeInit()
delete ARM9;
delete ARM7;
#ifdef JIT_ENABLED
ARMJIT::DeInit();
#endif
for (int i = 0; i < 8; i++)
delete DMAs[i];
@ -249,11 +266,9 @@ void SetARM9RegionTimings(u32 addrstart, u32 addrend, int buswidth, int nonseq,
ARM9MemTimings[i][3] = S32;
}
addrstart <<= 14;
addrend <<= 14;
if (!addrend) addrend = 0xFFFFFFFF;
ARM9->UpdateRegionTimings(addrstart, addrend);
ARM9->UpdateRegionTimings(addrstart<<14, addrend == 0x40000
? 0xFFFFFFFF
: (addrend<<14));
}
void SetARM7RegionTimings(u32 addrstart, u32 addrend, int buswidth, int nonseq, int seq)
@ -478,6 +493,10 @@ void Reset()
printf("ARM7 BIOS loaded\n");
fclose(f);
}
#ifdef JIT_ENABLED
ARMJIT::Reset();
#endif
if (ConsoleType == 1)
{
@ -492,6 +511,10 @@ void Reset()
ARM9ClockShift = 1;
MainRAMMask = 0x3FFFFF;
}
// has to be called before InitTimings
// otherwise some PU settings are completely
// uninitialised on the first run
ARM9->CP15Reset();
ARM9Timestamp = 0; ARM9Target = 0;
ARM7Timestamp = 0; ARM7Target = 0;
@ -499,7 +522,7 @@ void Reset()
InitTimings();
memset(MainRAM, 0, 0x1000000);
memset(MainRAM, 0, MainRAMMask + 1);
memset(SharedWRAM, 0, 0x8000);
memset(ARM7WRAM, 0, 0x10000);
@ -690,7 +713,7 @@ bool DoSavestate(Savestate* file)
file->VarArray(MainRAM, 0x400000);
file->VarArray(SharedWRAM, 0x8000);
file->VarArray(ARM7WRAM, 0x10000);
file->VarArray(ARM7WRAM, ARM7WRAMSize);
file->VarArray(ExMemCnt, 2*sizeof(u16));
file->VarArray(ROMSeed0, 2*8);
@ -787,6 +810,13 @@ bool DoSavestate(Savestate* file)
GPU::SetPowerCnt(PowerControl9);
}
#ifdef JIT_ENABLED
if (!file->Saving)
{
ARMJIT::ResetBlockCache();
}
#endif
return true;
}
@ -877,6 +907,7 @@ void RunSystem(u64 timestamp)
}
}
template <bool EnableJIT>
u32 RunFrame()
{
FrameStartTimestamp = SysTimestamp;
@ -910,7 +941,12 @@ u32 RunFrame()
}
else
{
ARM9->Execute();
#ifdef JIT_ENABLED
if (EnableJIT)
ARM9->ExecuteJIT();
else
#endif
ARM9->Execute();
}
RunTimers(0);
@ -933,7 +969,12 @@ u32 RunFrame()
}
else
{
ARM7->Execute();
#ifdef JIT_ENABLED
if (EnableJIT)
ARM7->ExecuteJIT();
else
#endif
ARM7->Execute();
}
RunTimers(1);
@ -963,6 +1004,16 @@ u32 RunFrame()
return GPU::TotalScanlines;
}
// Runs one frame, choosing the JIT or interpreter instantiation of RunFrame<>.
// The JIT variant is only selectable when the build defines JIT_ENABLED and
// Config::JIT_Enable is non-zero; otherwise the interpreter variant runs.
u32 RunFrame()
{
#ifdef JIT_ENABLED
    if (Config::JIT_Enable)
    {
        return RunFrame<true>();
    }
#endif
    return RunFrame<false>();
}
void Reschedule(u64 target)
{
if (CurCPU == 0)
@ -1082,36 +1133,41 @@ void Halt()
void MapSharedWRAM(u8 val)
{
if (val == WRAMCnt)
return;
#ifdef JIT_ENABLED
// ARMJIT_Memory.h is only included when JIT_ENABLED is defined, so the call needs the same guard.
ARMJIT_Memory::RemapSWRAM();
#endif
WRAMCnt = val;
switch (WRAMCnt & 0x3)
{
case 0:
SWRAM_ARM9 = &SharedWRAM[0];
SWRAM_ARM9Mask = 0x7FFF;
SWRAM_ARM7 = NULL;
SWRAM_ARM7Mask = 0;
SWRAM_ARM9.Mem = &SharedWRAM[0];
SWRAM_ARM9.Mask = 0x7FFF;
SWRAM_ARM7.Mem = NULL;
SWRAM_ARM7.Mask = 0;
break;
case 1:
SWRAM_ARM9 = &SharedWRAM[0x4000];
SWRAM_ARM9Mask = 0x3FFF;
SWRAM_ARM7 = &SharedWRAM[0];
SWRAM_ARM7Mask = 0x3FFF;
SWRAM_ARM9.Mem = &SharedWRAM[0x4000];
SWRAM_ARM9.Mask = 0x3FFF;
SWRAM_ARM7.Mem = &SharedWRAM[0];
SWRAM_ARM7.Mask = 0x3FFF;
break;
case 2:
SWRAM_ARM9 = &SharedWRAM[0];
SWRAM_ARM9Mask = 0x3FFF;
SWRAM_ARM7 = &SharedWRAM[0x4000];
SWRAM_ARM7Mask = 0x3FFF;
SWRAM_ARM9.Mem = &SharedWRAM[0];
SWRAM_ARM9.Mask = 0x3FFF;
SWRAM_ARM7.Mem = &SharedWRAM[0x4000];
SWRAM_ARM7.Mask = 0x3FFF;
break;
case 3:
SWRAM_ARM9 = NULL;
SWRAM_ARM9Mask = 0;
SWRAM_ARM7 = &SharedWRAM[0];
SWRAM_ARM7Mask = 0x7FFF;
SWRAM_ARM9.Mem = NULL;
SWRAM_ARM9.Mask = 0;
SWRAM_ARM7.Mem = &SharedWRAM[0];
SWRAM_ARM7.Mask = 0x7FFF;
break;
}
}
@ -1166,9 +1222,9 @@ void UpdateIRQ(u32 cpu)
if (IME[cpu] & 0x1)
{
arm->IRQ = IE[cpu] & IF[cpu];
arm->IRQ = !!(IE[cpu] & IF[cpu]);
if ((ConsoleType == 1) && cpu)
arm->IRQ |= (IE2 & IF2);
arm->IRQ |= !!(IE2 & IF2);
}
else
{
@ -1787,9 +1843,9 @@ u8 ARM9Read8(u32 addr)
return *(u8*)&MainRAM[addr & MainRAMMask];
case 0x03000000:
if (SWRAM_ARM9)
if (SWRAM_ARM9.Mem)
{
return *(u8*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask];
return *(u8*)&SWRAM_ARM9.Mem[addr & SWRAM_ARM9.Mask];
}
else
{
@ -1852,9 +1908,9 @@ u16 ARM9Read16(u32 addr)
return *(u16*)&MainRAM[addr & MainRAMMask];
case 0x03000000:
if (SWRAM_ARM9)
if (SWRAM_ARM9.Mem)
{
return *(u16*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask];
return *(u16*)&SWRAM_ARM9.Mem[addr & SWRAM_ARM9.Mask];
}
else
{
@ -1917,9 +1973,9 @@ u32 ARM9Read32(u32 addr)
return *(u32*)&MainRAM[addr & MainRAMMask];
case 0x03000000:
if (SWRAM_ARM9)
if (SWRAM_ARM9.Mem)
{
return *(u32*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask];
return *(u32*)&SWRAM_ARM9.Mem[addr & SWRAM_ARM9.Mask];
}
else
{
@ -1974,13 +2030,19 @@ void ARM9Write8(u32 addr, u8 val)
switch (addr & 0xFF000000)
{
case 0x02000000:
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_MainRAM>(addr);
#endif
*(u8*)&MainRAM[addr & MainRAMMask] = val;
return;
case 0x03000000:
if (SWRAM_ARM9)
if (SWRAM_ARM9.Mem)
{
*(u8*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_SharedWRAM>(addr);
#endif
*(u8*)&SWRAM_ARM9.Mem[addr & SWRAM_ARM9.Mask] = val;
}
return;
@ -2024,13 +2086,19 @@ void ARM9Write16(u32 addr, u16 val)
switch (addr & 0xFF000000)
{
case 0x02000000:
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_MainRAM>(addr);
#endif
*(u16*)&MainRAM[addr & MainRAMMask] = val;
return;
case 0x03000000:
if (SWRAM_ARM9)
if (SWRAM_ARM9.Mem)
{
*(u16*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_SharedWRAM>(addr);
#endif
*(u16*)&SWRAM_ARM9.Mem[addr & SWRAM_ARM9.Mask] = val;
}
return;
@ -2044,13 +2112,16 @@ void ARM9Write16(u32 addr, u16 val)
return;
case 0x06000000:
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_VRAM>(addr);
#endif
switch (addr & 0x00E00000)
{
case 0x00000000: GPU::WriteVRAM_ABG<u16>(addr, val); return;
case 0x00200000: GPU::WriteVRAM_BBG<u16>(addr, val); return;
case 0x00400000: GPU::WriteVRAM_AOBJ<u16>(addr, val); return;
case 0x00600000: GPU::WriteVRAM_BOBJ<u16>(addr, val); return;
default: GPU::WriteVRAM_LCDC<u16>(addr, val); return;
default: GPU::WriteVRAM_LCDC<u16>(addr, val); return;
}
case 0x07000000:
@ -2090,13 +2161,19 @@ void ARM9Write32(u32 addr, u32 val)
switch (addr & 0xFF000000)
{
case 0x02000000:
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_MainRAM>(addr);
#endif
*(u32*)&MainRAM[addr & MainRAMMask] = val;
return ;
case 0x03000000:
if (SWRAM_ARM9)
if (SWRAM_ARM9.Mem)
{
*(u32*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_SharedWRAM>(addr);
#endif
*(u32*)&SWRAM_ARM9.Mem[addr & SWRAM_ARM9.Mask] = val;
}
return;
@ -2110,13 +2187,16 @@ void ARM9Write32(u32 addr, u32 val)
return;
case 0x06000000:
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_VRAM>(addr);
#endif
switch (addr & 0x00E00000)
{
case 0x00000000: GPU::WriteVRAM_ABG<u32>(addr, val); return;
case 0x00200000: GPU::WriteVRAM_BBG<u32>(addr, val); return;
case 0x00400000: GPU::WriteVRAM_AOBJ<u32>(addr, val); return;
case 0x00600000: GPU::WriteVRAM_BOBJ<u32>(addr, val); return;
default: GPU::WriteVRAM_LCDC<u32>(addr, val); return;
default: GPU::WriteVRAM_LCDC<u32>(addr, val); return;
}
case 0x07000000:
@ -2149,7 +2229,7 @@ void ARM9Write32(u32 addr, u32 val)
return;
}
printf("unknown arm9 write32 %08X %08X | %08X\n", addr, val, ARM9->R[15]);
//printf("unknown arm9 write32 %08X %08X | %08X\n", addr, val, ARM9->R[15]);
}
bool ARM9GetMemRegion(u32 addr, bool write, MemRegion* region)
@ -2162,10 +2242,10 @@ bool ARM9GetMemRegion(u32 addr, bool write, MemRegion* region)
return true;
case 0x03000000:
if (SWRAM_ARM9)
if (SWRAM_ARM9.Mem)
{
region->Mem = SWRAM_ARM9;
region->Mask = SWRAM_ARM9Mask;
region->Mem = SWRAM_ARM9.Mem;
region->Mask = SWRAM_ARM9.Mask;
return true;
}
break;
@ -2204,17 +2284,17 @@ u8 ARM7Read8(u32 addr)
return *(u8*)&MainRAM[addr & MainRAMMask];
case 0x03000000:
if (SWRAM_ARM7)
if (SWRAM_ARM7.Mem)
{
return *(u8*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask];
return *(u8*)&SWRAM_ARM7.Mem[addr & SWRAM_ARM7.Mask];
}
else
{
return *(u8*)&ARM7WRAM[addr & 0xFFFF];
return *(u8*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)];
}
case 0x03800000:
return *(u8*)&ARM7WRAM[addr & 0xFFFF];
return *(u8*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)];
case 0x04000000:
return ARM7IORead8(addr);
@ -2264,17 +2344,17 @@ u16 ARM7Read16(u32 addr)
return *(u16*)&MainRAM[addr & MainRAMMask];
case 0x03000000:
if (SWRAM_ARM7)
if (SWRAM_ARM7.Mem)
{
return *(u16*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask];
return *(u16*)&SWRAM_ARM7.Mem[addr & SWRAM_ARM7.Mask];
}
else
{
return *(u16*)&ARM7WRAM[addr & 0xFFFF];
return *(u16*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)];
}
case 0x03800000:
return *(u16*)&ARM7WRAM[addr & 0xFFFF];
return *(u16*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)];
case 0x04000000:
return ARM7IORead16(addr);
@ -2331,17 +2411,17 @@ u32 ARM7Read32(u32 addr)
return *(u32*)&MainRAM[addr & MainRAMMask];
case 0x03000000:
if (SWRAM_ARM7)
if (SWRAM_ARM7.Mem)
{
return *(u32*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask];
return *(u32*)&SWRAM_ARM7.Mem[addr & SWRAM_ARM7.Mask];
}
else
{
return *(u32*)&ARM7WRAM[addr & 0xFFFF];
return *(u32*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)];
}
case 0x03800000:
return *(u32*)&ARM7WRAM[addr & 0xFFFF];
return *(u32*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)];
case 0x04000000:
return ARM7IORead32(addr);
@ -2385,23 +2465,35 @@ void ARM7Write8(u32 addr, u8 val)
{
case 0x02000000:
case 0x02800000:
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_MainRAM>(addr);
#endif
*(u8*)&MainRAM[addr & MainRAMMask] = val;
return;
case 0x03000000:
if (SWRAM_ARM7)
if (SWRAM_ARM7.Mem)
{
*(u8*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_SharedWRAM>(addr);
#endif
*(u8*)&SWRAM_ARM7.Mem[addr & SWRAM_ARM7.Mask] = val;
return;
}
else
{
*(u8*)&ARM7WRAM[addr & 0xFFFF] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr);
#endif
*(u8*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)] = val;
return;
}
case 0x03800000:
*(u8*)&ARM7WRAM[addr & 0xFFFF] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr);
#endif
*(u8*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)] = val;
return;
case 0x04000000:
@ -2410,6 +2502,9 @@ void ARM7Write8(u32 addr, u8 val)
case 0x06000000:
case 0x06800000:
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_VWRAM>(addr);
#endif
GPU::WriteVRAM_ARM7<u8>(addr, val);
return;
@ -2444,23 +2539,35 @@ void ARM7Write16(u32 addr, u16 val)
{
case 0x02000000:
case 0x02800000:
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_MainRAM>(addr);
#endif
*(u16*)&MainRAM[addr & MainRAMMask] = val;
return;
case 0x03000000:
if (SWRAM_ARM7)
if (SWRAM_ARM7.Mem)
{
*(u16*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_SharedWRAM>(addr);
#endif
*(u16*)&SWRAM_ARM7.Mem[addr & SWRAM_ARM7.Mask] = val;
return;
}
else
{
*(u16*)&ARM7WRAM[addr & 0xFFFF] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr);
#endif
*(u16*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)] = val;
return;
}
case 0x03800000:
*(u16*)&ARM7WRAM[addr & 0xFFFF] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr);
#endif
*(u16*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)] = val;
return;
case 0x04000000:
@ -2477,6 +2584,9 @@ void ARM7Write16(u32 addr, u16 val)
case 0x06000000:
case 0x06800000:
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_VWRAM>(addr);
#endif
GPU::WriteVRAM_ARM7<u16>(addr, val);
return;
@ -2513,23 +2623,35 @@ void ARM7Write32(u32 addr, u32 val)
{
case 0x02000000:
case 0x02800000:
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_MainRAM>(addr);
#endif
*(u32*)&MainRAM[addr & MainRAMMask] = val;
return;
case 0x03000000:
if (SWRAM_ARM7)
if (SWRAM_ARM7.Mem)
{
*(u32*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_SharedWRAM>(addr);
#endif
*(u32*)&SWRAM_ARM7.Mem[addr & SWRAM_ARM7.Mask] = val;
return;
}
else
{
*(u32*)&ARM7WRAM[addr & 0xFFFF] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr);
#endif
*(u32*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)] = val;
return;
}
case 0x03800000:
*(u32*)&ARM7WRAM[addr & 0xFFFF] = val;
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr);
#endif
*(u32*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)] = val;
return;
case 0x04000000:
@ -2547,6 +2669,9 @@ void ARM7Write32(u32 addr, u32 val)
case 0x06000000:
case 0x06800000:
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_VWRAM>(addr);
#endif
GPU::WriteVRAM_ARM7<u32>(addr, val);
return;
@ -2594,17 +2719,17 @@ bool ARM7GetMemRegion(u32 addr, bool write, MemRegion* region)
// then access all the WRAM as one contiguous block starting at 0x037F8000
// this case needs a bit of a hack to cover
// it's not really worth bothering anyway
if (!SWRAM_ARM7)
if (!SWRAM_ARM7.Mem)
{
region->Mem = ARM7WRAM;
region->Mask = 0xFFFF;
region->Mask = ARM7WRAMSize-1;
return true;
}
break;
case 0x03800000:
region->Mem = ARM7WRAM;
region->Mask = 0xFFFF;
region->Mask = ARM7WRAMSize-1;
return true;
}

View File

@ -80,7 +80,7 @@ enum
IRQ_IPCSendDone,
IRQ_IPCRecv,
IRQ_CartSendDone, // TODO: less misleading name
IRQ_CartIREQMC, // IRQ triggered by game cart (example: Pokémon Typing Adventure, BT controller)
IRQ_CartIREQMC, // IRQ triggered by game cart (example: Pokémon Typing Adventure, BT controller)
IRQ_GXFIFO,
IRQ_LidOpen,
IRQ_SPI,
@ -134,6 +134,7 @@ typedef struct
} MemRegion;
extern int ConsoleType;
extern int CurCPU;
extern u8 ARM9MemTimings[0x40000][4];
extern u8 ARM7MemTimings[0x20000][4];
@ -161,11 +162,22 @@ extern u8 ARM9BIOS[0x1000];
extern u8 ARM7BIOS[0x4000];
extern u16 ARM7BIOSProt;
extern u8 MainRAM[0x1000000];
extern u8* MainRAM;
extern u32 MainRAMMask;
const u32 MainRAMMaxSize = 0x1000000;
const u32 SharedWRAMSize = 0x8000;
extern u8* SharedWRAM;
extern MemRegion SWRAM_ARM9;
extern MemRegion SWRAM_ARM7;
extern u32 KeyInput;
const u32 ARM7WRAMSize = 0x10000;
extern u8* ARM7WRAM;
bool Init();
void DeInit();
void Reset();

24
src/dolphin/Align.h Normal file
View File

@ -0,0 +1,24 @@
// This file is under the public domain.
#pragma once
#include <cstddef>
#include <type_traits>
namespace Common
{
// Rounds `value` up to the next multiple of `size`; a value that is already a
// multiple is returned unchanged. `size` must be non-zero (it is used as a
// divisor). The arithmetic is carried out in the common type of T and size_t
// and the result is converted back to T.
template <typename T>
constexpr T AlignUp(T value, size_t size)
{
  static_assert(std::is_unsigned<T>::value, "T must be an unsigned value.");
  return static_cast<T>(value % size == 0 ? value : value + (size - value % size));
}

// Rounds `value` down to the previous multiple of `size`; a value that is
// already a multiple is returned unchanged. `size` must be non-zero.
template <typename T>
constexpr T AlignDown(T value, size_t size)
{
  static_assert(std::is_unsigned<T>::value, "T must be an unsigned value.");
  return static_cast<T>((value / size) * size);
}
}  // namespace Common

4466
src/dolphin/Arm64Emitter.cpp Normal file

File diff suppressed because it is too large Load Diff

1151
src/dolphin/Arm64Emitter.h Normal file

File diff suppressed because it is too large Load Diff

27
src/dolphin/ArmCommon.h Normal file
View File

@ -0,0 +1,27 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "../types.h"
// Condition-code numbers. Every enumerator carries its numeric value
// explicitly; CC_HS and CC_LO are alternate names for CC_CS and CC_CC.
enum CCFlags
{
  CC_EQ = 0,      // Equal
  CC_NEQ = 1,     // Not equal
  CC_CS = 2,      // Carry Set
  CC_CC = 3,      // Carry Clear
  CC_MI = 4,      // Minus (Negative)
  CC_PL = 5,      // Plus
  CC_VS = 6,      // Overflow
  CC_VC = 7,      // No Overflow
  CC_HI = 8,      // Unsigned higher
  CC_LS = 9,      // Unsigned lower or same
  CC_GE = 10,     // Signed greater than or equal
  CC_LT = 11,     // Signed less than
  CC_GT = 12,     // Signed greater than
  CC_LE = 13,     // Signed less than or equal
  CC_AL = 14,     // Always (unconditional)
  CC_HS = CC_CS,  // Unsigned higher or same (same value as CC_CS)
  CC_LO = CC_CC,  // Unsigned lower (same value as CC_CC)
};
// 0xE0000000 is the value of CC_AL (14) placed in the top four bits of a 32-bit word.
// NOTE(review): presumably the "always" condition field of an ARM instruction word — confirm against the emitter.
const u32 NO_COND = 0xE0000000;

218
src/dolphin/BitSet.h Normal file
View File

@ -0,0 +1,218 @@
// This file is under the public domain.
#pragma once
#include <cstddef>
#include <initializer_list>
#include <type_traits>
#include "../types.h"
#ifdef _WIN32
#include <intrin.h>
namespace Common
{
// Portable population count (number of set bits in v), using the parallel
// bit-summing method from
// https://graphics.stanford.edu/~seander/bithacks.html
// GCC has this built in, but MSVC's intrinsic will only emit the actual
// POPCNT instruction, which we're not depending on.
template <typename T>
constexpr int CountSetBits(T v)
{
  constexpr T ones = (T) ~(T)0;                // every bit of T set
  constexpr T pair_mask = ones / 3;            // 0x55... : low bit of each 2-bit group
  constexpr T quad_mask = ones / 15 * 3;       // 0x33... : low two bits of each nibble
  constexpr T nibble_mask = ones / 255 * 15;   // 0x0F... : low nibble of each byte
  constexpr T byte_ones = ones / 255;          // 0x01... : lowest bit of each byte
  constexpr size_t top_byte_shift = (sizeof(T) - 1) * 8;

  v = v - ((v >> 1) & pair_mask);                // 2-bit partial sums
  v = (v & quad_mask) + ((v >> 2) & quad_mask);  // 4-bit partial sums
  v = (v + (v >> 4)) & nibble_mask;              // 8-bit partial sums
  // The multiply accumulates all byte sums into the top byte of T.
  return (T)(v * byte_ones) >> top_byte_shift;
}
// MSVC overloads: each returns the zero-based index that _BitScanForward /
// _BitScanForward64 reports for val.
// NOTE(review): the intrinsic's return value is ignored and `index` has no
// initializer, so the result for val == 0 is not defined by this code —
// callers presumably never pass 0; confirm.
inline int LeastSignificantSetBit(u8 val)
{
unsigned long index;
_BitScanForward(&index, val);
return (int)index;
}
inline int LeastSignificantSetBit(u16 val)
{
unsigned long index;
_BitScanForward(&index, val);
return (int)index;
}
inline int LeastSignificantSetBit(u32 val)
{
unsigned long index;
_BitScanForward(&index, val);
return (int)index;
}
// 64-bit variant; uses _BitScanForward64.
// NOTE(review): presumably unavailable on 32-bit Windows targets — confirm if such a build is ever needed.
inline int LeastSignificantSetBit(u64 val)
{
unsigned long index;
_BitScanForward64(&index, val);
return (int)index;
}
#else
namespace Common
{
// Population count (number of set bits) via the compiler builtins.
// Narrow types are widened to unsigned int explicitly before the call.
constexpr int CountSetBits(u8 val)
{
  return __builtin_popcount(static_cast<unsigned int>(val));
}

constexpr int CountSetBits(u16 val)
{
  return __builtin_popcount(static_cast<unsigned int>(val));
}

constexpr int CountSetBits(u32 val)
{
  return __builtin_popcount(static_cast<unsigned int>(val));
}

// 64-bit values need the `ll` flavour of the builtin.
constexpr int CountSetBits(u64 val)
{
  return __builtin_popcountll(static_cast<unsigned long long>(val));
}
// Index of the lowest set bit, via the count-trailing-zeros builtins.
// Narrow types are widened to unsigned int explicitly before the call.
// The builtins' result is undefined for val == 0.
inline int LeastSignificantSetBit(u8 val)
{
  return __builtin_ctz(static_cast<unsigned int>(val));
}

inline int LeastSignificantSetBit(u16 val)
{
  return __builtin_ctz(static_cast<unsigned int>(val));
}

inline int LeastSignificantSetBit(u32 val)
{
  return __builtin_ctz(static_cast<unsigned int>(val));
}

// 64-bit values need the `ll` flavour of the builtin.
inline int LeastSignificantSetBit(u64 val)
{
  return __builtin_ctzll(static_cast<unsigned long long>(val));
}
#endif
// Similar to std::bitset, this is a class which encapsulates a bitset, i.e.
// using the set bits of an integer to represent a set of integers. Like that
// class, it acts like an array of bools:
// BitSet32 bs;
// bs[1] = true;
// but also like the underlying integer ([0] = least significant bit):
// BitSet32 bs2 = ...;
// bs = (bs ^ bs2) & BitSet32(0xffff);
// The following additional functionality is provided:
// - Construction using an initializer list.
// BitSet bs { 1, 2, 4, 8 };
// - Efficiently iterating through the set bits:
// for (int i : bs)
// [i is the *index* of a set bit]
// (This uses the appropriate CPU instruction to find the next set bit in one
// operation.)
// - Counting set bits using .Count() - see comment on that method.
// TODO: use constexpr when MSVC gets out of the Dark Ages
template <typename IntTy>
class BitSet
{
static_assert(!std::is_signed<IntTy>::value, "BitSet should not be used with signed types");
public:
// A reference to a particular bit, returned from operator[].
class Ref
{
public:
constexpr Ref(Ref&& other) : m_bs(other.m_bs), m_mask(other.m_mask) {}
constexpr Ref(BitSet* bs, IntTy mask) : m_bs(bs), m_mask(mask) {}
constexpr operator bool() const { return (m_bs->m_val & m_mask) != 0; }
bool operator=(bool set)
{
m_bs->m_val = (m_bs->m_val & ~m_mask) | (set ? m_mask : 0);
return set;
}
private:
BitSet* m_bs;
IntTy m_mask;
};
// A STL-like iterator is required to be able to use range-based for loops.
class Iterator
{
public:
constexpr Iterator(const Iterator& other) : m_val(other.m_val), m_bit(other.m_bit) {}
constexpr Iterator(IntTy val, int bit) : m_val(val), m_bit(bit) {}
Iterator& operator=(Iterator other)
{
new (this) Iterator(other);
return *this;
}
Iterator& operator++()
{
if (m_val == 0)
{
m_bit = -1;
}
else
{
int bit = LeastSignificantSetBit(m_val);
m_val &= ~(1 << bit);
m_bit = bit;
}
return *this;
}
Iterator operator++(int)
{
Iterator other(*this);
++*this;
return other;
}
constexpr int operator*() const { return m_bit; }
constexpr bool operator==(Iterator other) const { return m_bit == other.m_bit; }
constexpr bool operator!=(Iterator other) const { return m_bit != other.m_bit; }
private:
IntTy m_val;
int m_bit;
};
constexpr BitSet() : m_val(0) {}
constexpr explicit BitSet(IntTy val) : m_val(val) {}
BitSet(std::initializer_list<int> init)
{
m_val = 0;
for (int bit : init)
m_val |= (IntTy)1 << bit;
}
constexpr static BitSet AllTrue(size_t count)
{
return BitSet(count == sizeof(IntTy) * 8 ? ~(IntTy)0 : (((IntTy)1 << count) - 1));
}
Ref operator[](size_t bit) { return Ref(this, (IntTy)1 << bit); }
constexpr const Ref operator[](size_t bit) const { return (*const_cast<BitSet*>(this))[bit]; }
constexpr bool operator==(BitSet other) const { return m_val == other.m_val; }
constexpr bool operator!=(BitSet other) const { return m_val != other.m_val; }
constexpr bool operator<(BitSet other) const { return m_val < other.m_val; }
constexpr bool operator>(BitSet other) const { return m_val > other.m_val; }
constexpr BitSet operator|(BitSet other) const { return BitSet(m_val | other.m_val); }
constexpr BitSet operator&(BitSet other) const { return BitSet(m_val & other.m_val); }
constexpr BitSet operator^(BitSet other) const { return BitSet(m_val ^ other.m_val); }
constexpr BitSet operator~() const { return BitSet(~m_val); }
constexpr BitSet operator<<(IntTy shift) const { return BitSet(m_val << shift); }
constexpr BitSet operator>>(IntTy shift) const { return BitSet(m_val >> shift); }
constexpr explicit operator bool() const { return m_val != 0; }
BitSet& operator|=(BitSet other) { return *this = *this | other; }
BitSet& operator&=(BitSet other) { return *this = *this & other; }
BitSet& operator^=(BitSet other) { return *this = *this ^ other; }
BitSet& operator<<=(IntTy shift) { return *this = *this << shift; }
BitSet& operator>>=(IntTy shift) { return *this = *this >> shift; }
// Returns the number of set bits, as computed by CountSetBits (defined elsewhere).
// Warning: Even though on modern CPUs this is a single fast instruction,
// Dolphin's official builds do not currently assume POPCNT support on x86,
// so slower explicit bit twiddling is generated. Still should generally
// be faster than a loop.
constexpr unsigned int Count() const { return CountSetBits(m_val); }
// begin() builds an iterator at bit index 0 and pre-increments it once before returning it.
// NOTE(review): presumably the increment moves it to the first set bit; Iterator::operator++()
// is defined outside this excerpt - confirm there.
constexpr Iterator begin() const { return ++Iterator(m_val, 0); }
// end() is the sentinel iterator: bit index -1.
constexpr Iterator end() const { return Iterator(m_val, -1); }
IntTy m_val;
};
} // namespace Common
// Convenience aliases for BitSet over each of the project's fixed-width unsigned types.
using BitSet8 = Common::BitSet<u8>;
using BitSet16 = Common::BitSet<u16>;
using BitSet32 = Common::BitSet<u32>;
using BitSet64 = Common::BitSet<u64>;

254
src/dolphin/BitUtils.h Normal file
View File

@ -0,0 +1,254 @@
// Copyright 2017 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <climits>
#include <cstddef>
#include <cstring>
#include <type_traits>
namespace Common
{
///
/// Retrieves the size of a type in bits.
///
/// @tparam T Type to get the size of.
///
/// @return the size of the type in bits.
///
template <typename T>
constexpr size_t BitSize() noexcept
{
  // sizeof counts bytes; CHAR_BIT is the number of bits in each of them.
  return CHAR_BIT * sizeof(T);
}
///
/// Extracts a bit from a value.
///
/// @param src The value to extract a bit from.
/// @param bit The bit to extract.
///
/// @tparam T The type of the value.
///
/// @return The extracted bit.
///
template <typename T>
constexpr T ExtractBit(const T src, const size_t bit) noexcept
{
  // Bring the requested bit down to position 0, then drop everything above it.
  const auto shifted = src >> bit;
  return shifted & static_cast<T>(1);
}
///
/// Extracts a bit from a value.
///
/// @param src The value to extract a bit from.
///
/// @tparam bit The bit to extract.
/// @tparam T The type of the value.
///
/// @return The extracted bit.
///
template <size_t bit, typename T>
constexpr T ExtractBit(const T src) noexcept
{
  // The index is known at compile time, so an out-of-range value is rejected
  // before deferring to the runtime-index overload.
  constexpr size_t width = BitSize<T>();
  static_assert(bit < width, "Specified bit must be within T's bit width.");
  return ExtractBit(src, bit);
}
///
/// Extracts a range of bits from a value.
///
/// @param src The value to extract the bits from.
/// @param begin The beginning of the bit range. This is inclusive.
/// @param end The ending of the bit range. This is inclusive.
///
/// @tparam T The type of the value.
/// @tparam Result The returned result type. This is the unsigned analog
/// of a signed type if a signed type is passed as T.
///
/// @return The extracted bits.
///
template <typename T, typename Result = std::make_unsigned_t<T>>
constexpr Result ExtractBits(const T src, const size_t begin, const size_t end) noexcept
{
  // Requires begin <= end < bit width of T.
  constexpr size_t type_bits = sizeof(T) * CHAR_BIT;

  // Step 1: shift left so that bit `end` lands in the top position, pushing
  // every bit above `end` out of the value. For types narrower than int the
  // shift is carried out in promoted int, so the result must be converted
  // back to Result here; otherwise the "pushed out" bits survive in the wider
  // intermediate and leak back into the result of the right shift below.
  const Result aligned_to_top =
      static_cast<Result>(static_cast<Result>(src) << ((type_bits - 1) - end));

  // Step 2: shift right so that bit `begin` lands in position 0.
  return static_cast<Result>(aligned_to_top >> (type_bits - end + begin - 1));
}
///
/// Extracts a range of bits from a value.
///
/// @param src The value to extract the bits from.
///
/// @tparam begin The beginning of the bit range. This is inclusive.
/// @tparam end The ending of the bit range. This is inclusive.
/// @tparam T The type of the value.
/// @tparam Result The returned result type. This is the unsigned analog
/// of a signed type if a signed type is passed as T.
///
/// @return The extracted bits.
///
template <size_t begin, size_t end, typename T, typename Result = std::make_unsigned_t<T>>
constexpr Result ExtractBits(const T src) noexcept
{
  // Range checks happen at compile time; the extraction itself is delegated
  // to the runtime-range overload.
  constexpr size_t width = BitSize<T>();
  static_assert(begin < end, "Beginning bit must be less than the ending bit.");
  static_assert(begin < width, "Beginning bit is larger than T's bit width.");
  static_assert(end < width, "Ending bit is larger than T's bit width.");
  return ExtractBits<T, Result>(src, begin, end);
}
///
/// Rotates a value left (ROL).
///
/// @param value The value to rotate.
/// @param amount The number of bits to rotate the value.
/// @tparam T An unsigned type.
///
/// @return The rotated value.
///
template <typename T>
constexpr T RotateLeft(const T value, size_t amount) noexcept
{
  static_assert(std::is_unsigned<T>(), "Can only rotate unsigned types left.");

  // Reduce the amount modulo the width. A remainder of zero is returned as-is
  // so the complementary shift below is never by the full width of T.
  constexpr size_t width = BitSize<T>();
  const size_t shift = amount % width;
  return shift == 0 ? value : static_cast<T>((value << shift) | (value >> (width - shift)));
}
///
/// Rotates a value right (ROR).
///
/// @param value The value to rotate.
/// @param amount The number of bits to rotate the value.
/// @tparam T An unsigned type.
///
/// @return The rotated value.
///
template <typename T>
constexpr T RotateRight(const T value, size_t amount) noexcept
{
  static_assert(std::is_unsigned<T>(), "Can only rotate unsigned types right.");

  // Reduce the amount modulo the width. A remainder of zero is returned as-is
  // so the complementary shift below is never by the full width of T.
  constexpr size_t width = BitSize<T>();
  const size_t shift = amount % width;
  return shift == 0 ? value : static_cast<T>((value >> shift) | (value << (width - shift)));
}
///
/// Verifies whether the supplied value is a valid bit mask of the form 0b00...0011...11.
/// Both edge cases of all zeros and all ones are considered valid masks, too.
///
/// @param mask The mask value to test for validity.
///
/// @tparam T The type of the value.
///
/// @return A bool indicating whether the mask is valid.
///
template <typename T>
constexpr bool IsValidLowMask(const T mask) noexcept
{
  static_assert(std::is_integral<T>::value, "Mask must be an integral type.");
  static_assert(std::is_unsigned<T>::value, "Signed masks can introduce hard to find bugs.");

  // Adding one to a contiguous run of low ones carries into the next bit up
  // (or wraps to zero), so a valid mask shares no set bit with its successor.
  // Counterpart of https://graphics.stanford.edu/~seander/bithacks.html#DetermineIfPowerOf2;
  // neither all-zeros nor all-ones needs special handling.
  const auto successor = mask + 1;
  return (mask & successor) == 0;
}
///
/// Reinterpret objects of one type as another by bit-casting between object representations.
///
/// @remark This is the example implementation of std::bit_cast which is to be included
/// in C++2a. See http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0476r2.html
/// for more details. The only difference is this variant is not constexpr,
/// as the mechanism for bit_cast requires a compiler built-in to have that quality.
///
/// @param source The source object to convert to another representation.
///
/// @tparam To The type to reinterpret source as.
/// @tparam From The initial type representation of source.
///
/// @return The representation of type From as type To.
///
/// @pre Both To and From types must be the same size
/// @pre Both To and From types must satisfy the TriviallyCopyable concept.
///
template <typename To, typename From>
inline To BitCast(const From& source) noexcept
{
  static_assert(sizeof(From) == sizeof(To),
                "BitCast source and destination types must be equal in size.");
  static_assert(std::is_trivially_copyable<From>::value,
                "BitCast source type must be trivially copyable.");
  static_assert(std::is_trivially_copyable<To>::value,
                "BitCast destination type must be trivially copyable.");

  // Copy the object representation into suitably aligned raw storage and
  // read it back as a To. No To object is default-constructed along the way.
  using Buffer = std::aligned_storage_t<sizeof(To), alignof(To)>;
  Buffer raw;
  std::memcpy(&raw, &source, sizeof(Buffer));
  return reinterpret_cast<To&>(raw);
}
// Proxy around a PtrType* that reads and writes a T at that address through
// memcpy instead of through a reinterpret_cast'ed pointer.
template <typename T, typename PtrType>
class BitCastPtrType
{
public:
  static_assert(std::is_trivially_copyable<PtrType>::value,
                "BitCastPtr source type must be trivially copyable.");
  static_assert(std::is_trivially_copyable<T>::value,
                "BitCastPtr destination type must be trivially copyable.");

  explicit BitCastPtrType(PtrType* ptr) : m_ptr(ptr) {}

  // Writes `source` to the wrapped address. Only participates in overload
  // resolution when S is exactly T and the pointee is not const.
  template <typename S>
  inline std::enable_if_t<std::is_same<S, T>::value && !std::is_const<PtrType>::value>
  operator=(const S& source)
  {
    std::memcpy(m_ptr, &source, sizeof(S));
  }

  // Reads a T from the wrapped address.
  inline operator T() const
  {
    T value;
    std::memcpy(&value, m_ptr, sizeof(T));
    return value;
  }

private:
  PtrType* m_ptr;
};
// Provides an aliasing-safe alternative to reinterpret_cast'ing pointers to structs
// Conversion constructor and operator= provided for a convenient syntax.
// Usage: MyStruct s = BitCastPtr<MyStruct>(some_ptr);
// BitCastPtr<MyStruct>(some_ptr) = s;
template <typename T, typename PtrType>
inline auto BitCastPtr(PtrType* ptr) noexcept -> BitCastPtrType<T, PtrType>
{
  // PtrType is deduced from the argument; only T has to be spelled out by the caller.
  using Proxy = BitCastPtrType<T, PtrType>;
  return Proxy(ptr);
}
// Sets (bit_value == true) or clears (bit_value == false) bit `bit_number` of `value` in place.
template <typename T>
void SetBit(T& value, size_t bit_number, bool bit_value)
{
  static_assert(std::is_unsigned<T>(), "SetBit is only sane on unsigned types.");
  const auto mask = T{1} << bit_number;
  value = static_cast<T>(bit_value ? (value | mask) : (value & ~mask));
}
} // namespace Common

76
src/dolphin/CPUDetect.h Normal file
View File

@ -0,0 +1,76 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license_dolphin.txt file included.
// Detect the CPU, so we'll know which optimizations to use
#pragma once
#include <string>
// CPU vendors distinguished by CPUInfo; Other is the catch-all entry.
enum class CPUVendor
{
Intel,
AMD,
ARM,
Other,
};
// Plain record of host CPU properties and feature flags.
// Every flag defaults to false and every count to 0; vendor defaults to Intel.
// NOTE(review): the values are presumably filled in by Detect(), whose
// implementation is not part of this header - confirm in the matching .cpp.
struct CPUInfo
{
CPUVendor vendor = CPUVendor::Intel;
// Fixed-size, zero-initialised text buffers (0x41 = 65 and 0x21 = 33 bytes).
char cpu_string[0x41] = {};
char brand_string[0x21] = {};
bool OS64bit = false;
bool CPU64bit = false;
bool Mode64bit = false;
bool HTT = false;
int num_cores = 0;
int logical_cpu_count = 0;
// Instruction-set feature flags, named after the feature they report.
bool bSSE = false;
bool bSSE2 = false;
bool bSSE3 = false;
bool bSSSE3 = false;
bool bPOPCNT = false;
bool bSSE4_1 = false;
bool bSSE4_2 = false;
bool bLZCNT = false;
bool bSSE4A = false;
bool bAVX = false;
bool bAVX2 = false;
bool bBMI1 = false;
bool bBMI2 = false;
bool bFMA = false;
bool bFMA4 = false;
bool bAES = false;
// FXSAVE/FXRSTOR
bool bFXSR = false;
bool bMOVBE = false;
// This flag indicates that the hardware supports some mode
// in which denormal inputs _and_ outputs are automatically set to (signed) zero.
bool bFlushToZero = false;
bool bLAHFSAHF64 = false;
bool bLongMode = false;
bool bAtom = false;
// ARMv8 specific
bool bFP = false;
bool bASIMD = false;
bool bCRC32 = false;
bool bSHA1 = false;
bool bSHA2 = false;
// Call Detect()
explicit CPUInfo();
// Turn the CPU info into a string we can show
std::string Summarize();
private:
// Detects the various CPU features
void Detect();
};
extern CPUInfo cpu_info;

View File

@ -0,0 +1,52 @@
// Copyright 2009 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license_dolphin.txt file included.
#include <cstddef>
#include <cstring>
#include <errno.h>
#include <type_traits>
#include "CommonFuncs.h"
#ifdef _WIN32
#include <windows.h>
#define strerror_r(err, buf, len) strerror_s(buf, len, err)
#endif
constexpr size_t BUFFER_SIZE = 256;
// Returns the message text for the current value of errno.
// Looking the message up may itself modify errno.
std::string LastStrerrorString()
{
  char buffer[BUFFER_SIZE];

  // strerror_r exists in two incompatible flavours:
  //  - XSI: writes the message into the caller's buffer and returns an int (0 on success);
  //  - GNU: returns a pointer to the message, which may or may not live in the caller's buffer.
  // The preprocessor test below picks the flavour we expect to be active. The result is stored
  // in a variable of the matching type so that a wrong guess is caught at compile time.
#if defined(__GLIBC__) && (_GNU_SOURCE || (_POSIX_C_SOURCE < 200112L && _XOPEN_SOURCE < 600))
  const char* message = strerror_r(errno, buffer, BUFFER_SIZE);
  return std::string(message);
#else
  int status = strerror_r(errno, buffer, BUFFER_SIZE);
  if (status != 0)
    return "";
  return std::string(buffer);
#endif
}
#ifdef _WIN32
// Wrapper function to get GetLastError() string.
// This function might change the error code.
std::string GetLastErrorString()
{
char error_message[BUFFER_SIZE];
FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, nullptr, GetLastError(),
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), error_message, BUFFER_SIZE, nullptr);
return std::string(error_message);
}
#endif

58
src/dolphin/CommonFuncs.h Normal file
View File

@ -0,0 +1,58 @@
// Copyright 2009 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license_dolphin.txt file included.
#pragma once
#include <cstddef>
#include <string>
#include "../types.h"
// Yields the element count of a built-in array. The parameter only binds to
// array references, so passing anything else fails to compile.
template <typename T, size_t N>
constexpr size_t ArraySize(T (&)[N])
{
  return N;
}
// Platform shims. Both branches define Crash(); the Windows branch additionally
// maps POSIX-style function names onto their MSVC runtime equivalents.
#ifndef _WIN32
// go to debugger mode
// Expands to a braced block (not a do/while), so it also works without a trailing semicolon.
#define Crash() \
{ \
__builtin_trap(); \
}
#else // WIN32
// Function Cross-Compatibility
#define strcasecmp _stricmp
#define strncasecmp _strnicmp
#define unlink _unlink
#define vscprintf _vscprintf
// 64 bit offsets for Windows
#define fseeko _fseeki64
#define ftello _ftelli64
#define atoll _atoi64
#define stat _stat64
#define fstat _fstat64
#define fileno _fileno
// Declared by hand here instead of including <windows.h> in this header.
extern "C" {
__declspec(dllimport) void __stdcall DebugBreak(void);
}
#define Crash() \
{ \
DebugBreak(); \
}
#endif // _WIN32
// Wrapper function to get last strerror(errno) string.
// This function might change the error code.
std::string LastStrerrorString();
#ifdef _WIN32
// Wrapper function to get GetLastError() string.
// This function might change the error code.
std::string GetLastErrorString();
#endif

75
src/dolphin/Compat.h Normal file
View File

@ -0,0 +1,75 @@
// Stubs for Assert.h and Log.h
#pragma once
#include <assert.h>
// Assert stub
// Reduces to a plain assert() on the condition; the log-type tag, format string and
// format arguments are accepted but ignored. The commented-out body that follows
// inside the macro (a PanicYesNo/Crash sequence) is kept for reference only.
#define ASSERT_MSG(_t_, _a_, _fmt_, ...) \
assert(_a_) \
/*do \
{ \
if (!(_a_)) \
{ \
if (!PanicYesNo(_fmt_, ##__VA_ARGS__)) \
Crash(); \
} \
} while (0)*/
// Debug assertion stub: reduces to a plain assert() on the condition; the log-type
// tag, message and format arguments are accepted but ignored.
// The expansion deliberately carries no semicolon of its own (matching ASSERT_MSG,
// ASSERT and DEBUG_ASSERT): the caller's `;` terminates the statement, so the macro
// can be used as the body of an `if` that is followed by an `else`.
// The commented-out body inside the macro is kept for reference only.
#define DEBUG_ASSERT_MSG(_t_, _a_, _msg_, ...) \
  assert(_a_) \
  /*do \
  { \
    if (MAX_LOGLEVEL >= LogTypes::LOG_LEVELS::LDEBUG && !(_a_)) \
    { \
      ERROR_LOG(_t_, _msg_, ##__VA_ARGS__); \
      if (!PanicYesNo(_msg_, ##__VA_ARGS__)) \
        Crash(); \
    } \
  } while (0)*/
// Both macros reduce to a plain assert() on the condition. The commented-out bodies
// inside them are kept for reference only.
#define ASSERT(_a_) \
assert(_a_) \
/*do \
{ \
ASSERT_MSG(MASTER_LOG, _a_, \
_trans("An error occurred.\n\n Line: %d\n File: %s\n\nIgnore and continue?"), \
__LINE__, __FILE__); \
} while (0)*/
#define DEBUG_ASSERT(_a_) \
assert(_a_) \
/*do \
{ \
if (MAX_LOGLEVEL >= LogTypes::LOG_LEVELS::LDEBUG) \
ASSERT(_a_); \
} while (0)*/
// Log Stub
#include <cstdio>
#include <cstdlib>
// Prints the formatted message followed by a newline to stdout, then terminates
// the process. `fmt` must be a string literal because "\n" is appended to it by
// literal concatenation.
// stdout is flushed explicitly before aborting: abort() is not required to flush
// open streams, so without the flush the message can be lost whenever stdout is
// fully buffered (for example when redirected to a file or a pipe).
#define PanicAlert(fmt, ...) \
  do \
  { \
    printf(fmt "\n", ## __VA_ARGS__); \
    fflush(stdout); \
    std::abort(); \
  } while (false)
// Placeholder value for the log-type tag that callers pass as the first macro argument.
#define DYNA_REC 0
// Log stub: prints the formatted message followed by a newline to stdout.
// `which` is accepted but ignored; `fmt` must be a string literal because "\n" is
// appended to it by literal concatenation.
#define ERROR_LOG(which, fmt, ...) \
do \
{ \
printf(fmt "\n", ## __VA_ARGS__); \
} while (false)
#if __cplusplus < 201703L
// cheat
// Stand-in for C++17's std::clamp when building against an older standard.
// It mirrors the standard function: constexpr, returns a reference to one of its
// arguments, and compares with operator< only, so the same call sites compile
// and behave identically before and after C++17.
namespace std
{
template <typename T>
constexpr const T& clamp(const T& v, const T& lo, const T& hi)
{
  return (v < lo) ? lo : (hi < v) ? hi : v;
}
}
#endif

13
src/dolphin/MathUtil.cpp Normal file
View File

@ -0,0 +1,13 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "MathUtil.h"
#include <numeric>
// Adds up the elements of Vec front to back, starting from 0.0f.
float MathFloatVectorSum(const std::vector<float>& Vec)
{
  float total = 0.0f;
  for (const float element : Vec)
    total += element;
  return total;
}

121
src/dolphin/MathUtil.h Normal file
View File

@ -0,0 +1,121 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <algorithm>
#include <vector>
#include "Compat.h"
#include "../types.h"
#ifdef _MSC_VER
#include <intrin.h>
#endif
namespace MathUtil
{
// TAU is the full-turn constant (2 * pi); PI is derived from it.
constexpr double TAU = 6.2831853071795865;
constexpr double PI = TAU / 2;
// Returns 1 for values above T{}, -1 for values below it and 0 otherwise.
template <typename T>
constexpr auto Sign(const T& val) -> decltype((T{} < val) - (val < T{}))
{
  const auto above_zero = T{} < val;
  const auto below_zero = val < T{};
  return above_zero - below_zero;
}
// Linear interpolation: yields x for a == 0 and x + (y - x) for a == 1.
template <typename T, typename F>
constexpr auto Lerp(const T& x, const T& y, const F& a) -> decltype(x + (y - x) * a)
{
  const auto span = y - x;
  return x + span * a;
}
// True when imm is a positive power of two.
template <typename T>
constexpr bool IsPow2(T imm)
{
  // Zero and negative values are rejected first; imm - 1 is never evaluated for them.
  if (!(imm > 0))
    return false;
  // A power of two has exactly one set bit, which subtracting one clears.
  return (imm & (imm - 1)) == 0;
}
// Rounds value up to a power of two; values that already are one come back unchanged.
constexpr u32 NextPowerOf2(u32 value)
{
  --value;
  // Smear the highest set bit into every lower position (shifts of 1, 2, 4, 8, 16) ...
  for (u32 shift = 1; shift <= 16; shift *= 2)
    value |= value >> shift;
  // ... so that adding one carries into the next bit up.
  return value + 1;
}
// Axis-aligned rectangle stored as its four edge coordinates.
template <class T>
struct Rectangle
{
  T left{};
  T top{};
  T right{};
  T bottom{};

  constexpr Rectangle() = default;

  constexpr Rectangle(T theLeft, T theTop, T theRight, T theBottom)
      : left(theLeft), top(theTop), right(theRight), bottom(theBottom)
  {
  }

  constexpr bool operator==(const Rectangle& r) const
  {
    return left == r.left && top == r.top && right == r.right && bottom == r.bottom;
  }

  // Extents are magnitudes: the result is non-negative regardless of which edge
  // holds the larger coordinate. The difference is formed from the larger edge
  // directly instead of calling abs(), so the result never depends on which abs
  // overload happens to be visible (the C int-only abs would truncate
  // floating-point extents) and the functions also work for unsigned T.
  T GetWidth() const { return right < left ? left - right : right - left; }
  T GetHeight() const { return bottom < top ? top - bottom : bottom - top; }

  // If the rectangle is in a coordinate system with a lower-left origin, use
  // this Clamp.
  // Horizontal edges are clamped to [x1, x2], vertical edges to [y2, y1].
  void ClampLL(T x1, T y1, T x2, T y2)
  {
    left = std::clamp(left, x1, x2);
    right = std::clamp(right, x1, x2);
    top = std::clamp(top, y2, y1);
    bottom = std::clamp(bottom, y2, y1);
  }

  // If the rectangle is in a coordinate system with an upper-left origin,
  // use this Clamp.
  // Horizontal edges are clamped to [x1, x2], vertical edges to [y1, y2].
  void ClampUL(T x1, T y1, T x2, T y2)
  {
    left = std::clamp(left, x1, x2);
    right = std::clamp(right, x1, x2);
    top = std::clamp(top, y1, y2);
    bottom = std::clamp(bottom, y1, y2);
  }
};
} // namespace MathUtil
float MathFloatVectorSum(const std::vector<float>&);
// Rounds down. 0 -> undefined
// Returns the index of the highest set bit of val, i.e. floor(log2(val)).
inline int IntLog2(u64 val)
{
#if defined(__GNUC__)
// Highest bit index = 63 minus the number of leading zero bits.
// __builtin_clzll has an undefined result for 0.
return 63 - __builtin_clzll(val);
#elif defined(_MSC_VER)
// _BitScanReverse64 stores the index of the highest set bit into `result`.
// NOTE(review): for val == 0 the intrinsic presumably leaves `result` at its
// ULONG_MAX initial value - confirm against the MSVC documentation.
unsigned long result = ULONG_MAX;
_BitScanReverse64(&result, val);
return result;
#else
// Portable fallback: count how many right shifts it takes to reach zero.
// Returns -1 for val == 0.
int result = -1;
while (val != 0)
{
val >>= 1;
++result;
}
return result;
#endif
}

View File

@ -0,0 +1,339 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.

119
src/dolphin/x64ABI.cpp Normal file
View File

@ -0,0 +1,119 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license_dolphin.txt file included.
#include "../types.h"
#include "x64ABI.h"
#include "x64Emitter.h"
using namespace Gen;
// Shared code between Win64 and Unix64
// Works out how far RSP has to be lowered, after the GPRs in `mask` have been
// pushed, to make room for the XMM save area, the caller-requested frame and
// (on Windows) the shadow space.
//
// mask              - registers to save; bits selected by ABI_ALL_GPRS are
//                     GPRs, bits selected by ABI_ALL_FPRS are XMM registers.
// rsp_alignment     - only its low four bits influence the result.
//                     NOTE(review): presumably RSP's offset from a 16-byte
//                     boundary at the point of the call -- confirm with callers.
// needed_frame_size - extra bytes the caller wants reserved.
// shadowp           - out: shadow-space size (0x20 when _WIN32 is defined, else 0).
// subtractionp      - out: total number of bytes to subtract from RSP.
// xmm_offsetp       - out: offset from the adjusted RSP of the first XMM slot.
void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size,
                                      size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp)
{
#if defined(_WIN32)
  const size_t shadow_bytes = 0x20;
#else
  const size_t shadow_bytes = 0;
#endif

  const int gpr_total = (mask & ABI_ALL_GPRS).Count();
  const int xmm_total = (mask & ABI_ALL_FPRS).Count();

  // Every pushed GPR accounts for 8 bytes.
  rsp_alignment -= gpr_total * 8;

  // If we have any XMMs to save, the stack must be aligned before their slots.
  size_t frame_bytes = (xmm_total != 0) ? (rsp_alignment & 0xf) : 0;
  frame_bytes += 16 * xmm_total;
  const size_t xmm_area_end = frame_bytes;

  frame_bytes += needed_frame_size;
  frame_bytes += shadow_bytes;

  // Final alignment padding.
  rsp_alignment -= frame_bytes;
  frame_bytes += rsp_alignment & 0xf;

  *shadowp = shadow_bytes;
  *subtractionp = frame_bytes;
  *xmm_offsetp = frame_bytes - xmm_area_end;
}
// Emits the register-saving sequence: PUSH for every GPR in `mask`, one SUB on
// RSP for the frame computed by ABI_CalculateFrameSize, then a MOVAPD store
// for every XMM register in `mask`.
// Returns the shadow-space size reported by ABI_CalculateFrameSize.
size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment,
                                                 size_t needed_frame_size)
{
  size_t shadow_bytes, frame_bytes, xmm_slot;
  ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow_bytes, &frame_bytes,
                         &xmm_slot);

  for (int gpr : mask & ABI_ALL_GPRS)
    PUSH((X64Reg)gpr);

  if (frame_bytes)
  {
    // Adjustments below 0x80 are emitted with an 8-bit immediate.
    if (frame_bytes >= 0x80)
      SUB(64, R(RSP), Imm32((u32)frame_bytes));
    else
      SUB(64, R(RSP), Imm8((u8)frame_bytes));
  }

  // XMM registers occupy mask bits 16..31, hence the "- 16".
  for (int bit : mask & ABI_ALL_FPRS)
  {
    MOVAPD(MDisp(RSP, (int)xmm_slot), (X64Reg)(bit - 16));
    xmm_slot += 16;
  }

  return shadow_bytes;
}
// Emits the register-restoring sequence for a frame laid out by
// ABI_CalculateFrameSize: MOVAPD loads for every XMM register in `mask`, one
// ADD on RSP for the frame, then POP for every GPR in `mask` from bit 15 down
// to bit 0.
void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment,
                                              size_t needed_frame_size)
{
  size_t shadow_bytes, frame_bytes, xmm_slot;
  ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow_bytes, &frame_bytes,
                         &xmm_slot);

  // XMM registers occupy mask bits 16..31, hence the "- 16".
  for (int bit : mask & ABI_ALL_FPRS)
  {
    MOVAPD((X64Reg)(bit - 16), MDisp(RSP, (int)xmm_slot));
    xmm_slot += 16;
  }

  if (frame_bytes)
  {
    // Adjustments below 0x80 are emitted with an 8-bit immediate.
    if (frame_bytes >= 0x80)
      ADD(64, R(RSP), Imm32((u32)frame_bytes));
    else
      ADD(64, R(RSP), Imm8((u8)frame_bytes));
  }

  // Walk the GPR bits from 15 down to 0.
  int gpr = 16;
  while (gpr-- > 0)
  {
    if (mask[gpr])
      POP((X64Reg)gpr);
  }
}
// Emits code performing two register moves as a pair:
//   dst1 = src1 + offset1
//   dst2 = src2
// The two moves are ordered (or replaced by an XCHG) so that writing one
// destination does not destroy the other move's source.
void XEmitter::MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, s32 offset1, Gen::X64Reg dst2,
                      Gen::X64Reg src2)
{
  // The two moves swap the same pair of registers: exchange, then apply the offset.
  if (dst1 == src2 && dst2 == src1)
  {
    XCHG(bits, R(src1), R(src2));
    if (offset1)
      ADD(bits, R(dst1), Imm32(offset1));
    return;
  }

  // dst1 = src1 + offset1, emitted as ADD, LEA or MOV depending on what is needed.
  const auto emit_first = [&] {
    if (dst1 == src1)
    {
      if (offset1)
        ADD(bits, R(dst1), Imm32(offset1));
    }
    else if (offset1)
    {
      LEA(bits, dst1, MDisp(src1, offset1));
    }
    else
    {
      MOV(bits, R(dst1), R(src1));
    }
  };

  // dst2 = src2, skipped when it would be a no-op.
  const auto emit_second = [&] {
    if (dst2 != src2)
      MOV(bits, R(dst2), R(src2));
  };

  if (src2 != dst1)
  {
    emit_first();
    emit_second();
  }
  else
  {
    // Writing dst1 first would overwrite src2, so do the second move first.
    emit_second();
    emit_first();
  }
}

58
src/dolphin/x64ABI.h Normal file
View File

@ -0,0 +1,58 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license_dolphin.txt file included.
#pragma once
#include "BitSet.h"
#include "x64Reg.h"
// x64 ABI:s, and helpers to help follow them when JIT-ing code.
// All conventions return values in EAX (+ possibly EDX).
// Windows 64-bit
// * 4-reg "fastcall" variant, very new-skool stack handling
// * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself
// calls_
// * Parameters passed in RCX, RDX, ... further parameters are MOVed into the allocated stack space.
// Scratch: RAX RCX RDX R8 R9 R10 R11
// Callee-save: RBX RSI RDI RBP R12 R13 R14 R15
// Parameters: RCX RDX R8 R9, further MOV-ed
// Linux 64-bit
// * 6-reg "fastcall" variant, old skool stack handling (parameters are pushed)
// Scratch: RAX RCX RDX RSI RDI R8 R9 R10 R11
// Callee-save: RBX RBP R12 R13 R14 R15
// Parameters: RDI RSI RDX RCX R8 R9
#define ABI_ALL_FPRS BitSet32(0xffff0000)
#define ABI_ALL_GPRS BitSet32(0x0000ffff)
#ifdef _WIN32 // 64-bit Windows - the really exotic calling convention
#define ABI_PARAM1 RCX
#define ABI_PARAM2 RDX
#define ABI_PARAM3 R8
#define ABI_PARAM4 R9
// xmm0-xmm15 use the upper 16 bits in the functions that push/pop registers.
#define ABI_ALL_CALLER_SAVED \
(BitSet32{RAX, RCX, RDX, R8, R9, R10, R11, XMM0 + 16, XMM1 + 16, XMM2 + 16, XMM3 + 16, \
XMM4 + 16, XMM5 + 16})
#else // 64-bit Unix / OS X
#define ABI_PARAM1 RDI
#define ABI_PARAM2 RSI
#define ABI_PARAM3 RDX
#define ABI_PARAM4 RCX
#define ABI_PARAM5 R8
#define ABI_PARAM6 R9
// FIXME: avoid pushing all 16 XMM registers when possible? most functions we call probably
// don't actually clobber them.
#define ABI_ALL_CALLER_SAVED (BitSet32{RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11} | ABI_ALL_FPRS)
#endif // _WIN32
#define ABI_ALL_CALLEE_SAVED (~ABI_ALL_CALLER_SAVED)
#define ABI_RETURN RAX

View File

@ -0,0 +1,273 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license_dolphin.txt file included.
#include <cstring>
#include <string>
#include "CPUDetect.h"
#include "../types.h"
#ifndef _MSVC_VER
#ifdef __FreeBSD__
#include <unistd.h>
#include <machine/cpufunc.h>
#include <sys/types.h>
#endif
// Fallback for the MSVC-style __cpuidex intrinsic: runs CPUID with
// EAX = function_id and ECX = subfunction_id. On the inline-asm path the
// resulting EAX, EBX, ECX and EDX are stored in info[0], info[1], info[2] and
// info[3] respectively.
// NOTE(review): the enclosing guard tests _MSVC_VER, but the macro MSVC
// predefines is _MSC_VER, so this definition is presumably compiled on every
// toolchain -- confirm that is intended.
static inline void __cpuidex(int info[4], int function_id, int subfunction_id)
{
#ifdef __FreeBSD__
// Despite the name, this is just do_cpuid() with ECX as second input.
cpuid_count((u_int)function_id, (u_int)subfunction_id, (u_int*)info);
#else
// These two stores are overwritten by the asm outputs below; the values CPUID
// actually consumes are supplied through the "a" and "c" input constraints.
info[0] = function_id; // eax
info[2] = subfunction_id; // ecx
__asm__("cpuid"
: "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
: "a"(function_id), "c"(subfunction_id));
#endif
}
// Convenience form of __cpuidex with the sub-function (ECX input) fixed at 0.
static inline void __cpuid(int info[4], int function_id)
{
  __cpuidex(info, function_id, 0);
}
#endif // ifndef _MSVC_VER
#ifdef _MSVC_VER
// Returns the value of the extended control register selected by `index`,
// forwarding directly to the compiler-provided _xgetbv intrinsic.
static u64 xgetbv(u32 index)
{
return _xgetbv(index);
}
constexpr u32 XCR_XFEATURE_ENABLED_MASK = _XCR_XFEATURE_ENABLED_MASK;
#else
// Returns the value of the extended control register selected by `index`.
// Executes XGETBV with ECX = index and combines the EDX:EAX result into a
// single 64-bit value (EDX in the upper half).
static u64 xgetbv(u32 index)
{
u32 eax, edx;
__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
return ((u64)edx << 32) | eax;
}
constexpr u32 XCR_XFEATURE_ENABLED_MASK = 0;
#endif // ifdef _MSVC_VER
CPUInfo cpu_info;
// Runs feature detection as part of construction, so every CPUInfo object
// (including the file-level cpu_info instance) is populated as soon as it
// exists.
CPUInfo::CPUInfo()
{
Detect();
}
// Detects the various CPU features.
// Queries CPUID (and XGETBV for the AVX check) and records the results in this
// object's fields: vendor, brand/model strings, instruction-set feature flags,
// and the core / logical-processor counts. Takes no arguments and returns
// nothing.
void CPUInfo::Detect()
{
// NOTE(review): _M_X86_64 is not predefined by GCC/Clang (__x86_64__) or MSVC
// (_M_X64); presumably a project header defines it -- confirm, otherwise
// Mode64bit/OS64bit are not set here.
#ifdef _M_X86_64
Mode64bit = true;
OS64bit = true;
#endif
num_cores = 1;
// Set obvious defaults, for extra safety
if (Mode64bit)
{
bSSE = true;
bSSE2 = true;
bLongMode = true;
}
// Assume CPU supports the CPUID instruction. Those that don't can barely
// boot modern OS:es anyway.
int cpu_id[4];
// Detect CPU's CPUID capabilities, and grab CPU string
__cpuid(cpu_id, 0x00000000);
u32 max_std_fn = cpu_id[0]; // EAX
// The 12-character vendor string is returned in EBX, EDX, ECX order.
std::memcpy(&brand_string[0], &cpu_id[1], sizeof(int));
std::memcpy(&brand_string[4], &cpu_id[3], sizeof(int));
std::memcpy(&brand_string[8], &cpu_id[2], sizeof(int));
// Leaf 0x80000000: EAX holds the highest supported extended leaf.
__cpuid(cpu_id, 0x80000000);
u32 max_ex_fn = cpu_id[0];
if (!strcmp(brand_string, "GenuineIntel"))
vendor = CPUVendor::Intel;
else if (!strcmp(brand_string, "AuthenticAMD"))
vendor = CPUVendor::AMD;
else
vendor = CPUVendor::Other;
// Set reasonable default brand string even if brand string not available.
strcpy(cpu_string, brand_string);
// Detect family and other misc stuff.
bool ht = false;
HTT = ht;
logical_cpu_count = 1;
if (max_std_fn >= 1)
{
// Leaf 1: EAX = family/model signature, EBX[23:16] = logical processor
// count, ECX (cpu_id[2]) and EDX (cpu_id[3]) = feature bits.
__cpuid(cpu_id, 0x00000001);
// Base family (EAX[11:8]) plus extended family (EAX[27:20]); the two are
// added unconditionally here.
int family = ((cpu_id[0] >> 8) & 0xf) + ((cpu_id[0] >> 20) & 0xff);
// Base model (EAX[7:4]) plus extended model (EAX[19:16]) placed in bits 7:4.
int model = ((cpu_id[0] >> 4) & 0xf) + ((cpu_id[0] >> 12) & 0xf0);
// Detect people unfortunate enough to be running Dolphin on an Atom
if (family == 6 &&
(model == 0x1C || model == 0x26 || model == 0x27 || model == 0x35 || model == 0x36 ||
model == 0x37 || model == 0x4A || model == 0x4D || model == 0x5A || model == 0x5D))
bAtom = true;
logical_cpu_count = (cpu_id[1] >> 16) & 0xFF;
// EDX bit 28: hyper-threading capability flag, used by the core counting below.
ht = (cpu_id[3] >> 28) & 1;
if ((cpu_id[3] >> 25) & 1)
bSSE = true;
if ((cpu_id[3] >> 26) & 1)
bSSE2 = true;
if ((cpu_id[2]) & 1)
bSSE3 = true;
if ((cpu_id[2] >> 9) & 1)
bSSSE3 = true;
if ((cpu_id[2] >> 19) & 1)
bSSE4_1 = true;
if ((cpu_id[2] >> 20) & 1)
bSSE4_2 = true;
if ((cpu_id[2] >> 22) & 1)
bMOVBE = true;
if ((cpu_id[2] >> 25) & 1)
bAES = true;
if ((cpu_id[3] >> 24) & 1)
{
// We can use FXSAVE.
bFXSR = true;
}
// AVX support requires 3 separate checks:
// - Is the AVX bit set in CPUID?
// - Is the XSAVE bit set in CPUID?
// - XGETBV result has the XCR bit set.
if (((cpu_id[2] >> 28) & 1) && ((cpu_id[2] >> 27) & 1))
{
// 0x6 selects bits 1 and 2 of the register read by xgetbv; both must be set.
if ((xgetbv(XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6)
{
bAVX = true;
// FMA (ECX bit 12) is only reported when AVX is usable.
if ((cpu_id[2] >> 12) & 1)
bFMA = true;
}
}
if (max_std_fn >= 7)
{
// Leaf 7, sub-leaf 0: feature bits in EBX (cpu_id[1]).
__cpuidex(cpu_id, 0x00000007, 0x00000000);
// careful; we can't enable AVX2 unless the XSAVE/XGETBV checks above passed
if ((cpu_id[1] >> 5) & 1)
bAVX2 = bAVX;
if ((cpu_id[1] >> 3) & 1)
bBMI1 = true;
if ((cpu_id[1] >> 8) & 1)
bBMI2 = true;
}
}
bFlushToZero = bSSE;
if (max_ex_fn >= 0x80000004)
{
// Extract CPU model string
// Leaves 0x80000002..0x80000004 each return 16 bytes of the string.
__cpuid(cpu_id, 0x80000002);
memcpy(cpu_string, cpu_id, sizeof(cpu_id));
__cpuid(cpu_id, 0x80000003);
memcpy(cpu_string + 16, cpu_id, sizeof(cpu_id));
__cpuid(cpu_id, 0x80000004);
memcpy(cpu_string + 32, cpu_id, sizeof(cpu_id));
}
if (max_ex_fn >= 0x80000001)
{
// Check for more features.
__cpuid(cpu_id, 0x80000001);
if (cpu_id[2] & 1)
bLAHFSAHF64 = true;
if ((cpu_id[2] >> 5) & 1)
bLZCNT = true;
if ((cpu_id[2] >> 16) & 1)
bFMA4 = true;
if ((cpu_id[3] >> 29) & 1)
bLongMode = true;
}
// Default: one core per logical processor, refined below when possible.
num_cores = (logical_cpu_count == 0) ? 1 : logical_cpu_count;
if (max_ex_fn >= 0x80000008)
{
// Get number of cores. This is a bit complicated. Following AMD manual here.
__cpuid(cpu_id, 0x80000008);
// ECX[15:12]; zero selects the hyper-threading/Intel path below.
int apic_id_core_id_size = (cpu_id[2] >> 12) & 0xF;
if (apic_id_core_id_size == 0)
{
if (ht)
{
// New mechanism for modern Intel CPUs.
if (vendor == CPUVendor::Intel)
{
// Leaf 4, sub-leaf 0: EAX[31:26] + 1 is taken as the cores per package.
__cpuidex(cpu_id, 0x00000004, 0x00000000);
int cores_x_package = ((cpu_id[0] >> 26) & 0x3F) + 1;
HTT = (cores_x_package < logical_cpu_count);
// Only trust the value if it divides the logical count evenly.
cores_x_package = ((logical_cpu_count % cores_x_package) == 0) ? cores_x_package : 1;
num_cores = (cores_x_package > 1) ? cores_x_package : num_cores;
logical_cpu_count /= cores_x_package;
}
}
}
else
{
// Use AMD's new method.
// ECX[7:0] + 1 is the core count.
num_cores = (cpu_id[2] & 0xFF) + 1;
}
}
}
// Builds a one-line, human-readable description of the detected CPU:
// "<cpu_string> (<brand_string>)" followed by a ", NAME" entry for each
// feature flag that is set.
std::string CPUInfo::Summarize()
{
  std::string summary(cpu_string);
  summary += " (";
  summary += brand_string;
  summary += ")";

  // Appends `label` only when the corresponding flag is set.
  const auto note = [&summary](bool present, const char* label) {
    if (present)
      summary += label;
  };

  note(bSSE, ", SSE");
  note(bSSE2, ", SSE2");
  // The DAZ remark is attached to the SSE2 entry, so it needs SSE2 as well.
  note(bSSE2 && !bFlushToZero, " (but not DAZ!)");
  note(bSSE3, ", SSE3");
  note(bSSSE3, ", SSSE3");
  note(bSSE4_1, ", SSE4.1");
  note(bSSE4_2, ", SSE4.2");
  note(HTT, ", HTT");
  note(bAVX, ", AVX");
  note(bAVX2, ", AVX2");
  note(bBMI1, ", BMI1");
  note(bBMI2, ", BMI2");
  note(bFMA, ", FMA");
  note(bAES, ", AES");
  note(bMOVBE, ", MOVBE");
  note(bLongMode, ", 64-bit support");

  return summary;
}

3399
src/dolphin/x64Emitter.cpp Normal file

File diff suppressed because it is too large Load Diff

1169
src/dolphin/x64Emitter.h Normal file

File diff suppressed because it is too large Load Diff

96
src/dolphin/x64Reg.h Normal file
View File

@ -0,0 +1,96 @@
// Copyright 2016 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license_dolphin.txt file included.
#pragma once
namespace Gen
{
// Register identifiers used by the x64 emitter. Names for different operand
// widths of the same register share one numeric value, as do XMMn and YMMn.
enum X64Reg
{
  // 32-bit general-purpose registers
  EAX = 0,
  ECX = 1,
  EDX = 2,
  EBX = 3,
  ESP = 4,
  EBP = 5,
  ESI = 6,
  EDI = 7,

  // 64-bit general-purpose registers
  RAX = 0,
  RCX = 1,
  RDX = 2,
  RBX = 3,
  RSP = 4,
  RBP = 5,
  RSI = 6,
  RDI = 7,
  R8 = 8,
  R9 = 9,
  R10 = 10,
  R11 = 11,
  R12 = 12,
  R13 = 13,
  R14 = 14,
  R15 = 15,

  // 8-bit low registers
  AL = 0,
  CL = 1,
  DL = 2,
  BL = 3,
  SPL = 4,
  BPL = 5,
  SIL = 6,
  DIL = 7,

  // 8-bit high registers: values are 0x100 + 4..7
  AH = 0x104,
  CH = 0x105,
  DH = 0x106,
  BH = 0x107,

  // 16-bit registers
  AX = 0,
  CX = 1,
  DX = 2,
  BX = 3,
  SP = 4,
  BP = 5,
  SI = 6,
  DI = 7,

  // 128-bit vector registers
  XMM0 = 0,
  XMM1 = 1,
  XMM2 = 2,
  XMM3 = 3,
  XMM4 = 4,
  XMM5 = 5,
  XMM6 = 6,
  XMM7 = 7,
  XMM8 = 8,
  XMM9 = 9,
  XMM10 = 10,
  XMM11 = 11,
  XMM12 = 12,
  XMM13 = 13,
  XMM14 = 14,
  XMM15 = 15,

  // 256-bit vector registers
  YMM0 = 0,
  YMM1 = 1,
  YMM2 = 2,
  YMM3 = 3,
  YMM4 = 4,
  YMM5 = 5,
  YMM6 = 6,
  YMM7 = 7,
  YMM8 = 8,
  YMM9 = 9,
  YMM10 = 10,
  YMM11 = 11,
  YMM12 = 12,
  YMM13 = 13,
  YMM14 = 14,
  YMM15 = 15,

  // Sentinel meaning "no register"
  INVALID_REG = 0xFFFFFFFF
};
} // namespace Gen

View File

@ -32,6 +32,7 @@
EmuSettingsDialog* EmuSettingsDialog::currentDlg = nullptr;
extern char* EmuDirectory;
extern bool RunningSomething;
EmuSettingsDialog::EmuSettingsDialog(QWidget* parent) : QDialog(parent), ui(new Ui::EmuSettingsDialog)
@ -53,6 +54,22 @@ EmuSettingsDialog::EmuSettingsDialog(QWidget* parent) : QDialog(parent), ui(new
ui->cbxConsoleType->setCurrentIndex(Config::ConsoleType);
ui->chkDirectBoot->setChecked(Config::DirectBoot != 0);
#ifdef JIT_ENABLED
ui->chkEnableJIT->setChecked(Config::JIT_Enable != 0);
ui->chkJITBranchOptimisations->setChecked(Config::JIT_BranchOptimisations != 0);
ui->chkJITLiteralOptimisations->setChecked(Config::JIT_LiteralOptimisations != 0);
ui->chkJITFastMemory->setChecked(Config::JIT_FastMemory != 0);
ui->spnJITMaximumBlockSize->setValue(Config::JIT_MaxBlockSize);
#else
ui->chkEnableJIT->setDisabled(true);
ui->chkJITBranchOptimisations->setDisabled(true);
ui->chkJITLiteralOptimisations->setDisabled(true);
ui->chkJITFastMemory->setDisabled(true);
ui->spnJITMaximumBlockSize->setDisabled(true);
#endif
on_chkEnableJIT_toggled();
}
EmuSettingsDialog::~EmuSettingsDialog()
@ -102,29 +119,78 @@ void EmuSettingsDialog::verifyFirmware()
}
}
void EmuSettingsDialog::on_EmuSettingsDialog_accepted()
void EmuSettingsDialog::done(int r)
{
verifyFirmware();
if (r == QDialog::Accepted)
{
verifyFirmware();
strncpy(Config::BIOS9Path, ui->txtBIOS9Path->text().toStdString().c_str(), 1023); Config::BIOS9Path[1023] = '\0';
strncpy(Config::BIOS7Path, ui->txtBIOS7Path->text().toStdString().c_str(), 1023); Config::BIOS7Path[1023] = '\0';
strncpy(Config::FirmwarePath, ui->txtFirmwarePath->text().toStdString().c_str(), 1023); Config::FirmwarePath[1023] = '\0';
int consoleType = ui->cbxConsoleType->currentIndex();
int directBoot = ui->chkDirectBoot->isChecked() ? 1:0;
strncpy(Config::DSiBIOS9Path, ui->txtDSiBIOS9Path->text().toStdString().c_str(), 1023); Config::DSiBIOS9Path[1023] = '\0';
strncpy(Config::DSiBIOS7Path, ui->txtDSiBIOS7Path->text().toStdString().c_str(), 1023); Config::DSiBIOS7Path[1023] = '\0';
strncpy(Config::DSiFirmwarePath, ui->txtDSiFirmwarePath->text().toStdString().c_str(), 1023); Config::DSiFirmwarePath[1023] = '\0';
strncpy(Config::DSiNANDPath, ui->txtDSiNANDPath->text().toStdString().c_str(), 1023); Config::DSiNANDPath[1023] = '\0';
int jitEnable = ui->chkEnableJIT->isChecked() ? 1:0;
int jitMaxBlockSize = ui->spnJITMaximumBlockSize->value();
int jitBranchOptimisations = ui->chkJITBranchOptimisations->isChecked() ? 1:0;
int jitLiteralOptimisations = ui->chkJITLiteralOptimisations->isChecked() ? 1:0;
int jitFastMemory = ui->chkJITFastMemory->isChecked() ? 1:0;
Config::ConsoleType = ui->cbxConsoleType->currentIndex();
Config::DirectBoot = ui->chkDirectBoot->isChecked() ? 1:0;
std::string bios9Path = ui->txtBIOS9Path->text().toStdString();
std::string bios7Path = ui->txtBIOS7Path->text().toStdString();
std::string firmwarePath = ui->txtFirmwarePath->text().toStdString();
std::string dsiBios9Path = ui->txtDSiBIOS9Path->text().toStdString();
std::string dsiBios7Path = ui->txtDSiBIOS7Path->text().toStdString();
std::string dsiFirmwarePath = ui->txtDSiFirmwarePath->text().toStdString();
std::string dsiNANDPath = ui->txtDSiNANDPath->text().toStdString();
Config::Save();
if (consoleType != Config::ConsoleType
|| directBoot != Config::DirectBoot
#ifdef JIT_ENABLED
|| jitEnable != Config::JIT_Enable
|| jitMaxBlockSize != Config::JIT_MaxBlockSize
|| jitBranchOptimisations != Config::JIT_BranchOptimisations
|| jitLiteralOptimisations != Config::JIT_LiteralOptimisations
|| jitFastMemory != Config::JIT_FastMemory
#endif
|| strcmp(Config::BIOS9Path, bios9Path.c_str()) != 0
|| strcmp(Config::BIOS7Path, bios7Path.c_str()) != 0
|| strcmp(Config::FirmwarePath, firmwarePath.c_str()) != 0
|| strcmp(Config::DSiBIOS9Path, dsiBios9Path.c_str()) != 0
|| strcmp(Config::DSiBIOS7Path, dsiBios7Path.c_str()) != 0
|| strcmp(Config::DSiFirmwarePath, dsiFirmwarePath.c_str()) != 0
|| strcmp(Config::DSiNANDPath, dsiNANDPath.c_str()) != 0)
{
if (RunningSomething
&& QMessageBox::warning(this, "Reset necessary to apply changes",
"The emulation will be reset for the changes to take place",
QMessageBox::Yes, QMessageBox::Cancel) != QMessageBox::Yes)
return;
closeDlg();
}
strncpy(Config::BIOS9Path, bios9Path.c_str(), 1023); Config::BIOS9Path[1023] = '\0';
strncpy(Config::BIOS7Path, bios7Path.c_str(), 1023); Config::BIOS7Path[1023] = '\0';
strncpy(Config::FirmwarePath, firmwarePath.c_str(), 1023); Config::FirmwarePath[1023] = '\0';
strncpy(Config::DSiBIOS9Path, dsiBios9Path.c_str(), 1023); Config::DSiBIOS9Path[1023] = '\0';
strncpy(Config::DSiBIOS7Path, dsiBios7Path.c_str(), 1023); Config::DSiBIOS7Path[1023] = '\0';
strncpy(Config::DSiFirmwarePath, dsiFirmwarePath.c_str(), 1023); Config::DSiFirmwarePath[1023] = '\0';
strncpy(Config::DSiNANDPath, dsiNANDPath.c_str(), 1023); Config::DSiNANDPath[1023] = '\0';
#ifdef JIT_ENABLED
Config::JIT_Enable = jitEnable;
Config::JIT_MaxBlockSize = jitMaxBlockSize;
Config::JIT_BranchOptimisations = jitBranchOptimisations;
Config::JIT_LiteralOptimisations = jitLiteralOptimisations;
Config::JIT_FastMemory = jitFastMemory;
#endif
Config::ConsoleType = consoleType;
Config::DirectBoot = directBoot;
Config::Save();
}
}
QDialog::done(r);
void EmuSettingsDialog::on_EmuSettingsDialog_rejected()
{
closeDlg();
}
@ -211,3 +277,12 @@ void EmuSettingsDialog::on_btnDSiNANDBrowse_clicked()
ui->txtDSiNANDPath->setText(file);
}
// Keeps the JIT tuning widgets in step with the "Enable JIT recompiler"
// checkbox: they are enabled only while that checkbox is checked.
void EmuSettingsDialog::on_chkEnableJIT_toggled()
{
    const bool jitOn = ui->chkEnableJIT->isChecked();

    ui->chkJITBranchOptimisations->setEnabled(jitOn);
    ui->chkJITLiteralOptimisations->setEnabled(jitOn);
    ui->chkJITFastMemory->setEnabled(jitOn);
    ui->spnJITMaximumBlockSize->setEnabled(jitOn);
}

View File

@ -51,8 +51,7 @@ public:
}
private slots:
void on_EmuSettingsDialog_accepted();
void on_EmuSettingsDialog_rejected();
void done(int r);
void on_btnBIOS9Browse_clicked();
void on_btnBIOS7Browse_clicked();
@ -63,6 +62,8 @@ private slots:
void on_btnDSiFirmwareBrowse_clicked();
void on_btnDSiNANDBrowse_clicked();
void on_chkEnableJIT_toggled();
private:
void verifyFirmware();

View File

@ -6,8 +6,8 @@
<rect>
<x>0</x>
<y>0</y>
<width>490</width>
<height>392</height>
<width>514</width>
<height>359</height>
</rect>
</property>
<property name="sizePolicy">
@ -24,243 +24,336 @@
<enum>QLayout::SetFixedSize</enum>
</property>
<item>
<widget class="QGroupBox" name="groupBox">
<property name="title">
<string>DS mode</string>
<widget class="QTabWidget" name="tabWidget">
<property name="currentIndex">
<number>0</number>
</property>
<layout class="QGridLayout" name="gridLayout_2">
<item row="0" column="1">
<widget class="QLineEdit" name="txtBIOS9Path">
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="minimumSize">
<size>
<width>290</width>
<height>0</height>
</size>
</property>
<property name="statusTip">
<string/>
</property>
<property name="whatsThis">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;DS-mode ARM9 BIOS&lt;/p&gt;&lt;p&gt;Size should be 4 KB&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QLabel" name="label_3">
<property name="text">
<string>DS firmware:</string>
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label_2">
<property name="text">
<string>DS ARM7 BIOS:</string>
</property>
</widget>
</item>
<item row="0" column="0">
<widget class="QLabel" name="label">
<property name="text">
<string>DS ARM9 BIOS:</string>
</property>
</widget>
</item>
<item row="0" column="2">
<widget class="QPushButton" name="btnBIOS9Browse">
<property name="sizePolicy">
<sizepolicy hsizetype="Minimum" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="text">
<string>Browse...</string>
</property>
<property name="autoDefault">
<bool>true</bool>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QLineEdit" name="txtBIOS7Path">
<property name="whatsThis">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;DS-mode ARM7 BIOS&lt;/p&gt;&lt;p&gt;Size should be 16 KB&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
</widget>
</item>
<item row="1" column="2">
<widget class="QPushButton" name="btnBIOS7Browse">
<property name="text">
<string>Browse...</string>
</property>
</widget>
</item>
<item row="2" column="1">
<widget class="QLineEdit" name="txtFirmwarePath">
<property name="whatsThis">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;DS-mode firmware&lt;/p&gt;&lt;p&gt;&lt;br/&gt;&lt;/p&gt;&lt;p&gt;Possible firmwares:&lt;/p&gt;&lt;p&gt;* 128 KB: DS-mode firmware from a DSi or 3DS. Not bootable.&lt;/p&gt;&lt;p&gt;* 256 KB: regular DS firmware.&lt;/p&gt;&lt;p&gt;* 512 KB: iQue DS firmware.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
</widget>
</item>
<item row="2" column="2">
<widget class="QPushButton" name="btnFirmwareBrowse">
<property name="text">
<string>Browse...</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QGroupBox" name="groupBox_3">
<property name="title">
<string>DSi mode</string>
</property>
<layout class="QGridLayout" name="gridLayout_3">
<item row="0" column="2">
<widget class="QPushButton" name="btnDSiBIOS9Browse">
<property name="text">
<string>Browse...</string>
</property>
</widget>
</item>
<item row="0" column="0">
<widget class="QLabel" name="label_5">
<property name="text">
<string>DSi ARM9 BIOS:</string>
</property>
</widget>
</item>
<item row="2" column="2">
<widget class="QPushButton" name="btnDSiFirmwareBrowse">
<property name="text">
<string>Browse...</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QLineEdit" name="txtDSiBIOS7Path">
<property name="whatsThis">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;DSi-mode ARM7 BIOS&lt;/p&gt;&lt;p&gt;&lt;br/&gt;&lt;/p&gt;&lt;p&gt;Size should be 64 KB&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
</widget>
</item>
<item row="2" column="1">
<widget class="QLineEdit" name="txtDSiFirmwarePath">
<property name="whatsThis">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;DSi-mode firmware (used for DS-mode backwards compatibility)&lt;/p&gt;&lt;p&gt;&lt;br/&gt;&lt;/p&gt;&lt;p&gt;Size should be 128 KB&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label_6">
<property name="text">
<string>DSi ARM7 BIOS:</string>
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QLabel" name="label_7">
<property name="text">
<string>DSi firmware:</string>
</property>
</widget>
</item>
<item row="1" column="2">
<widget class="QPushButton" name="btnDSiBIOS7Browse">
<property name="text">
<string>Browse...</string>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QLineEdit" name="txtDSiBIOS9Path">
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="whatsThis">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;DSi-mode ARM9 BIOS&lt;/p&gt;&lt;p&gt;&lt;br/&gt;&lt;/p&gt;&lt;p&gt;Size should be 64 KB&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
</widget>
</item>
<item row="3" column="0">
<widget class="QLabel" name="label_8">
<property name="text">
<string>DSi NAND:</string>
</property>
</widget>
</item>
<item row="3" column="1">
<widget class="QLineEdit" name="txtDSiNANDPath">
<property name="whatsThis">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;DSi NAND dump&lt;/p&gt;&lt;p&gt;&lt;br/&gt;&lt;/p&gt;&lt;p&gt;Should have 'nocash footer' at the end&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
</widget>
</item>
<item row="3" column="2">
<widget class="QPushButton" name="btnDSiNANDBrowse">
<property name="text">
<string>Browse...</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QGroupBox" name="groupBox_2">
<property name="title">
<string>General</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="0" column="0">
<widget class="QLabel" name="label_4">
<property name="sizePolicy">
<sizepolicy hsizetype="Preferred" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="text">
<string>Console type:</string>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QComboBox" name="cbxConsoleType">
<property name="sizePolicy">
<sizepolicy hsizetype="MinimumExpanding" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="whatsThis">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;The type of console to emulate&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
</widget>
</item>
<item row="1" column="0" colspan="2">
<widget class="QCheckBox" name="chkDirectBoot">
<property name="whatsThis">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;When loading a ROM, completely skip the regular boot process (&amp;quot;Nintendo DS&amp;quot; screen) to boot the ROM directly.&lt;/p&gt;&lt;p&gt;&lt;br/&gt;&lt;/p&gt;&lt;p&gt;Note: if your firmware dump isn't bootable, the ROM will be booted directly regardless of this setting.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
<property name="text">
<string>Boot game directly</string>
</property>
</widget>
</item>
</layout>
<widget class="QWidget" name="tab">
<attribute name="title">
<string>General</string>
</attribute>
<layout class="QFormLayout" name="formLayout_4">
<item row="1" column="1">
<widget class="QComboBox" name="cbxConsoleType">
<property name="sizePolicy">
<sizepolicy hsizetype="MinimumExpanding" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="whatsThis">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;The type of console to emulate&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
</widget>
</item>
<item row="2" column="1">
<widget class="QCheckBox" name="chkDirectBoot">
<property name="whatsThis">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;When loading a ROM, completely skip the regular boot process (&amp;quot;Nintendo DS&amp;quot; screen) to boot the ROM directly.&lt;/p&gt;&lt;p&gt;&lt;br/&gt;&lt;/p&gt;&lt;p&gt;Note: if your firmware dump isn't bootable, the ROM will be booted directly regardless of this setting.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
<property name="text">
<string>Boot game directly</string>
</property>
</widget>
</item>
<item row="3" column="0">
<spacer name="verticalSpacer_2">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>40</height>
</size>
</property>
</spacer>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label_4">
<property name="sizePolicy">
<sizepolicy hsizetype="Preferred" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="text">
<string>Console type:</string>
</property>
</widget>
</item>
</layout>
</widget>
<widget class="QWidget" name="tab_2">
<attribute name="title">
<string>BIOS Files</string>
</attribute>
<layout class="QVBoxLayout" name="verticalLayout_2">
<item>
<widget class="QGroupBox" name="groupBox">
<property name="title">
<string>DS mode</string>
</property>
<layout class="QGridLayout" name="gridLayout_2">
<item row="2" column="0">
<widget class="QLabel" name="label_3">
<property name="text">
<string>DS firmware:</string>
</property>
</widget>
</item>
<item row="2" column="1">
<widget class="QLineEdit" name="txtFirmwarePath">
<property name="whatsThis">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;DS-mode firmware&lt;/p&gt;&lt;p&gt;&lt;br/&gt;&lt;/p&gt;&lt;p&gt;Possible firmwares:&lt;/p&gt;&lt;p&gt;* 128 KB: DS-mode firmware from a DSi or 3DS. Not bootable.&lt;/p&gt;&lt;p&gt;* 256 KB: regular DS firmware.&lt;/p&gt;&lt;p&gt;* 512 KB: iQue DS firmware.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QLineEdit" name="txtBIOS7Path">
<property name="whatsThis">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;DS-mode ARM7 BIOS&lt;/p&gt;&lt;p&gt;Size should be 16 KB&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
</widget>
</item>
<item row="0" column="2">
<widget class="QPushButton" name="btnBIOS9Browse">
<property name="sizePolicy">
<sizepolicy hsizetype="Minimum" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="text">
<string>Browse...</string>
</property>
<property name="autoDefault">
<bool>true</bool>
</property>
</widget>
</item>
<item row="2" column="2">
<widget class="QPushButton" name="btnFirmwareBrowse">
<property name="text">
<string>Browse...</string>
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label_2">
<property name="text">
<string>DS ARM7 BIOS:</string>
</property>
</widget>
</item>
<item row="0" column="0">
<widget class="QLabel" name="label">
<property name="text">
<string>DS ARM9 BIOS:</string>
</property>
</widget>
</item>
<item row="1" column="2">
<widget class="QPushButton" name="btnBIOS7Browse">
<property name="text">
<string>Browse...</string>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QLineEdit" name="txtBIOS9Path">
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="minimumSize">
<size>
<width>290</width>
<height>0</height>
</size>
</property>
<property name="statusTip">
<string/>
</property>
<property name="whatsThis">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;DS-mode ARM9 BIOS&lt;/p&gt;&lt;p&gt;Size should be 4 KB&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QGroupBox" name="groupBox_3">
<property name="title">
<string>DSi mode</string>
</property>
<layout class="QGridLayout" name="gridLayout_3">
<item row="0" column="2">
<widget class="QPushButton" name="btnDSiBIOS9Browse">
<property name="text">
<string>Browse...</string>
</property>
</widget>
</item>
<item row="0" column="0">
<widget class="QLabel" name="label_5">
<property name="text">
<string>DSi ARM9 BIOS:</string>
</property>
</widget>
</item>
<item row="2" column="2">
<widget class="QPushButton" name="btnDSiFirmwareBrowse">
<property name="text">
<string>Browse...</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QLineEdit" name="txtDSiBIOS7Path">
<property name="whatsThis">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;DSi-mode ARM7 BIOS&lt;/p&gt;&lt;p&gt;&lt;br/&gt;&lt;/p&gt;&lt;p&gt;Size should be 64 KB&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
</widget>
</item>
<item row="2" column="1">
<widget class="QLineEdit" name="txtDSiFirmwarePath">
<property name="whatsThis">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;DSi-mode firmware (used for DS-mode backwards compatibility)&lt;/p&gt;&lt;p&gt;&lt;br/&gt;&lt;/p&gt;&lt;p&gt;Size should be 128 KB&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label_6">
<property name="text">
<string>DSi ARM7 BIOS:</string>
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QLabel" name="label_7">
<property name="text">
<string>DSi firmware:</string>
</property>
</widget>
</item>
<item row="1" column="2">
<widget class="QPushButton" name="btnDSiBIOS7Browse">
<property name="text">
<string>Browse...</string>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QLineEdit" name="txtDSiBIOS9Path">
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="whatsThis">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;DSi-mode ARM9 BIOS&lt;/p&gt;&lt;p&gt;&lt;br/&gt;&lt;/p&gt;&lt;p&gt;Size should be 64 KB&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
</widget>
</item>
<item row="3" column="0">
<widget class="QLabel" name="label_8">
<property name="text">
<string>DSi NAND:</string>
</property>
</widget>
</item>
<item row="3" column="1">
<widget class="QLineEdit" name="txtDSiNANDPath">
<property name="whatsThis">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;DSi NAND dump&lt;/p&gt;&lt;p&gt;&lt;br/&gt;&lt;/p&gt;&lt;p&gt;Should have 'nocash footer' at the end&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
</widget>
</item>
<item row="3" column="2">
<widget class="QPushButton" name="btnDSiNANDBrowse">
<property name="text">
<string>Browse...</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>
</layout>
</widget>
<widget class="QWidget" name="tab_3">
<attribute name="title">
<string>CPU Emulation</string>
</attribute>
<layout class="QFormLayout" name="formLayout_5">
<item row="0" column="0">
<widget class="QCheckBox" name="chkEnableJIT">
<property name="text">
<string>Enable JIT recompiler</string>
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label_9">
<property name="text">
<string>Maximum JIT block size:</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QSpinBox" name="spnJITMaximumBlockSize">
<property name="minimum">
<number>1</number>
</property>
<property name="maximum">
<number>32</number>
</property>
<property name="value">
<number>32</number>
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QCheckBox" name="chkJITBranchOptimisations">
<property name="text">
<string>Branch Optimisations</string>
</property>
</widget>
</item>
<item row="3" column="0">
<widget class="QCheckBox" name="chkJITLiteralOptimisations">
<property name="text">
<string>Literal Optimisations</string>
</property>
</widget>
</item>
<item row="4" column="0">
<widget class="QCheckBox" name="chkJITFastMemory">
<property name="text">
<string>Fast Memory</string>
</property>
</widget>
</item>
<item row="5" column="0">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>40</height>
</size>
</property>
</spacer>
</item>
</layout>
</widget>
</widget>
</item>
<item>
@ -275,6 +368,27 @@
</item>
</layout>
</widget>
<tabstops>
<tabstop>tabWidget</tabstop>
<tabstop>cbxConsoleType</tabstop>
<tabstop>chkDirectBoot</tabstop>
<tabstop>txtBIOS9Path</tabstop>
<tabstop>txtBIOS7Path</tabstop>
<tabstop>txtFirmwarePath</tabstop>
<tabstop>txtDSiBIOS9Path</tabstop>
<tabstop>txtDSiBIOS7Path</tabstop>
<tabstop>txtDSiFirmwarePath</tabstop>
<tabstop>txtDSiNANDPath</tabstop>
<tabstop>btnBIOS9Browse</tabstop>
<tabstop>btnBIOS7Browse</tabstop>
<tabstop>btnFirmwareBrowse</tabstop>
<tabstop>btnDSiBIOS9Browse</tabstop>
<tabstop>btnDSiBIOS7Browse</tabstop>
<tabstop>btnDSiFirmwareBrowse</tabstop>
<tabstop>btnDSiNANDBrowse</tabstop>
<tabstop>chkEnableJIT</tabstop>
<tabstop>spnJITMaximumBlockSize</tabstop>
</tabstops>
<resources/>
<connections>
<connection>
@ -284,8 +398,8 @@
<slot>accept()</slot>
<hints>
<hint type="sourcelabel">
<x>248</x>
<y>254</y>
<x>257</x>
<y>349</y>
</hint>
<hint type="destinationlabel">
<x>157</x>
@ -300,8 +414,8 @@
<slot>reject()</slot>
<hints>
<hint type="sourcelabel">
<x>316</x>
<y>260</y>
<x>325</x>
<y>349</y>
</hint>
<hint type="destinationlabel">
<x>286</x>

View File

@ -72,6 +72,7 @@ char MicWavPath[1024];
char LastROMFolder[1024];
bool EnableJIT;
ConfigEntry PlatformConfigFile[] =
{

View File

@ -1641,7 +1641,14 @@ void MainWindow::onStop()
// Opens the emulator settings dialog and hooks its finished() signal up to
// onEmuSettingsDialogFinished(), so the main window is notified when the
// dialog reports a result.
void MainWindow::onOpenEmuSettings()
{
    // openDlg() is invoked exactly once: its return value is the dialog
    // instance needed for the signal connection below.
    EmuSettingsDialog* dlg = EmuSettingsDialog::openDlg(this);
    connect(dlg, &EmuSettingsDialog::finished, this, &MainWindow::onEmuSettingsDialogFinished);
}
// Slot invoked when the emulator settings dialog finishes.
//
// res: result code delivered by the dialog's finished signal. It is not
//      examined here; the reset below happens for any result.
void MainWindow::onEmuSettingsDialogFinished(int res)
{
    // With nothing running there is nothing to restart.
    if (!RunningSomething)
        return;

    // Restart the running emulation.
    // NOTE(review): presumably so changed settings take effect - confirm.
    onReset();
}
void MainWindow::onOpenInputConfig()

View File

@ -199,6 +199,7 @@ private slots:
void onStop();
void onOpenEmuSettings();
void onEmuSettingsDialogFinished(int res);
void onOpenInputConfig();
void onInputConfigFinished(int res);
void onOpenVideoSettings();

View File

@ -19,7 +19,7 @@
#ifndef VERSION_H
#define VERSION_H
#define MELONDS_VERSION "0.8.3"
#define MELONDS_VERSION "0.8.3-JIT"
#define MELONDS_URL "http://melonds.kuribo64.net/"

2390
src/xxhash/xxh3.h Normal file

File diff suppressed because it is too large Load Diff

43
src/xxhash/xxhash.c Normal file
View File

@ -0,0 +1,43 @@
/*
* xxHash - Extremely Fast Hash algorithm
* Copyright (C) 2012-2020 Yann Collet
*
* BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* You can contact the author at:
* - xxHash homepage: https://www.xxhash.com
* - xxHash source repository: https://github.com/Cyan4973/xxHash
*/
/*
 * xxhash.c instantiates functions defined in xxhash.h
 *
 * This translation unit has no code of its own. Both macros below are
 * defined before the #include so that they are already visible to the
 * preprocessor when xxhash.h is processed.
 */
#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
#define XXH_IMPLEMENTATION /* access definitions */
#include "xxhash.h"

1965
src/xxhash/xxhash.h Normal file

File diff suppressed because it is too large Load Diff