implement block linking + some refactoring

currently only supported for x64
This commit is contained in:
RSDuck 2020-04-25 13:40:51 +02:00
parent 1ad90cb334
commit 1c07932b40
18 changed files with 4870 additions and 150 deletions

2
.gitignore vendored
View File

@ -9,3 +9,5 @@ melon_grc.h
cmake-build
cmake-build-debug
.idea
*.exe

View File

@ -252,15 +252,15 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr)
if (addr & 0x2)
{
NextInstr[0] = CodeRead32(addr-2, true) >> 16;
Cycles += CodeCycles;
Cycles -= CodeCycles;
NextInstr[1] = CodeRead32(addr+2, false);
Cycles += CodeCycles;
Cycles -= CodeCycles;
}
else
{
NextInstr[0] = CodeRead32(addr, true);
NextInstr[1] = NextInstr[0] >> 16;
Cycles += CodeCycles;
Cycles -= CodeCycles;
}
CPSR |= 0x20;
@ -273,9 +273,9 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr)
if (newregion != oldregion) SetupCodeMem(addr);
NextInstr[0] = CodeRead32(addr, true);
Cycles += CodeCycles;
Cycles -= CodeCycles;
NextInstr[1] = CodeRead32(addr+4, false);
Cycles += CodeCycles;
Cycles -= CodeCycles;
CPSR &= ~0x20;
}
@ -315,7 +315,7 @@ void ARMv4::JumpTo(u32 addr, bool restorecpsr)
NextInstr[0] = CodeRead16(addr);
NextInstr[1] = CodeRead16(addr+2);
Cycles += NDS::ARM7MemTimings[CodeCycles][0] + NDS::ARM7MemTimings[CodeCycles][1];
Cycles -= NDS::ARM7MemTimings[CodeCycles][0] + NDS::ARM7MemTimings[CodeCycles][1];
CPSR |= 0x20;
}
@ -328,7 +328,7 @@ void ARMv4::JumpTo(u32 addr, bool restorecpsr)
NextInstr[0] = CodeRead32(addr);
NextInstr[1] = CodeRead32(addr+4);
Cycles += NDS::ARM7MemTimings[CodeCycles][2] + NDS::ARM7MemTimings[CodeCycles][3];
Cycles -= NDS::ARM7MemTimings[CodeCycles][2] + NDS::ARM7MemTimings[CodeCycles][3];
CPSR &= ~0x20;
}
@ -587,7 +587,7 @@ void ARMv5::Execute()
}*/
if (IRQ) TriggerIRQ();
NDS::ARM9Timestamp += Cycles;
NDS::ARM9Timestamp -= Cycles;
Cycles = 0;
}
@ -627,14 +627,16 @@ void ARMv5::ExecuteJIT()
return;
}
ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlock<0>(instrAddr);
// hack so Cycles <= 0 becomes Cycles < 0
Cycles = NDS::ARM9Target - NDS::ARM9Timestamp - 1;
ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlockEntry(ARMJIT::TranslateAddr<0>(instrAddr));
if (block)
Cycles += block();
ARM_Dispatch(this, block);
else
ARMJIT::CompileBlock(this);
NDS::ARM9Timestamp += Cycles;
Cycles = 0;
NDS::ARM9Timestamp = NDS::ARM9Target - (Cycles + 1);
if (StopExecution)
{
@ -728,7 +730,7 @@ void ARMv4::Execute()
}*/
if (IRQ) TriggerIRQ();
NDS::ARM7Timestamp += Cycles;
NDS::ARM7Timestamp -= Cycles;
Cycles = 0;
}
@ -768,14 +770,15 @@ void ARMv4::ExecuteJIT()
return;
}
ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlock<1>(instrAddr);
Cycles = NDS::ARM7Target - NDS::ARM7Timestamp - 1;
ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlockEntry(ARMJIT::TranslateAddr<1>(instrAddr));
if (block)
Cycles += block();
ARM_Dispatch(this, block);
else
ARMJIT::CompileBlock(this);
NDS::ARM7Timestamp += Cycles;
Cycles = 0;
NDS::ARM7Timestamp = NDS::ARM7Target - (Cycles + 1);
// TODO optimize this shit!!!
if (StopExecution)

View File

@ -193,14 +193,14 @@ public:
{
// code only. always nonseq 32-bit for ARM9.
s32 numC = (R[15] & 0x2) ? 0 : CodeCycles;
Cycles += numC;
Cycles -= numC;
}
void AddCycles_CI(s32 numI)
{
// code+internal
s32 numC = (R[15] & 0x2) ? 0 : CodeCycles;
Cycles += numC + numI;
Cycles -= numC + numI;
}
void AddCycles_CDI()
@ -211,9 +211,9 @@ public:
s32 numD = DataCycles;
//if (DataRegion != CodeRegion)
Cycles += std::max(numC + numD - 6, std::max(numC, numD));
Cycles -= std::max(numC + numD - 6, std::max(numC, numD));
//else
// Cycles += numC + numD;
// Cycles -= numC + numD;
}
void AddCycles_CD()
@ -223,9 +223,9 @@ public:
s32 numD = DataCycles;
//if (DataRegion != CodeRegion)
Cycles += std::max(numC + numD - 6, std::max(numC, numD));
Cycles -= std::max(numC + numD - 6, std::max(numC, numD));
//else
// Cycles += numC + numD;
// Cycles -= numC + numD;
}
void GetCodeMemRegion(u32 addr, NDS::MemRegion* region);
@ -387,13 +387,13 @@ public:
void AddCycles_C()
{
// code only. this code fetch is sequential.
Cycles += NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?1:3];
Cycles -= NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?1:3];
}
void AddCycles_CI(s32 num)
{
// code+internal. results in a nonseq code fetch.
Cycles += NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?0:2] + num;
Cycles -= NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?0:2] + num;
}
void AddCycles_CDI()
@ -405,21 +405,21 @@ public:
if ((DataRegion >> 4) == 0x02) // mainRAM
{
if (CodeRegion == 0x02)
Cycles += numC + numD;
Cycles -= numC + numD;
else
{
numC++;
Cycles += std::max(numC + numD - 3, std::max(numC, numD));
Cycles -= std::max(numC + numD - 3, std::max(numC, numD));
}
}
else if (CodeRegion == 0x02)
{
numD++;
Cycles += std::max(numC + numD - 3, std::max(numC, numD));
Cycles -= std::max(numC + numD - 3, std::max(numC, numD));
}
else
{
Cycles += numC + numD + 1;
Cycles -= numC + numD + 1;
}
}
@ -432,17 +432,17 @@ public:
if ((DataRegion >> 4) == 0x02)
{
if (CodeRegion == 0x02)
Cycles += numC + numD;
Cycles -= numC + numD;
else
Cycles += std::max(numC + numD - 3, std::max(numC, numD));
Cycles -= std::max(numC + numD - 3, std::max(numC, numD));
}
else if (CodeRegion == 0x02)
{
Cycles += std::max(numC + numD - 3, std::max(numC, numD));
Cycles -= std::max(numC + numD - 3, std::max(numC, numD));
}
else
{
Cycles += numC + numD;
Cycles -= numC + numD;
}
}
};

View File

@ -2,6 +2,10 @@
#include <string.h>
#include <assert.h>
#include <unordered_map>
#define XXH_STATIC_LINKING_ONLY
#include "xxhash/xxhash.h"
#include "Config.h"
@ -113,16 +117,101 @@ const static ExeMemKind JIT_MEM[2][32] = {
u32 AddrTranslate9[0x2000];
u32 AddrTranslate7[0x4000];
JitBlockEntry FastBlockAccess[ExeMemSpaceSize / 2];
AddressRange CodeRanges[ExeMemSpaceSize / 512];
TinyVector<JitBlock*> JitBlocks;
JitBlock* RestoreCandidates[0x1000] = {NULL};
std::unordered_map<u32, JitBlock*> JitBlocks;
u32 HashRestoreCandidate(u32 pseudoPhysicalAddr)
template <typename K, typename V, int Size, V InvalidValue>
struct UnreliableHashTable
{
return (u32)(((u64)pseudoPhysicalAddr * 11400714819323198485llu) >> 53);
}
struct Bucket
{
K KeyA, KeyB;
V ValA, ValB;
};
Bucket Table[Size];
void Reset()
{
for (int i = 0; i < Size; i++)
{
Table[i].ValA = Table[i].ValB = InvalidValue;
}
}
UnreliableHashTable()
{
Reset();
}
V Insert(K key, V value)
{
u32 slot = XXH3_64bits(&key, sizeof(K)) & (Size - 1);
Bucket* bucket = &Table[slot];
if (bucket->ValA == value || bucket->ValB == value)
{
return InvalidValue;
}
else if (bucket->ValA == InvalidValue)
{
bucket->KeyA = key;
bucket->ValA = value;
}
else if (bucket->ValB == InvalidValue)
{
bucket->KeyB = key;
bucket->ValB = value;
}
else
{
V prevVal = bucket->ValB;
bucket->KeyB = bucket->KeyA;
bucket->ValB = bucket->ValA;
bucket->KeyA = key;
bucket->ValA = value;
return prevVal;
}
return InvalidValue;
}
void Remove(K key)
{
u32 slot = XXH3_64bits(&key, sizeof(K)) & (Size - 1);
Bucket* bucket = &Table[slot];
if (bucket->KeyA == key && bucket->ValA != InvalidValue)
{
bucket->ValA = InvalidValue;
if (bucket->ValB != InvalidValue)
{
bucket->KeyA = bucket->KeyB;
bucket->ValA = bucket->ValB;
bucket->ValB = InvalidValue;
}
}
if (bucket->KeyB == key && bucket->ValB != InvalidValue)
bucket->ValB = InvalidValue;
}
V LookUp(K addr)
{
u32 slot = XXH3_64bits(&addr, 4) & (Size - 1);
Bucket* bucket = &Table[slot];
if (bucket->ValA != InvalidValue && bucket->KeyA == addr)
return bucket->ValA;
if (bucket->ValB != InvalidValue && bucket->KeyB == addr)
return bucket->ValB;
return InvalidValue;
}
};
UnreliableHashTable<u32, JitBlock*, 0x800, nullptr> RestoreCandidates;
UnreliableHashTable<u32, u32, 0x1000, UINT32_MAX> FastBlockLookUp;
void Init()
{
@ -396,9 +485,8 @@ void CompileBlock(ARM* cpu)
u32 nextInstr[2] = {cpu->NextInstr[0], cpu->NextInstr[1]};
u32 nextInstrAddr[2] = {blockAddr, r15};
JIT_DEBUGPRINT("start block %x %08x (%x) %p %p (region invalidates %dx)\n",
blockAddr, cpu->CPSR, pseudoPhysicalAddr, FastBlockAccess[pseudoPhysicalAddr / 2],
cpu->Num == 0 ? LookUpBlock<0>(blockAddr) : LookUpBlock<1>(blockAddr),
JIT_DEBUGPRINT("start block %x %08x (%x) (region invalidates %dx)\n",
blockAddr, cpu->CPSR, pseudoPhysicalAddr,
CodeRanges[pseudoPhysicalAddr / 512].TimesInvalidated);
u32 lastSegmentStart = blockAddr;
@ -534,6 +622,8 @@ void CompileBlock(ARM* cpu)
if (staticBranch)
{
instrs[i].BranchFlags |= branch_StaticTarget;
bool isBackJump = false;
if (hasBranched)
{
@ -604,12 +694,11 @@ void CompileBlock(ARM* cpu)
FloodFillSetFlags(instrs, i - 2, !secondaryFlagReadCond ? instrs[i - 1].Info.ReadFlags : 0xF);
} while(!instrs[i - 1].Info.EndBlock && i < Config::JIT_MaxBlockSize && !cpu->Halted && (!cpu->IRQ || (cpu->CPSR & 0x80)));
u32 restoreSlot = HashRestoreCandidate(pseudoPhysicalAddr);
JitBlock* prevBlock = RestoreCandidates[restoreSlot];
JitBlock* prevBlock = RestoreCandidates.LookUp(pseudoPhysicalAddr);
bool mayRestore = true;
if (prevBlock && prevBlock->PseudoPhysicalAddr == pseudoPhysicalAddr)
if (prevBlock)
{
RestoreCandidates[restoreSlot] = NULL;
RestoreCandidates.Remove(pseudoPhysicalAddr);
if (prevBlock->NumInstrs == i)
{
for (int j = 0; j < i; j++)
@ -661,7 +750,7 @@ void CompileBlock(ARM* cpu)
FloodFillSetFlags(instrs, i - 1, 0xF);
block->EntryPoint = compiler->CompileBlock(cpu, thumb, instrs, i);
block->EntryPoint = compiler->CompileBlock(pseudoPhysicalAddr, cpu, thumb, instrs, i);
}
else
{
@ -675,9 +764,8 @@ void CompileBlock(ARM* cpu)
CodeRanges[addresseRanges[j] / 512].Blocks.Add(block);
}
FastBlockAccess[block->PseudoPhysicalAddr / 2] = block->EntryPoint;
JitBlocks.Add(block);
JitBlocks[pseudoPhysicalAddr] = block;
FastBlockLookUp.Insert(pseudoPhysicalAddr, compiler->SubEntryOffset(block->EntryPoint));
}
void InvalidateByAddr(u32 pseudoPhysical, bool mayRestore)
@ -701,18 +789,17 @@ void InvalidateByAddr(u32 pseudoPhysical, bool mayRestore)
}
}
bool removed = JitBlocks.RemoveByValue(block);
assert(removed);
for (int j = 0; j < block->NumLinks(); j++)
compiler->UnlinkBlock(block->Links()[j]);
FastBlockAccess[block->PseudoPhysicalAddr / 2] = NULL;
JitBlocks.erase(block->PseudoPhysicalAddr);
FastBlockLookUp.Remove(block->PseudoPhysicalAddr);
if (mayRestore)
{
u32 slot = HashRestoreCandidate(block->PseudoPhysicalAddr);
if (RestoreCandidates[slot] && RestoreCandidates[slot] != block)
delete RestoreCandidates[slot];
RestoreCandidates[slot] = block;
JitBlock* prevBlock = RestoreCandidates.Insert(block->PseudoPhysicalAddr, block);
if (prevBlock)
delete prevBlock;
}
}
if ((range->TimesInvalidated + 1) > range->TimesInvalidated)
@ -738,47 +825,54 @@ void InvalidateITCM(u32 addr)
void InvalidateAll()
{
JIT_DEBUGPRINT("invalidating all %x\n", JitBlocks.Length);
for (int i = 0; i < JitBlocks.Length; i++)
for (auto it : JitBlocks)
{
JitBlock* block = JitBlocks[i];
JitBlock* block = it.second;
FastBlockAccess[block->PseudoPhysicalAddr / 2] = NULL;
FastBlockLookUp.Remove(block->PseudoPhysicalAddr);
for (int j = 0; j < block->NumAddresses; j++)
for (int i = 0; i < block->NumAddresses; i++)
{
u32 addr = block->AddressRanges()[j];
u32 addr = block->AddressRanges()[i];
AddressRange* range = &CodeRanges[addr / 512];
range->Blocks.Clear();
if (range->TimesInvalidated + 1 > range->TimesInvalidated)
range->TimesInvalidated++;
}
for (int i = 0; i < block->NumLinks(); i++)
compiler->UnlinkBlock(block->Links()[i]);
block->ResetLinks();
u32 slot = HashRestoreCandidate(block->PseudoPhysicalAddr);
if (RestoreCandidates[slot] && RestoreCandidates[slot] != block)
delete RestoreCandidates[slot];
RestoreCandidates[slot] = block;
JitBlock* prevBlock = RestoreCandidates.Insert(block->PseudoPhysicalAddr, block);
if (prevBlock)
delete prevBlock;
}
JitBlocks.Clear();
JitBlocks.clear();
}
void ResetBlockCache()
{
printf("Resetting JIT block cache...\n");
memset(FastBlockAccess, 0, sizeof(FastBlockAccess));
for (int i = 0; i < sizeof(RestoreCandidates)/sizeof(RestoreCandidates[0]); i++)
FastBlockLookUp.Reset();
RestoreCandidates.Reset();
for (int i = 0; i < sizeof(RestoreCandidates.Table)/sizeof(RestoreCandidates.Table[0]); i++)
{
if (RestoreCandidates[i])
if (RestoreCandidates.Table[i].ValA)
{
delete RestoreCandidates[i];
RestoreCandidates[i] = NULL;
delete RestoreCandidates.Table[i].ValA;
RestoreCandidates.Table[i].ValA = NULL;
}
if (RestoreCandidates.Table[i].ValA)
{
delete RestoreCandidates.Table[i].ValB;
RestoreCandidates.Table[i].ValB = NULL;
}
}
for (int i = 0; i < JitBlocks.Length; i++)
for (auto it : JitBlocks)
{
JitBlock* block = JitBlocks[i];
JitBlock* block = it.second;
for (int j = 0; j < block->NumAddresses; j++)
{
u32 addr = block->AddressRanges()[j];
@ -788,11 +882,43 @@ void ResetBlockCache()
}
delete block;
}
JitBlocks.Clear();
JitBlocks.clear();
compiler->Reset();
}
JitBlockEntry LookUpBlockEntry(u32 addr)
{
u32 entryOffset = FastBlockLookUp.LookUp(addr);
if (entryOffset != UINT32_MAX)
return compiler->AddEntryOffset(entryOffset);
auto block = JitBlocks.find(addr);
if (block != JitBlocks.end())
{
FastBlockLookUp.Insert(addr, compiler->SubEntryOffset(block->second->EntryPoint));
return block->second->EntryPoint;
}
return NULL;
}
template <u32 Num>
void LinkBlock(ARM* cpu, u32 codeOffset)
{
u32 targetPseudoPhys = TranslateAddr<Num>(cpu->R[15] - ((cpu->CPSR&0x20)?2:4));
auto block = JitBlocks.find(targetPseudoPhys);
if (block == JitBlocks.end())
{
CompileBlock(cpu);
block = JitBlocks.find(targetPseudoPhys);
}
JIT_DEBUGPRINT("linking to block %08x\n", targetPseudoPhys);
block->second->AddLink(codeOffset);
compiler->LinkBlock(codeOffset, block->second->EntryPoint);
}
void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size)
{
if (cpu->Num == 0)
@ -875,3 +1001,6 @@ void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size)
}
}
template void ARMJIT::LinkBlock<0>(ARM*, u32);
template void ARMJIT::LinkBlock<1>(ARM*, u32);

View File

@ -32,7 +32,6 @@ extern u32 AddrTranslate9[0x2000];
extern u32 AddrTranslate7[0x4000];
const u32 ExeMemSpaceSize = 0x518000; // I hate you C++, sometimes I really hate you...
extern JitBlockEntry FastBlockAccess[ExeMemSpaceSize / 2];
template <u32 num>
inline bool IsMapped(u32 addr)
@ -52,11 +51,8 @@ inline u32 TranslateAddr(u32 addr)
return AddrTranslate7[(addr & 0xFFFFFFF) >> 14] + (addr & 0x3FFF);
}
template <u32 num>
inline JitBlockEntry LookUpBlock(u32 addr)
{
return FastBlockAccess[TranslateAddr<num>(addr) / 2];
}
JitBlockEntry LookUpBlockEntry(u32 addr);
void Init();
void DeInit();
@ -73,4 +69,6 @@ void ResetBlockCache();
}
extern "C" void ARM_Dispatch(ARM* cpu, ARMJIT::JitBlockEntry entry);
#endif

View File

@ -15,7 +15,8 @@ enum
{
branch_IdleBranch = 1 << 0,
branch_FollowCondTaken = 1 << 1,
branch_FollowCondNotTaken = 1 << 2
branch_FollowCondNotTaken = 1 << 2,
branch_StaticTarget = 1 << 3,
};
struct FetchedInstr
@ -76,7 +77,7 @@ struct __attribute__((packed)) TinyVector
assert(capacity > Capacity);
T* newMem = new T[capacity];
if (Data != NULL)
memcpy(newMem, Data, sizeof(Data) * Length);
memcpy(newMem, Data, sizeof(T) * Length);
T* oldData = Data;
Data = newMem;
@ -163,7 +164,6 @@ public:
u32 NumInstrs;
u32 NumAddresses;
u32 NumLinks;
JitBlockEntry EntryPoint;
@ -171,6 +171,21 @@ public:
{ return &Data[0]; }
u32* AddressRanges()
{ return &Data[NumInstrs]; }
u32* Links()
{ return &Data[NumInstrs + NumAddresses]; }
u32 NumLinks()
{ return Data.Length - NumInstrs - NumAddresses; }
void AddLink(u32 link)
{
Data.Add(link);
}
void ResetLinks()
{
Data.SetLength(NumInstrs + NumAddresses);
}
private:
/*
@ -200,6 +215,9 @@ extern u8 MemRegion7[0x80000];
void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size);
template <u32 Num>
void LinkBlock(ARM* cpu, u32 codeOffset);
}
#endif

View File

@ -127,7 +127,7 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles)
ConstantCycles += cycles;
else
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
}
void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
@ -135,7 +135,7 @@ void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
IrregularCycles = true;
BitSet16 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00);
bool previouslyDirty = CPSRDirty;
bool cpsrDirty = CPSRDirty;
SaveCPSR();
if (restoreCPSR)
@ -168,9 +168,10 @@ void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
LoadReg(reg, RegCache.Mapping[reg]);
}
if (previouslyDirty)
LoadCPSR();
CPSRDirty = previouslyDirty;
// in case this instruction is skipped
if (CurInstr.Cond() < 0xE)
CPSRDirty = cpsrDirty;
}
void Compiler::A_Comp_BranchImm()
@ -209,20 +210,12 @@ void Compiler::T_Comp_BCOND()
s32 offset = (s32)(CurInstr.Instr << 24) >> 23;
Comp_JumpTo(R15 + offset + 1, true);
Comp_SpecialBranchBehaviour();
Comp_SpecialBranchBehaviour(true);
FixupBranch skipFailed = J();
SetJumpTarget(skipExecute);
if (CurInstr.BranchFlags & branch_FollowCondTaken)
{
RegCache.PrepareExit();
SaveCPSR(false);
MOV(32, R(RAX), Imm32(ConstantCycles));
ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
RET();
}
Comp_SpecialBranchBehaviour(false);
Comp_AddCycles_C(true);
SetJumpTarget(skipFailed);

View File

@ -1,6 +1,7 @@
#include "ARMJIT_Compiler.h"
#include "../ARMInterpreter.h"
#include "../Config.h"
#include <assert.h>
@ -15,6 +16,8 @@
using namespace Gen;
extern "C" void ARM_Ret();
namespace ARMJIT
{
template <>
@ -170,6 +173,24 @@ Compiler::Compiler()
RET();
}
{
CPSRDirty = true;
BranchStub[0] = GetWritableCodePtr();
SaveCPSR();
MOV(64, R(ABI_PARAM1), R(RCPU));
CALL((u8*)ARMJIT::LinkBlock<0>);
LoadCPSR();
JMP((u8*)ARM_Ret, true);
CPSRDirty = true;
BranchStub[1] = GetWritableCodePtr();
SaveCPSR();
MOV(64, R(ABI_PARAM1), R(RCPU));
CALL((u8*)ARMJIT::LinkBlock<1>);
LoadCPSR();
JMP((u8*)ARM_Ret, true);
}
// move the region forward to prevent overwriting the generated functions
CodeMemSize -= GetWritableCodePtr() - ResetStart;
ResetStart = GetWritableCodePtr();
@ -362,23 +383,43 @@ void Compiler::Reset()
SetCodePtr(ResetStart);
}
void Compiler::Comp_SpecialBranchBehaviour()
void Compiler::Comp_SpecialBranchBehaviour(bool taken)
{
if (CurInstr.BranchFlags & branch_IdleBranch)
OR(32, MDisp(RCPU, offsetof(ARM, IdleLoop)), Imm8(0x1));
if (taken && CurInstr.BranchFlags & branch_IdleBranch)
OR(8, MDisp(RCPU, offsetof(ARM, IdleLoop)), Imm8(0x1));
if (CurInstr.BranchFlags & branch_FollowCondNotTaken)
if ((CurInstr.BranchFlags & branch_FollowCondNotTaken && taken)
|| (CurInstr.BranchFlags & branch_FollowCondTaken && !taken))
{
RegCache.PrepareExit();
SaveCPSR(false);
MOV(32, R(RAX), Imm32(ConstantCycles));
ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
RET();
SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));
if (Config::JIT_BrancheOptimisations == 2 && !(CurInstr.BranchFlags & branch_IdleBranch)
&& (!taken || (CurInstr.BranchFlags & branch_StaticTarget)))
{
FixupBranch ret = J_CC(CC_S);
CMP(32, MDisp(RCPU, offsetof(ARM, StopExecution)), Imm8(0));
FixupBranch ret2 = J_CC(CC_NZ);
u8* rewritePart = GetWritableCodePtr();
NOP(5);
MOV(32, R(ABI_PARAM2), Imm32(rewritePart - ResetStart));
JMP((u8*)BranchStub[Num], true);
SetJumpTarget(ret);
SetJumpTarget(ret2);
JMP((u8*)ARM_Ret, true);
}
else
{
JMP((u8*)&ARM_Ret, true);
}
}
}
JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
JitBlockEntry Compiler::CompileBlock(u32 translatedAddr, ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
{
if (CodeMemSize - (GetWritableCodePtr() - ResetStart) < 1024 * 32) // guess...
ResetBlockCache();
@ -388,15 +429,11 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
Num = cpu->Num;
CodeRegion = instrs[0].Addr >> 24;
CurCPU = cpu;
// CPSR might have been modified in a previous block
CPSRDirty = Config::JIT_BrancheOptimisations == 2;
JitBlockEntry res = (JitBlockEntry)GetWritableCodePtr();
ABI_PushRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
MOV(64, R(RCPU), ImmPtr(cpu));
LoadCPSR();
RegCache = RegisterCache<Compiler, X64Reg>(this, instrs, instrsCount);
for (int i = 0; i < instrsCount; i++)
@ -474,7 +511,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
else
(this->*comp)();
Comp_SpecialBranchBehaviour();
Comp_SpecialBranchBehaviour(true);
if (CurInstr.Cond() < 0xE)
{
@ -485,15 +522,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
Comp_AddCycles_C(true);
if (CurInstr.BranchFlags & branch_FollowCondTaken)
{
RegCache.PrepareExit();
SaveCPSR(false);
MOV(32, R(RAX), Imm32(ConstantCycles));
ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
RET();
}
Comp_SpecialBranchBehaviour(false);
SetJumpTarget(skipFailed);
}
@ -504,17 +533,38 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
}
}
if (comp == NULL && i != instrsCount - 1)
if (comp == NULL)
LoadCPSR();
}
RegCache.Flush();
SaveCPSR();
MOV(32, R(RAX), Imm32(ConstantCycles));
SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));
ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
RET();
if (Config::JIT_BrancheOptimisations == 2
&& !(instrs[instrsCount - 1].BranchFlags & branch_IdleBranch)
&& (!instrs[instrsCount - 1].Info.Branches()
|| instrs[instrsCount - 1].BranchFlags & branch_FollowCondNotTaken
|| (instrs[instrsCount - 1].BranchFlags & branch_FollowCondTaken && instrs[instrsCount - 1].BranchFlags & branch_StaticTarget)))
{
FixupBranch ret = J_CC(CC_S);
CMP(32, MDisp(RCPU, offsetof(ARM, StopExecution)), Imm8(0));
FixupBranch ret2 = J_CC(CC_NZ);
u8* rewritePart = GetWritableCodePtr();
NOP(5);
MOV(32, R(ABI_PARAM2), Imm32(rewritePart - ResetStart));
JMP((u8*)BranchStub[Num], true);
SetJumpTarget(ret);
SetJumpTarget(ret2);
JMP((u8*)ARM_Ret, true);
}
else
{
JMP((u8*)ARM_Ret, true);
}
/*FILE* codeout = fopen("codeout", "a");
fprintf(codeout, "beginning block argargarg__ %x!!!", instrs[0].Addr);
@ -525,6 +575,22 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
return res;
}
void Compiler::LinkBlock(u32 offset, JitBlockEntry entry)
{
u8* curPtr = GetWritableCodePtr();
SetCodePtr(ResetStart + offset);
JMP((u8*)entry, true);
SetCodePtr(curPtr);
}
void Compiler::UnlinkBlock(u32 offset)
{
u8* curPtr = GetWritableCodePtr();
SetCodePtr(ResetStart + offset);
NOP(5);
SetCodePtr(curPtr);
}
void Compiler::Comp_AddCycles_C(bool forceNonConstant)
{
s32 cycles = Num ?
@ -532,7 +598,7 @@ void Compiler::Comp_AddCycles_C(bool forceNonConstant)
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles);
if ((!Thumb && CurInstr.Cond() < 0xE) || forceNonConstant)
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
else
ConstantCycles += cycles;
}
@ -544,7 +610,7 @@ void Compiler::Comp_AddCycles_CI(u32 i)
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + i;
if (!Thumb && CurInstr.Cond() < 0xE)
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
else
ConstantCycles += cycles;
}
@ -558,12 +624,12 @@ void Compiler::Comp_AddCycles_CI(Gen::X64Reg i, int add)
if (!Thumb && CurInstr.Cond() < 0xE)
{
LEA(32, RSCRATCH, MDisp(i, add + cycles));
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(RSCRATCH));
SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(RSCRATCH));
}
else
{
ConstantCycles += i + cycles;
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(i));
SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(i));
}
}
@ -599,7 +665,7 @@ void Compiler::Comp_AddCycles_CDI()
}
if (!Thumb && CurInstr.Cond() < 0xE)
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
else
ConstantCycles += cycles;
}
@ -643,7 +709,7 @@ void Compiler::Comp_AddCycles_CD()
}
if (IrregularCycles && !Thumb && CurInstr.Cond() < 0xE)
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
else
ConstantCycles += cycles;
}

View File

@ -51,7 +51,10 @@ public:
void Reset();
JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount);
void LinkBlock(u32 offset, JitBlockEntry entry);
void UnlinkBlock(u32 offset);
JitBlockEntry CompileBlock(u32 translatedAddr, ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount);
void LoadReg(int reg, Gen::X64Reg nativeReg);
void SaveReg(int reg, Gen::X64Reg nativeReg);
@ -145,7 +148,7 @@ public:
void Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed);
void Comp_SpecialBranchBehaviour();
void Comp_SpecialBranchBehaviour(bool taken);
void* Gen_MemoryRoutine9(bool store, int size);
@ -176,12 +179,24 @@ public:
return Gen::R(RegCache.Mapping[reg]);
}
JitBlockEntry AddEntryOffset(u32 offset)
{
return (JitBlockEntry)(ResetStart + offset);
}
u32 SubEntryOffset(JitBlockEntry entry)
{
return (u8*)entry - ResetStart;
}
u8* ResetStart;
u32 CodeMemSize;
bool Exit;
bool IrregularCycles;
void* BranchStub[2];
void* MemoryFuncs9[3][2];
void* MemoryFuncs7[3][2];

View File

@ -0,0 +1,15 @@
#include "../ARM.h"
int main(int argc, char* argv[])
{
FILE* f = fopen("ARMJIT_Offsets.h", "w");
#define writeOffset(field) \
fprintf(f, "#define ARM_" #field "_offset 0x%x\n", offsetof(ARM, field))
writeOffset(CPSR);
writeOffset(Cycles);
writeOffset(StopExecution);
fclose(f);
return 0;
}

View File

@ -0,0 +1,74 @@
.intel_syntax noprefix
#include "ARMJIT_Offsets.h"
.text
#define RCPU rbp
#define RCPSR r15d
#ifdef WIN64
#define ARG1_REG ecx
#define ARG2_REG edx
#define ARG3_REG r8d
#define ARG4_REG r9d
#define ARG1_REG64 rcx
#define ARG2_REG64 rdx
#define ARG3_REG64 r8
#define ARG4_REG64 r9
#else
#define ARG1_REG edi
#define ARG2_REG esi
#define ARG3_REG edx
#define ARG4_REG ecx
#define ARG1_REG64 rdi
#define ARG2_REG64 rsi
#define ARG3_REG64 rdx
#define ARG4_REG64 rcx
#endif
.p2align 4,,15
.global ARM_Dispatch
ARM_Dispatch:
#ifdef WIN64
push rdi
push rsi
#endif
push rbx
push r12
push r13
push r14
push r15
push rbp
#ifdef WIN64
sub rsp, 0x28
#endif
mov RCPU, ARG1_REG64
mov RCPSR, [RCPU + ARM_CPSR_offset]
jmp ARG2_REG64
.p2align 4,,15
.global ARM_Ret
ARM_Ret:
mov [RCPU + ARM_CPSR_offset], RCPSR
#ifdef WIN64
add rsp, 0x28
#endif
pop rbp
pop r15
pop r14
pop r13
pop r12
pop rbx
#ifdef WIN64
pop rsi
pop rdi
#endif
ret

View File

@ -0,0 +1,3 @@
#define ARM_CPSR_offset 0x64
#define ARM_Cycles_offset 0xc
#define ARM_StopExecution_offset 0x10

View File

@ -49,9 +49,12 @@ add_library(core STATIC
WifiAP.cpp
tiny-AES-c/aes.c
xxhash/xxhash.c
)
if (ENABLE_JIT)
enable_language(ASM)
target_sources(core PRIVATE
ARMJIT.cpp
@ -68,7 +71,10 @@ if (ENABLE_JIT)
ARMJIT_x64/ARMJIT_ALU.cpp
ARMJIT_x64/ARMJIT_LoadStore.cpp
ARMJIT_x64/ARMJIT_Branch.cpp
ARMJIT_x64/ARMJIT_Linkage.s
)
set_source_files_properties(ARMJIT_x64/ARMJIT_Linkage.s PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp")
endif()
if (ARCHITECTURE STREQUAL ARM64)
target_sources(core PRIVATE

View File

@ -38,10 +38,10 @@ char DSiFirmwarePath[1024];
char DSiNANDPath[1024];
#ifdef JIT_ENABLED
bool JIT_Enable = false;
int JIT_Enable = false;
int JIT_MaxBlockSize = 12;
bool JIT_BrancheOptimisations = true;
bool JIT_LiteralOptimisations = true;
int JIT_BrancheOptimisations = 2;
int JIT_LiteralOptimisations = true;
#endif
ConfigEntry ConfigFile[] =
@ -58,7 +58,7 @@ ConfigEntry ConfigFile[] =
#ifdef JIT_ENABLED
{"JIT_Enable", 0, &JIT_Enable, 0, NULL, 0},
{"JIT_MaxBlockSize", 0, &JIT_MaxBlockSize, 10, NULL, 0},
{"JIT_BrancheOptimisations", 0, &JIT_BrancheOptimisations, 1, NULL, 0},
{"JIT_BranchOptimisations", 0, &JIT_BrancheOptimisations, 2, NULL, 0},
{"JIT_LiteralOptimisations", 0, &JIT_LiteralOptimisations, 1, NULL, 0},
#endif

View File

@ -52,10 +52,10 @@ extern char DSiFirmwarePath[1024];
extern char DSiNANDPath[1024];
#ifdef JIT_ENABLED
extern bool JIT_Enable;
extern int JIT_Enable;
extern int JIT_MaxBlockSize;
extern bool JIT_BrancheOptimisations;
extern bool JIT_LiteralOptimisations;
extern int JIT_BrancheOptimisations;
extern int JIT_LiteralOptimisations;
#endif
}

2390
src/xxhash/xxh3.h Normal file

File diff suppressed because it is too large Load Diff

43
src/xxhash/xxhash.c Normal file
View File

@ -0,0 +1,43 @@
/*
* xxHash - Extremely Fast Hash algorithm
* Copyright (C) 2012-2020 Yann Collet
*
* BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* You can contact the author at:
* - xxHash homepage: https://www.xxhash.com
* - xxHash source repository: https://github.com/Cyan4973/xxHash
*/
/*
* xxhash.c instantiates functions defined in xxhash.h
*/
#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
#define XXH_IMPLEMENTATION /* access definitions */
#include "xxhash.h"

1965
src/xxhash/xxhash.h Normal file

File diff suppressed because it is too large Load Diff