implement block linking + some refactoring
currently only supported for x64
parent 1ad90cb334 · commit 1c07932b40
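For orientation (a reading of the diff below, not text from the commit itself): block linking lets a compiled block hand control directly to its successor instead of returning to the dispatcher after every block. In this commit, block tails get a 5-byte NOP that LinkBlock() later rewrites into a direct JMP to the successor's entry point, and UnlinkBlock() turns back into NOPs when a block is invalidated; the cycle counter is also flipped to count down (ADD becomes SUB) so the generated epilogue can test the sign flag to decide whether to keep chaining or return through ARM_Ret. The stand-alone sketch below illustrates only the general dispatch-versus-link idea; all names and structure in it are invented for the example and are not melonDS code.

```cpp
// Illustrative sketch of block linking (hypothetical names, not melonDS code).
// A translated block normally returns to the dispatcher, which looks up the
// next block by guest address. Once the successor is known, the block is
// "linked": control flows block-to-block without going through the lookup.
#include <cstdint>
#include <cstdio>
#include <functional>
#include <unordered_map>

struct Block
{
    uint32_t Addr;                  // guest address the block starts at
    std::function<uint32_t()> Run;  // stands in for generated machine code
    Block* Linked = nullptr;        // direct successor once linked ("patched JMP")
};

int main()
{
    std::unordered_map<uint32_t, Block> blocks;
    blocks[0x100] = Block{0x100, []() -> uint32_t { return 0x200; }}; // ends branching to 0x200
    blocks[0x200] = Block{0x200, []() -> uint32_t { return 0x100; }}; // ends branching back

    uint32_t pc = 0x100;
    for (int step = 0; step < 6; step++)
    {
        Block& b = blocks.at(pc);
        uint32_t next = b.Run();

        if (b.Linked && b.Linked->Addr == next)
        {
            // fast path: this block's tail already jumps straight to the successor
            std::printf("linked   %03x -> %03x\n", pc, next);
        }
        else
        {
            // slow path: dispatcher lookup, then patch the link for next time
            auto it = blocks.find(next);
            if (it != blocks.end())
                b.Linked = &it->second;
            std::printf("dispatch %03x -> %03x\n", pc, next);
        }
        pc = next;
    }
    return 0;
}
```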
@@ -9,3 +9,5 @@ melon_grc.h
 cmake-build
 cmake-build-debug
 .idea
+
+*.exe
src/ARM.cpp (37 lines changed)
@@ -252,15 +252,15 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr)
         if (addr & 0x2)
         {
             NextInstr[0] = CodeRead32(addr-2, true) >> 16;
-            Cycles += CodeCycles;
+            Cycles -= CodeCycles;
             NextInstr[1] = CodeRead32(addr+2, false);
-            Cycles += CodeCycles;
+            Cycles -= CodeCycles;
         }
         else
         {
             NextInstr[0] = CodeRead32(addr, true);
             NextInstr[1] = NextInstr[0] >> 16;
-            Cycles += CodeCycles;
+            Cycles -= CodeCycles;
         }

         CPSR |= 0x20;

@@ -273,9 +273,9 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr)
         if (newregion != oldregion) SetupCodeMem(addr);

         NextInstr[0] = CodeRead32(addr, true);
-        Cycles += CodeCycles;
+        Cycles -= CodeCycles;
         NextInstr[1] = CodeRead32(addr+4, false);
-        Cycles += CodeCycles;
+        Cycles -= CodeCycles;

         CPSR &= ~0x20;
     }

@@ -315,7 +315,7 @@ void ARMv4::JumpTo(u32 addr, bool restorecpsr)

         NextInstr[0] = CodeRead16(addr);
         NextInstr[1] = CodeRead16(addr+2);
-        Cycles += NDS::ARM7MemTimings[CodeCycles][0] + NDS::ARM7MemTimings[CodeCycles][1];
+        Cycles -= NDS::ARM7MemTimings[CodeCycles][0] + NDS::ARM7MemTimings[CodeCycles][1];

         CPSR |= 0x20;
     }

@@ -328,7 +328,7 @@ void ARMv4::JumpTo(u32 addr, bool restorecpsr)

         NextInstr[0] = CodeRead32(addr);
         NextInstr[1] = CodeRead32(addr+4);
-        Cycles += NDS::ARM7MemTimings[CodeCycles][2] + NDS::ARM7MemTimings[CodeCycles][3];
+        Cycles -= NDS::ARM7MemTimings[CodeCycles][2] + NDS::ARM7MemTimings[CodeCycles][3];

         CPSR &= ~0x20;
     }

@@ -587,7 +587,7 @@ void ARMv5::Execute()
         }*/
         if (IRQ) TriggerIRQ();

-        NDS::ARM9Timestamp += Cycles;
+        NDS::ARM9Timestamp -= Cycles;
         Cycles = 0;
     }

@@ -627,14 +627,16 @@ void ARMv5::ExecuteJIT()
             return;
         }

-        ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlock<0>(instrAddr);
+        // hack so Cycles <= 0 becomes Cycles < 0
+        Cycles = NDS::ARM9Target - NDS::ARM9Timestamp - 1;
+
+        ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlockEntry(ARMJIT::TranslateAddr<0>(instrAddr));
         if (block)
-            Cycles += block();
+            ARM_Dispatch(this, block);
         else
             ARMJIT::CompileBlock(this);

-        NDS::ARM9Timestamp += Cycles;
-        Cycles = 0;
+        NDS::ARM9Timestamp = NDS::ARM9Target - (Cycles + 1);

         if (StopExecution)
         {

@@ -728,7 +730,7 @@ void ARMv4::Execute()
         }*/
         if (IRQ) TriggerIRQ();

-        NDS::ARM7Timestamp += Cycles;
+        NDS::ARM7Timestamp -= Cycles;
         Cycles = 0;
     }

@@ -768,14 +770,15 @@ void ARMv4::ExecuteJIT()
             return;
         }

-        ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlock<1>(instrAddr);
+        Cycles = NDS::ARM7Target - NDS::ARM7Timestamp - 1;

+        ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlockEntry(ARMJIT::TranslateAddr<1>(instrAddr));
         if (block)
-            Cycles += block();
+            ARM_Dispatch(this, block);
         else
             ARMJIT::CompileBlock(this);

-        NDS::ARM7Timestamp += Cycles;
-        Cycles = 0;
+        NDS::ARM7Timestamp = NDS::ARM7Target - (Cycles + 1);

         // TODO optimize this shit!!!
         if (StopExecution)
src/ARM.h (32 lines changed)
@@ -193,14 +193,14 @@ public:
    {
        // code only. always nonseq 32-bit for ARM9.
        s32 numC = (R[15] & 0x2) ? 0 : CodeCycles;
-        Cycles += numC;
+        Cycles -= numC;
    }

    void AddCycles_CI(s32 numI)
    {
        // code+internal
        s32 numC = (R[15] & 0x2) ? 0 : CodeCycles;
-        Cycles += numC + numI;
+        Cycles -= numC + numI;
    }

    void AddCycles_CDI()

@@ -211,9 +211,9 @@ public:
        s32 numD = DataCycles;

        //if (DataRegion != CodeRegion)
-            Cycles += std::max(numC + numD - 6, std::max(numC, numD));
+            Cycles -= std::max(numC + numD - 6, std::max(numC, numD));
        //else
-        //    Cycles += numC + numD;
+        //    Cycles -= numC + numD;
    }

    void AddCycles_CD()

@@ -223,9 +223,9 @@ public:
        s32 numD = DataCycles;

        //if (DataRegion != CodeRegion)
-            Cycles += std::max(numC + numD - 6, std::max(numC, numD));
+            Cycles -= std::max(numC + numD - 6, std::max(numC, numD));
        //else
-        //    Cycles += numC + numD;
+        //    Cycles -= numC + numD;
    }

    void GetCodeMemRegion(u32 addr, NDS::MemRegion* region);

@@ -387,13 +387,13 @@ public:
    void AddCycles_C()
    {
        // code only. this code fetch is sequential.
-        Cycles += NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?1:3];
+        Cycles -= NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?1:3];
    }

    void AddCycles_CI(s32 num)
    {
        // code+internal. results in a nonseq code fetch.
-        Cycles += NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?0:2] + num;
+        Cycles -= NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?0:2] + num;
    }

    void AddCycles_CDI()

@@ -405,21 +405,21 @@ public:
        if ((DataRegion >> 4) == 0x02) // mainRAM
        {
            if (CodeRegion == 0x02)
-                Cycles += numC + numD;
+                Cycles -= numC + numD;
            else
            {
                numC++;
-                Cycles += std::max(numC + numD - 3, std::max(numC, numD));
+                Cycles -= std::max(numC + numD - 3, std::max(numC, numD));
            }
        }
        else if (CodeRegion == 0x02)
        {
            numD++;
-            Cycles += std::max(numC + numD - 3, std::max(numC, numD));
+            Cycles -= std::max(numC + numD - 3, std::max(numC, numD));
        }
        else
        {
-            Cycles += numC + numD + 1;
+            Cycles -= numC + numD + 1;
        }
    }

@@ -432,17 +432,17 @@ public:
        if ((DataRegion >> 4) == 0x02)
        {
            if (CodeRegion == 0x02)
-                Cycles += numC + numD;
+                Cycles -= numC + numD;
            else
-                Cycles += std::max(numC + numD - 3, std::max(numC, numD));
+                Cycles -= std::max(numC + numD - 3, std::max(numC, numD));
        }
        else if (CodeRegion == 0x02)
        {
-            Cycles += std::max(numC + numD - 3, std::max(numC, numD));
+            Cycles -= std::max(numC + numD - 3, std::max(numC, numD));
        }
        else
        {
-            Cycles += numC + numD;
+            Cycles -= numC + numD;
        }
    }
};
src/ARMJIT.cpp (217 lines changed)
@@ -2,6 +2,10 @@

 #include <string.h>
 #include <assert.h>
+#include <unordered_map>
+
+#define XXH_STATIC_LINKING_ONLY
+#include "xxhash/xxhash.h"

 #include "Config.h"

@@ -113,16 +117,101 @@ const static ExeMemKind JIT_MEM[2][32] = {
 u32 AddrTranslate9[0x2000];
 u32 AddrTranslate7[0x4000];

-JitBlockEntry FastBlockAccess[ExeMemSpaceSize / 2];
 AddressRange CodeRanges[ExeMemSpaceSize / 512];

-TinyVector<JitBlock*> JitBlocks;
-JitBlock* RestoreCandidates[0x1000] = {NULL};
+std::unordered_map<u32, JitBlock*> JitBlocks;

-u32 HashRestoreCandidate(u32 pseudoPhysicalAddr)
+template <typename K, typename V, int Size, V InvalidValue>
+struct UnreliableHashTable
 {
-    return (u32)(((u64)pseudoPhysicalAddr * 11400714819323198485llu) >> 53);
-}
+    struct Bucket
+    {
+        K KeyA, KeyB;
+        V ValA, ValB;
+    };
+
+    Bucket Table[Size];
+
+    void Reset()
+    {
+        for (int i = 0; i < Size; i++)
+        {
+            Table[i].ValA = Table[i].ValB = InvalidValue;
+        }
+    }
+
+    UnreliableHashTable()
+    {
+        Reset();
+    }
+
+    V Insert(K key, V value)
+    {
+        u32 slot = XXH3_64bits(&key, sizeof(K)) & (Size - 1);
+        Bucket* bucket = &Table[slot];
+
+        if (bucket->ValA == value || bucket->ValB == value)
+        {
+            return InvalidValue;
+        }
+        else if (bucket->ValA == InvalidValue)
+        {
+            bucket->KeyA = key;
+            bucket->ValA = value;
+        }
+        else if (bucket->ValB == InvalidValue)
+        {
+            bucket->KeyB = key;
+            bucket->ValB = value;
+        }
+        else
+        {
+            V prevVal = bucket->ValB;
+            bucket->KeyB = bucket->KeyA;
+            bucket->ValB = bucket->ValA;
+            bucket->KeyA = key;
+            bucket->ValA = value;
+            return prevVal;
+        }
+
+        return InvalidValue;
+    }
+
+    void Remove(K key)
+    {
+        u32 slot = XXH3_64bits(&key, sizeof(K)) & (Size - 1);
+        Bucket* bucket = &Table[slot];
+
+        if (bucket->KeyA == key && bucket->ValA != InvalidValue)
+        {
+            bucket->ValA = InvalidValue;
+            if (bucket->ValB != InvalidValue)
+            {
+                bucket->KeyA = bucket->KeyB;
+                bucket->ValA = bucket->ValB;
+                bucket->ValB = InvalidValue;
+            }
+        }
+        if (bucket->KeyB == key && bucket->ValB != InvalidValue)
+            bucket->ValB = InvalidValue;
+    }
+
+    V LookUp(K addr)
+    {
+        u32 slot = XXH3_64bits(&addr, 4) & (Size - 1);
+        Bucket* bucket = &Table[slot];
+
+        if (bucket->ValA != InvalidValue && bucket->KeyA == addr)
+            return bucket->ValA;
+        if (bucket->ValB != InvalidValue && bucket->KeyB == addr)
+            return bucket->ValB;
+
+        return InvalidValue;
+    }
+};
+
+UnreliableHashTable<u32, JitBlock*, 0x800, nullptr> RestoreCandidates;
+UnreliableHashTable<u32, u32, 0x1000, UINT32_MAX> FastBlockLookUp;

 void Init()
 {

@@ -396,9 +485,8 @@ void CompileBlock(ARM* cpu)
     u32 nextInstr[2] = {cpu->NextInstr[0], cpu->NextInstr[1]};
     u32 nextInstrAddr[2] = {blockAddr, r15};

-    JIT_DEBUGPRINT("start block %x %08x (%x) %p %p (region invalidates %dx)\n",
-        blockAddr, cpu->CPSR, pseudoPhysicalAddr, FastBlockAccess[pseudoPhysicalAddr / 2],
-        cpu->Num == 0 ? LookUpBlock<0>(blockAddr) : LookUpBlock<1>(blockAddr),
+    JIT_DEBUGPRINT("start block %x %08x (%x) (region invalidates %dx)\n",
+        blockAddr, cpu->CPSR, pseudoPhysicalAddr,
         CodeRanges[pseudoPhysicalAddr / 512].TimesInvalidated);

     u32 lastSegmentStart = blockAddr;

@@ -534,6 +622,8 @@ void CompileBlock(ARM* cpu)

         if (staticBranch)
         {
+            instrs[i].BranchFlags |= branch_StaticTarget;
+
             bool isBackJump = false;
             if (hasBranched)
             {

@@ -604,12 +694,11 @@ void CompileBlock(ARM* cpu)
         FloodFillSetFlags(instrs, i - 2, !secondaryFlagReadCond ? instrs[i - 1].Info.ReadFlags : 0xF);
     } while(!instrs[i - 1].Info.EndBlock && i < Config::JIT_MaxBlockSize && !cpu->Halted && (!cpu->IRQ || (cpu->CPSR & 0x80)));

-    u32 restoreSlot = HashRestoreCandidate(pseudoPhysicalAddr);
-    JitBlock* prevBlock = RestoreCandidates[restoreSlot];
+    JitBlock* prevBlock = RestoreCandidates.LookUp(pseudoPhysicalAddr);
     bool mayRestore = true;
-    if (prevBlock && prevBlock->PseudoPhysicalAddr == pseudoPhysicalAddr)
+    if (prevBlock)
     {
-        RestoreCandidates[restoreSlot] = NULL;
+        RestoreCandidates.Remove(pseudoPhysicalAddr);
         if (prevBlock->NumInstrs == i)
         {
             for (int j = 0; j < i; j++)

@@ -661,7 +750,7 @@ void CompileBlock(ARM* cpu)

         FloodFillSetFlags(instrs, i - 1, 0xF);

-        block->EntryPoint = compiler->CompileBlock(cpu, thumb, instrs, i);
+        block->EntryPoint = compiler->CompileBlock(pseudoPhysicalAddr, cpu, thumb, instrs, i);
     }
     else
     {

@@ -675,9 +764,8 @@ void CompileBlock(ARM* cpu)
         CodeRanges[addresseRanges[j] / 512].Blocks.Add(block);
     }

-    FastBlockAccess[block->PseudoPhysicalAddr / 2] = block->EntryPoint;
-
-    JitBlocks.Add(block);
+    JitBlocks[pseudoPhysicalAddr] = block;
+    FastBlockLookUp.Insert(pseudoPhysicalAddr, compiler->SubEntryOffset(block->EntryPoint));
 }

 void InvalidateByAddr(u32 pseudoPhysical, bool mayRestore)

@@ -701,18 +789,17 @@ void InvalidateByAddr(u32 pseudoPhysical, bool mayRestore)
             }
         }

-        bool removed = JitBlocks.RemoveByValue(block);
-        assert(removed);
+        for (int j = 0; j < block->NumLinks(); j++)
+            compiler->UnlinkBlock(block->Links()[j]);

-        FastBlockAccess[block->PseudoPhysicalAddr / 2] = NULL;
+        JitBlocks.erase(block->PseudoPhysicalAddr);
+        FastBlockLookUp.Remove(block->PseudoPhysicalAddr);

         if (mayRestore)
         {
-            u32 slot = HashRestoreCandidate(block->PseudoPhysicalAddr);
-            if (RestoreCandidates[slot] && RestoreCandidates[slot] != block)
-                delete RestoreCandidates[slot];
-
-            RestoreCandidates[slot] = block;
+            JitBlock* prevBlock = RestoreCandidates.Insert(block->PseudoPhysicalAddr, block);
+            if (prevBlock)
+                delete prevBlock;
         }
     }
     if ((range->TimesInvalidated + 1) > range->TimesInvalidated)

@@ -738,47 +825,54 @@ void InvalidateITCM(u32 addr)
 void InvalidateAll()
 {
     JIT_DEBUGPRINT("invalidating all %x\n", JitBlocks.Length);
-    for (int i = 0; i < JitBlocks.Length; i++)
+    for (auto it : JitBlocks)
     {
-        JitBlock* block = JitBlocks[i];
+        JitBlock* block = it.second;

-        FastBlockAccess[block->PseudoPhysicalAddr / 2] = NULL;
+        FastBlockLookUp.Remove(block->PseudoPhysicalAddr);

-        for (int j = 0; j < block->NumAddresses; j++)
+        for (int i = 0; i < block->NumAddresses; i++)
         {
-            u32 addr = block->AddressRanges()[j];
+            u32 addr = block->AddressRanges()[i];
             AddressRange* range = &CodeRanges[addr / 512];
             range->Blocks.Clear();
             if (range->TimesInvalidated + 1 > range->TimesInvalidated)
                 range->TimesInvalidated++;
         }
+        for (int i = 0; i < block->NumLinks(); i++)
+            compiler->UnlinkBlock(block->Links()[i]);
+        block->ResetLinks();

-        u32 slot = HashRestoreCandidate(block->PseudoPhysicalAddr);
-        if (RestoreCandidates[slot] && RestoreCandidates[slot] != block)
-            delete RestoreCandidates[slot];
-
-        RestoreCandidates[slot] = block;
+        JitBlock* prevBlock = RestoreCandidates.Insert(block->PseudoPhysicalAddr, block);
+        if (prevBlock)
+            delete prevBlock;
     }

-    JitBlocks.Clear();
+    JitBlocks.clear();
 }

 void ResetBlockCache()
 {
     printf("Resetting JIT block cache...\n");

-    memset(FastBlockAccess, 0, sizeof(FastBlockAccess));
-    for (int i = 0; i < sizeof(RestoreCandidates)/sizeof(RestoreCandidates[0]); i++)
+    FastBlockLookUp.Reset();
+    RestoreCandidates.Reset();
+    for (int i = 0; i < sizeof(RestoreCandidates.Table)/sizeof(RestoreCandidates.Table[0]); i++)
     {
-        if (RestoreCandidates[i])
+        if (RestoreCandidates.Table[i].ValA)
         {
-            delete RestoreCandidates[i];
-            RestoreCandidates[i] = NULL;
+            delete RestoreCandidates.Table[i].ValA;
+            RestoreCandidates.Table[i].ValA = NULL;
+        }
+        if (RestoreCandidates.Table[i].ValA)
+        {
+            delete RestoreCandidates.Table[i].ValB;
+            RestoreCandidates.Table[i].ValB = NULL;
         }
     }
-    for (int i = 0; i < JitBlocks.Length; i++)
+    for (auto it : JitBlocks)
     {
-        JitBlock* block = JitBlocks[i];
+        JitBlock* block = it.second;
         for (int j = 0; j < block->NumAddresses; j++)
         {
             u32 addr = block->AddressRanges()[j];

@@ -788,11 +882,43 @@ void ResetBlockCache()
         }
         delete block;
     }
-    JitBlocks.Clear();
+    JitBlocks.clear();

     compiler->Reset();
 }

+JitBlockEntry LookUpBlockEntry(u32 addr)
+{
+    u32 entryOffset = FastBlockLookUp.LookUp(addr);
+    if (entryOffset != UINT32_MAX)
+        return compiler->AddEntryOffset(entryOffset);
+
+    auto block = JitBlocks.find(addr);
+    if (block != JitBlocks.end())
+    {
+        FastBlockLookUp.Insert(addr, compiler->SubEntryOffset(block->second->EntryPoint));
+        return block->second->EntryPoint;
+    }
+    return NULL;
+}
+
+template <u32 Num>
+void LinkBlock(ARM* cpu, u32 codeOffset)
+{
+    u32 targetPseudoPhys = TranslateAddr<Num>(cpu->R[15] - ((cpu->CPSR&0x20)?2:4));
+    auto block = JitBlocks.find(targetPseudoPhys);
+    if (block == JitBlocks.end())
+    {
+        CompileBlock(cpu);
+        block = JitBlocks.find(targetPseudoPhys);
+    }
+
+    JIT_DEBUGPRINT("linking to block %08x\n", targetPseudoPhys);
+
+    block->second->AddLink(codeOffset);
+    compiler->LinkBlock(codeOffset, block->second->EntryPoint);
+}
+
 void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size)
 {
     if (cpu->Num == 0)

@@ -875,3 +1001,6 @@ void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size)
 }

 }
+
+template void ARMJIT::LinkBlock<0>(ARM*, u32);
+template void ARMJIT::LinkBlock<1>(ARM*, u32);
src/ARMJIT.h (10 lines changed)
@@ -32,7 +32,6 @@ extern u32 AddrTranslate9[0x2000];
 extern u32 AddrTranslate7[0x4000];

 const u32 ExeMemSpaceSize = 0x518000; // I hate you C++, sometimes I really hate you...
-extern JitBlockEntry FastBlockAccess[ExeMemSpaceSize / 2];

 template <u32 num>
 inline bool IsMapped(u32 addr)

@@ -52,11 +51,8 @@ inline u32 TranslateAddr(u32 addr)
     return AddrTranslate7[(addr & 0xFFFFFFF) >> 14] + (addr & 0x3FFF);
 }

-template <u32 num>
-inline JitBlockEntry LookUpBlock(u32 addr)
-{
-    return FastBlockAccess[TranslateAddr<num>(addr) / 2];
-}
+JitBlockEntry LookUpBlockEntry(u32 addr);

 void Init();
 void DeInit();

@@ -73,4 +69,6 @@ void ResetBlockCache();

 }

+extern "C" void ARM_Dispatch(ARM* cpu, ARMJIT::JitBlockEntry entry);
+
 #endif
@@ -15,7 +15,8 @@ enum
 {
     branch_IdleBranch = 1 << 0,
     branch_FollowCondTaken = 1 << 1,
-    branch_FollowCondNotTaken = 1 << 2
+    branch_FollowCondNotTaken = 1 << 2,
+    branch_StaticTarget = 1 << 3,
 };

 struct FetchedInstr

@@ -76,7 +77,7 @@ struct __attribute__((packed)) TinyVector
         assert(capacity > Capacity);
         T* newMem = new T[capacity];
         if (Data != NULL)
-            memcpy(newMem, Data, sizeof(Data) * Length);
+            memcpy(newMem, Data, sizeof(T) * Length);

         T* oldData = Data;
         Data = newMem;

@@ -163,7 +164,6 @@ public:

     u32 NumInstrs;
     u32 NumAddresses;
-    u32 NumLinks;

     JitBlockEntry EntryPoint;

@@ -171,6 +171,21 @@ public:
     { return &Data[0]; }
     u32* AddressRanges()
     { return &Data[NumInstrs]; }
+    u32* Links()
+    { return &Data[NumInstrs + NumAddresses]; }
+
+    u32 NumLinks()
+    { return Data.Length - NumInstrs - NumAddresses; }
+
+    void AddLink(u32 link)
+    {
+        Data.Add(link);
+    }
+
+    void ResetLinks()
+    {
+        Data.SetLength(NumInstrs + NumAddresses);
+    }

 private:
     /*

@@ -200,6 +215,9 @@ extern u8 MemRegion7[0x80000];

 void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size);

+template <u32 Num>
+void LinkBlock(ARM* cpu, u32 codeOffset);
+
 }

 #endif
@@ -127,7 +127,7 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
     if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles)
         ConstantCycles += cycles;
     else
-        ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
+        SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
 }

 void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)

@@ -135,7 +135,7 @@ void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
     IrregularCycles = true;

     BitSet16 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00);
-    bool previouslyDirty = CPSRDirty;
+    bool cpsrDirty = CPSRDirty;
     SaveCPSR();

     if (restoreCPSR)

@@ -168,9 +168,10 @@ void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
             LoadReg(reg, RegCache.Mapping[reg]);
     }

-    if (previouslyDirty)
-        LoadCPSR();
-    CPSRDirty = previouslyDirty;
+    LoadCPSR();
+    // in case this instruction is skipped
+    if (CurInstr.Cond() < 0xE)
+        CPSRDirty = cpsrDirty;
 }

 void Compiler::A_Comp_BranchImm()

@@ -209,20 +210,12 @@ void Compiler::T_Comp_BCOND()
     s32 offset = (s32)(CurInstr.Instr << 24) >> 23;
     Comp_JumpTo(R15 + offset + 1, true);

-    Comp_SpecialBranchBehaviour();
+    Comp_SpecialBranchBehaviour(true);

     FixupBranch skipFailed = J();
     SetJumpTarget(skipExecute);

-    if (CurInstr.BranchFlags & branch_FollowCondTaken)
-    {
-        RegCache.PrepareExit();
-        SaveCPSR(false);
-
-        MOV(32, R(RAX), Imm32(ConstantCycles));
-        ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
-        RET();
-    }
+    Comp_SpecialBranchBehaviour(false);

     Comp_AddCycles_C(true);
     SetJumpTarget(skipFailed);
@@ -1,6 +1,7 @@
 #include "ARMJIT_Compiler.h"

 #include "../ARMInterpreter.h"
+#include "../Config.h"

 #include <assert.h>

@@ -15,6 +16,8 @@

 using namespace Gen;

+extern "C" void ARM_Ret();
+
 namespace ARMJIT
 {
 template <>

@@ -170,6 +173,24 @@ Compiler::Compiler()
         RET();
     }

+    {
+        CPSRDirty = true;
+        BranchStub[0] = GetWritableCodePtr();
+        SaveCPSR();
+        MOV(64, R(ABI_PARAM1), R(RCPU));
+        CALL((u8*)ARMJIT::LinkBlock<0>);
+        LoadCPSR();
+        JMP((u8*)ARM_Ret, true);
+
+        CPSRDirty = true;
+        BranchStub[1] = GetWritableCodePtr();
+        SaveCPSR();
+        MOV(64, R(ABI_PARAM1), R(RCPU));
+        CALL((u8*)ARMJIT::LinkBlock<1>);
+        LoadCPSR();
+        JMP((u8*)ARM_Ret, true);
+    }
+
     // move the region forward to prevent overwriting the generated functions
     CodeMemSize -= GetWritableCodePtr() - ResetStart;
     ResetStart = GetWritableCodePtr();

@@ -362,23 +383,43 @@ void Compiler::Reset()
     SetCodePtr(ResetStart);
 }

-void Compiler::Comp_SpecialBranchBehaviour()
+void Compiler::Comp_SpecialBranchBehaviour(bool taken)
 {
-    if (CurInstr.BranchFlags & branch_IdleBranch)
-        OR(32, MDisp(RCPU, offsetof(ARM, IdleLoop)), Imm8(0x1));
+    if (taken && CurInstr.BranchFlags & branch_IdleBranch)
+        OR(8, MDisp(RCPU, offsetof(ARM, IdleLoop)), Imm8(0x1));

-    if (CurInstr.BranchFlags & branch_FollowCondNotTaken)
+    if ((CurInstr.BranchFlags & branch_FollowCondNotTaken && taken)
+        || (CurInstr.BranchFlags & branch_FollowCondTaken && !taken))
     {
         RegCache.PrepareExit();
-        SaveCPSR(false);

-        MOV(32, R(RAX), Imm32(ConstantCycles));
-        ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
-        RET();
+        SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));
+
+        if (Config::JIT_BrancheOptimisations == 2 && !(CurInstr.BranchFlags & branch_IdleBranch)
+            && (!taken || (CurInstr.BranchFlags & branch_StaticTarget)))
+        {
+            FixupBranch ret = J_CC(CC_S);
+            CMP(32, MDisp(RCPU, offsetof(ARM, StopExecution)), Imm8(0));
+            FixupBranch ret2 = J_CC(CC_NZ);
+
+            u8* rewritePart = GetWritableCodePtr();
+            NOP(5);
+
+            MOV(32, R(ABI_PARAM2), Imm32(rewritePart - ResetStart));
+            JMP((u8*)BranchStub[Num], true);
+
+            SetJumpTarget(ret);
+            SetJumpTarget(ret2);
+            JMP((u8*)ARM_Ret, true);
+        }
+        else
+        {
+            JMP((u8*)&ARM_Ret, true);
+        }
     }
 }

-JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
+JitBlockEntry Compiler::CompileBlock(u32 translatedAddr, ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
 {
     if (CodeMemSize - (GetWritableCodePtr() - ResetStart) < 1024 * 32) // guess...
         ResetBlockCache();

@@ -388,15 +429,11 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
     Num = cpu->Num;
     CodeRegion = instrs[0].Addr >> 24;
     CurCPU = cpu;
+    // CPSR might have been modified in a previous block
+    CPSRDirty = Config::JIT_BrancheOptimisations == 2;

     JitBlockEntry res = (JitBlockEntry)GetWritableCodePtr();

-    ABI_PushRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
-
-    MOV(64, R(RCPU), ImmPtr(cpu));
-
-    LoadCPSR();
-
     RegCache = RegisterCache<Compiler, X64Reg>(this, instrs, instrsCount);

     for (int i = 0; i < instrsCount; i++)

@@ -474,7 +511,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
             else
                 (this->*comp)();

-            Comp_SpecialBranchBehaviour();
+            Comp_SpecialBranchBehaviour(true);

             if (CurInstr.Cond() < 0xE)
             {

@@ -485,15 +522,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]

                 Comp_AddCycles_C(true);

-                if (CurInstr.BranchFlags & branch_FollowCondTaken)
-                {
-                    RegCache.PrepareExit();
-                    SaveCPSR(false);
-
-                    MOV(32, R(RAX), Imm32(ConstantCycles));
-                    ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
-                    RET();
-                }
+                Comp_SpecialBranchBehaviour(false);

                 SetJumpTarget(skipFailed);
             }

@@ -504,17 +533,38 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
         }
     }

-        if (comp == NULL && i != instrsCount - 1)
+        if (comp == NULL)
             LoadCPSR();
     }

     RegCache.Flush();
-    SaveCPSR();

-    MOV(32, R(RAX), Imm32(ConstantCycles));
+    SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));

-    ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
-    RET();
+    if (Config::JIT_BrancheOptimisations == 2
+        && !(instrs[instrsCount - 1].BranchFlags & branch_IdleBranch)
+        && (!instrs[instrsCount - 1].Info.Branches()
+            || instrs[instrsCount - 1].BranchFlags & branch_FollowCondNotTaken
+            || (instrs[instrsCount - 1].BranchFlags & branch_FollowCondTaken && instrs[instrsCount - 1].BranchFlags & branch_StaticTarget)))
+    {
+        FixupBranch ret = J_CC(CC_S);
+        CMP(32, MDisp(RCPU, offsetof(ARM, StopExecution)), Imm8(0));
+        FixupBranch ret2 = J_CC(CC_NZ);
+
+        u8* rewritePart = GetWritableCodePtr();
+        NOP(5);
+
+        MOV(32, R(ABI_PARAM2), Imm32(rewritePart - ResetStart));
+        JMP((u8*)BranchStub[Num], true);
+
+        SetJumpTarget(ret);
+        SetJumpTarget(ret2);
+        JMP((u8*)ARM_Ret, true);
+    }
+    else
+    {
+        JMP((u8*)ARM_Ret, true);
+    }

     /*FILE* codeout = fopen("codeout", "a");
     fprintf(codeout, "beginning block argargarg__ %x!!!", instrs[0].Addr);

@@ -525,6 +575,22 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
     return res;
 }

+void Compiler::LinkBlock(u32 offset, JitBlockEntry entry)
+{
+    u8* curPtr = GetWritableCodePtr();
+    SetCodePtr(ResetStart + offset);
+    JMP((u8*)entry, true);
+    SetCodePtr(curPtr);
+}
+
+void Compiler::UnlinkBlock(u32 offset)
+{
+    u8* curPtr = GetWritableCodePtr();
+    SetCodePtr(ResetStart + offset);
+    NOP(5);
+    SetCodePtr(curPtr);
+}
+
 void Compiler::Comp_AddCycles_C(bool forceNonConstant)
 {
     s32 cycles = Num ?

@@ -532,7 +598,7 @@ void Compiler::Comp_AddCycles_C(bool forceNonConstant)
         : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles);

     if ((!Thumb && CurInstr.Cond() < 0xE) || forceNonConstant)
-        ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
+        SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
     else
         ConstantCycles += cycles;
 }

@@ -544,7 +610,7 @@ void Compiler::Comp_AddCycles_CI(u32 i)
         : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + i;

     if (!Thumb && CurInstr.Cond() < 0xE)
-        ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
+        SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
     else
         ConstantCycles += cycles;
 }

@@ -558,12 +624,12 @@ void Compiler::Comp_AddCycles_CI(Gen::X64Reg i, int add)
     if (!Thumb && CurInstr.Cond() < 0xE)
     {
         LEA(32, RSCRATCH, MDisp(i, add + cycles));
-        ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(RSCRATCH));
+        SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(RSCRATCH));
     }
     else
     {
         ConstantCycles += i + cycles;
-        ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(i));
+        SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(i));
     }
 }

@@ -599,7 +665,7 @@ void Compiler::Comp_AddCycles_CDI()
     }

     if (!Thumb && CurInstr.Cond() < 0xE)
-        ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
+        SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
     else
         ConstantCycles += cycles;
 }

@@ -643,7 +709,7 @@ void Compiler::Comp_AddCycles_CD()
     }

     if (IrregularCycles && !Thumb && CurInstr.Cond() < 0xE)
-        ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
+        SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
     else
         ConstantCycles += cycles;
 }
@@ -51,7 +51,10 @@ public:

     void Reset();

-    JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount);
+    void LinkBlock(u32 offset, JitBlockEntry entry);
+    void UnlinkBlock(u32 offset);
+
+    JitBlockEntry CompileBlock(u32 translatedAddr, ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount);

     void LoadReg(int reg, Gen::X64Reg nativeReg);
     void SaveReg(int reg, Gen::X64Reg nativeReg);

@@ -145,7 +148,7 @@ public:

     void Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed);

-    void Comp_SpecialBranchBehaviour();
+    void Comp_SpecialBranchBehaviour(bool taken);

     void* Gen_MemoryRoutine9(bool store, int size);

@@ -176,12 +179,24 @@ public:
         return Gen::R(RegCache.Mapping[reg]);
     }

+    JitBlockEntry AddEntryOffset(u32 offset)
+    {
+        return (JitBlockEntry)(ResetStart + offset);
+    }
+
+    u32 SubEntryOffset(JitBlockEntry entry)
+    {
+        return (u8*)entry - ResetStart;
+    }
+
     u8* ResetStart;
     u32 CodeMemSize;

     bool Exit;
     bool IrregularCycles;

+    void* BranchStub[2];
+
     void* MemoryFuncs9[3][2];
     void* MemoryFuncs7[3][2];
@@ -0,0 +1,15 @@
+#include "../ARM.h"
+
+int main(int argc, char* argv[])
+{
+    FILE* f = fopen("ARMJIT_Offsets.h", "w");
+#define writeOffset(field) \
+    fprintf(f, "#define ARM_" #field "_offset 0x%x\n", offsetof(ARM, field))
+
+    writeOffset(CPSR);
+    writeOffset(Cycles);
+    writeOffset(StopExecution);
+
+    fclose(f);
+    return 0;
+}

@@ -0,0 +1,74 @@
+.intel_syntax noprefix
+
+#include "ARMJIT_Offsets.h"
+
+.text
+
+#define RCPU rbp
+#define RCPSR r15d
+
+#ifdef WIN64
+#define ARG1_REG ecx
+#define ARG2_REG edx
+#define ARG3_REG r8d
+#define ARG4_REG r9d
+#define ARG1_REG64 rcx
+#define ARG2_REG64 rdx
+#define ARG3_REG64 r8
+#define ARG4_REG64 r9
+#else
+#define ARG1_REG edi
+#define ARG2_REG esi
+#define ARG3_REG edx
+#define ARG4_REG ecx
+#define ARG1_REG64 rdi
+#define ARG2_REG64 rsi
+#define ARG3_REG64 rdx
+#define ARG4_REG64 rcx
+#endif
+
+.p2align 4,,15
+
+.global ARM_Dispatch
+ARM_Dispatch:
+#ifdef WIN64
+    push rdi
+    push rsi
+#endif
+    push rbx
+    push r12
+    push r13
+    push r14
+    push r15
+    push rbp
+
+#ifdef WIN64
+    sub rsp, 0x28
+#endif
+    mov RCPU, ARG1_REG64
+    mov RCPSR, [RCPU + ARM_CPSR_offset]
+
+    jmp ARG2_REG64
+
+.p2align 4,,15
+
+.global ARM_Ret
+ARM_Ret:
+    mov [RCPU + ARM_CPSR_offset], RCPSR
+
+#ifdef WIN64
+    add rsp, 0x28
+#endif
+
+    pop rbp
+    pop r15
+    pop r14
+    pop r13
+    pop r12
+    pop rbx
+#ifdef WIN64
+    pop rsi
+    pop rdi
+#endif
+
+    ret

@@ -0,0 +1,3 @@
+#define ARM_CPSR_offset 0x64
+#define ARM_Cycles_offset 0xc
+#define ARM_StopExecution_offset 0x10
@@ -49,9 +49,12 @@ add_library(core STATIC
     WifiAP.cpp

     tiny-AES-c/aes.c
+    xxhash/xxhash.c
 )

 if (ENABLE_JIT)
+    enable_language(ASM)
+
     target_sources(core PRIVATE
         ARMJIT.cpp

@@ -68,7 +71,10 @@ if (ENABLE_JIT)
         ARMJIT_x64/ARMJIT_ALU.cpp
         ARMJIT_x64/ARMJIT_LoadStore.cpp
         ARMJIT_x64/ARMJIT_Branch.cpp
+
+        ARMJIT_x64/ARMJIT_Linkage.s
     )
+    set_source_files_properties(ARMJIT_x64/ARMJIT_Linkage.s PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp")
 endif()
 if (ARCHITECTURE STREQUAL ARM64)
     target_sources(core PRIVATE
@@ -38,10 +38,10 @@ char DSiFirmwarePath[1024];
 char DSiNANDPath[1024];

 #ifdef JIT_ENABLED
-bool JIT_Enable = false;
+int JIT_Enable = false;
 int JIT_MaxBlockSize = 12;
-bool JIT_BrancheOptimisations = true;
-bool JIT_LiteralOptimisations = true;
+int JIT_BrancheOptimisations = 2;
+int JIT_LiteralOptimisations = true;
 #endif

 ConfigEntry ConfigFile[] =

@@ -58,7 +58,7 @@ ConfigEntry ConfigFile[] =
 #ifdef JIT_ENABLED
     {"JIT_Enable", 0, &JIT_Enable, 0, NULL, 0},
     {"JIT_MaxBlockSize", 0, &JIT_MaxBlockSize, 10, NULL, 0},
-    {"JIT_BrancheOptimisations", 0, &JIT_BrancheOptimisations, 1, NULL, 0},
+    {"JIT_BranchOptimisations", 0, &JIT_BrancheOptimisations, 2, NULL, 0},
     {"JIT_LiteralOptimisations", 0, &JIT_LiteralOptimisations, 1, NULL, 0},
 #endif
@@ -52,10 +52,10 @@ extern char DSiFirmwarePath[1024];
 extern char DSiNANDPath[1024];

 #ifdef JIT_ENABLED
-extern bool JIT_Enable;
+extern int JIT_Enable;
 extern int JIT_MaxBlockSize;
-extern bool JIT_BrancheOptimisations;
-extern bool JIT_LiteralOptimisations;
+extern int JIT_BrancheOptimisations;
+extern int JIT_LiteralOptimisations;
 #endif

 }
(File diff suppressed because it is too large)
|
@ -0,0 +1,43 @@
|
||||||
|
/*
|
||||||
|
* xxHash - Extremely Fast Hash algorithm
|
||||||
|
* Copyright (C) 2012-2020 Yann Collet
|
||||||
|
*
|
||||||
|
* BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are
|
||||||
|
* met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above
|
||||||
|
* copyright notice, this list of conditions and the following disclaimer
|
||||||
|
* in the documentation and/or other materials provided with the
|
||||||
|
* distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* You can contact the author at:
|
||||||
|
* - xxHash homepage: https://www.xxhash.com
|
||||||
|
* - xxHash source repository: https://github.com/Cyan4973/xxHash
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* xxhash.c instantiates functions defined in xxhash.h
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
|
||||||
|
#define XXH_IMPLEMENTATION /* access definitions */
|
||||||
|
|
||||||
|
#include "xxhash.h"
|
(File diff suppressed because it is too large)