JIT: base

all instructions are interpreted
This commit is contained in:
RSDuck 2019-06-22 01:28:32 +02:00
parent d2f05cd30f
commit c5c342c009
28 changed files with 7743 additions and 4 deletions

View File

@ -21,6 +21,7 @@
#include "ARM.h"
#include "ARMInterpreter.h"
#include "AREngine.h"
#include "ARMJIT.h"
// instruction timing notes
@ -481,7 +482,7 @@ void ARMv5::Execute()
while (NDS::ARM9Timestamp < NDS::ARM9Target)
{
if (CPSR & 0x20) // THUMB
/*if (CPSR & 0x20) // THUMB
{
// prefetch
R[15] += 2;
@ -514,7 +515,15 @@ void ARMv5::Execute()
}
else
AddCycles_C();
}
}*/
if (!ARMJIT::IsMapped(Num, R[15] - ((CPSR&0x20)?2:4)))
printf("aaarg ungempappter raum %x\n", R[15]);
ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock(Num, R[15] - ((CPSR&0x20)?2:4));
if (block == NULL)
block = ARMJIT::CompileBlock(this);
Cycles += block();
// TODO optimize this shit!!!
if (Halted)

177
src/ARMJIT.cpp Normal file
View File

@ -0,0 +1,177 @@
#include "ARMJIT.h"
#include "ARMJIT_x64/ARMJIT_Compiler.h"
namespace ARMJIT
{
// The code generator instance; allocated in Init() and released in DeInit().
Compiler* compiler;
// Address-to-compiled-block lookup tables (see BlockCache), zeroed in Init().
BlockCache cache;
#define DUP2(x) x, x
// Per-CPU table (row 0: ARM9, row 1: ARM7) with one entry for each of the 32
// slices that Init() walks (slice index = 16 KB page number >> 9).
// Each entry is the byte offset of the backing array inside BlockCache, or -1
// when no compiled blocks are tracked for that slice.
static ptrdiff_t JIT_MEM[2][32] = {
//arm9
{
/* 0X*/ DUP2(offsetof(BlockCache, ARM9_ITCM)),
/* 1X*/ DUP2(offsetof(BlockCache, ARM9_ITCM)), // mirror
/* 2X*/ DUP2(offsetof(BlockCache, MainRAM)),
/* 3X*/ DUP2(offsetof(BlockCache, SWRAM)),
/* 4X*/ DUP2(-1),
/* 5X*/ DUP2(-1),
/* 6X*/ -1,
offsetof(BlockCache, ARM9_LCDC), // Plain ARM9-CPU Access (LCDC mode) (max 656KB)
/* 7X*/ DUP2(-1),
/* 8X*/ DUP2(-1),
/* 9X*/ DUP2(-1),
/* AX*/ DUP2(-1),
/* BX*/ DUP2(-1),
/* CX*/ DUP2(-1),
/* DX*/ DUP2(-1),
/* EX*/ DUP2(-1),
/* FX*/ DUP2(offsetof(BlockCache, ARM9_BIOS))
},
//arm7
{
/* 0X*/ DUP2(offsetof(BlockCache, ARM7_BIOS)),
/* 1X*/ DUP2(-1),
/* 2X*/ DUP2(offsetof(BlockCache, MainRAM)),
/* 3X*/ offsetof(BlockCache, SWRAM),
offsetof(BlockCache, ARM7_WRAM),
/* 4X*/ -1,
offsetof(BlockCache, ARM7_WIRAM),
/* 5X*/ DUP2(-1),
/* 6X*/ DUP2(offsetof(BlockCache, ARM7_WVRAM)), /* contrary to Gbatek, melonDS and itself,
DeSmuME doesn't mirror the 64 MB region at 0x6800000 */
/* 7X*/ DUP2(-1),
/* 8X*/ DUP2(-1),
/* 9X*/ DUP2(-1),
/* AX*/ DUP2(-1),
/* BX*/ DUP2(-1),
/* CX*/ DUP2(-1),
/* DX*/ DUP2(-1),
/* EX*/ DUP2(-1),
/* FX*/ DUP2(-1)
}
};
// Companion to JIT_MEM with the same [cpu][slice] layout: the mask Init() applies
// to a page's base address to get its byte offset inside the backing array
// (which makes mirrors wrap around). Slices that have no backing array use 0.
static u32 JIT_MASK[2][32] = {
//arm9
{
/* 0X*/ DUP2(0x00007FFF),
/* 1X*/ DUP2(0x00007FFF),
/* 2X*/ DUP2(0x003FFFFF),
/* 3X*/ DUP2(0x00007FFF),
/* 4X*/ DUP2(0x00000000),
/* 5X*/ DUP2(0x00000000),
/* 6X*/ 0x00000000,
0x000FFFFF,
/* 7X*/ DUP2(0x00000000),
/* 8X*/ DUP2(0x00000000),
/* 9X*/ DUP2(0x00000000),
/* AX*/ DUP2(0x00000000),
/* BX*/ DUP2(0x00000000),
/* CX*/ DUP2(0x00000000),
/* DX*/ DUP2(0x00000000),
/* EX*/ DUP2(0x00000000),
/* FX*/ DUP2(0x00007FFF)
},
//arm7
{
/* 0X*/ DUP2(0x00003FFF),
/* 1X*/ DUP2(0x00000000),
/* 2X*/ DUP2(0x003FFFFF),
/* 3X*/ 0x00007FFF,
0x0000FFFF,
/* 4X*/ 0x00000000,
0x0000FFFF,
/* 5X*/ DUP2(0x00000000),
/* 6X*/ DUP2(0x0003FFFF),
/* 7X*/ DUP2(0x00000000),
/* 8X*/ DUP2(0x00000000),
/* 9X*/ DUP2(0x00000000),
/* AX*/ DUP2(0x00000000),
/* BX*/ DUP2(0x00000000),
/* CX*/ DUP2(0x00000000),
/* DX*/ DUP2(0x00000000),
/* EX*/ DUP2(0x00000000),
/* FX*/ DUP2(0x00000000)
}
};
#undef DUP2
void Init()
{
memset(&cache, 0, sizeof(BlockCache));
for (int cpu = 0; cpu < 2; cpu++)
for (int i = 0; i < 0x4000; i++)
cache.AddrMapping[cpu][i] = JIT_MEM[cpu][i >> 9] == -1 ? NULL :
(CompiledBlock*)((u8*)&cache + JIT_MEM[cpu][i >> 9])
+ (((i << 14) & JIT_MASK[cpu][i >> 9]) >> 1);
compiler = new Compiler();
}
// Destroys the code generator created by Init().
// The global is reset afterwards so that a repeated DeInit() is a harmless
// no-op instead of a double delete, and stale use shows up as a null pointer.
void DeInit()
{
    delete compiler;
    compiler = NULL;
}
// Fetches instructions starting at the CPU's current pipeline state until one
// of them branches (or ten have been collected), has the compiler translate
// them and registers the result in the block cache under the block's address.
CompiledBlock CompileBlock(ARM* cpu)
{
    const bool thumb = cpu->CPSR & 0x20;
    const u32 instrSize = thumb ? 2 : 4;

    FetchedInstr instrs[12];
    int count = 0;

    u32 pc = cpu->R[15];
    u32 pipeline[2] = {cpu->NextInstr[0], cpu->NextInstr[1]};
    //printf("block %x %d\n", pc, thumb);

    for (;;)
    {
        FetchedInstr& cur = instrs[count];

        pc += instrSize;
        cur.Instr = pipeline[0];
        //printf("%x %x\n", cur.Instr, pc);
        pipeline[0] = pipeline[1];
        cur.NextInstr[0] = pipeline[0];

        if (cpu->Num != 0)
        {
            ARMv4* arm7 = (ARMv4*)cpu;
            if (thumb)
                pipeline[1] = arm7->CodeRead16(pc);
            else
                pipeline[1] = arm7->CodeRead32(pc);
            cur.CodeCycles = cpu->CodeCycles;
        }
        else if (thumb && (pc & 0x2))
        {
            // second halfword of the word fetched previously: no new code read
            pipeline[1] >>= 16;
            cur.CodeCycles = 0;
        }
        else
        {
            ARMv5* arm9 = (ARMv5*)cpu;
            pipeline[1] = arm9->CodeRead32(pc, false);
            cur.CodeCycles = cpu->CodeCycles;
        }

        cur.NextInstr[1] = pipeline[1];
        cur.Info = ARMInstrInfo::Decode(thumb, cpu->Num, cur.Instr);
        count++;

        if (cur.Info.Branches() || count >= 10)
            break;
    }

    CompiledBlock block = compiler->CompileBlock(cpu, instrs, count);
    InsertBlock(cpu->Num, cpu->R[15] - instrSize, block);
    return block;
}
}

140
src/ARMJIT.h Normal file
View File

@ -0,0 +1,140 @@
#ifndef ARMJIT_H
#define ARMJIT_H
#include "types.h"
#include <string.h>
#include "ARM.h"
#include "ARM_InstrInfo.h"
namespace ARMJIT
{
// Entry point of a compiled block: takes no arguments and returns a u32
// (the caller in ARM.cpp adds the result to the CPU's cycle counter).
typedef u32 (*CompiledBlock)();
// Register cache skeleton. For now it only declares the host register
// allocation table and its length; both are defined in the x64 compiler source.
// NOTE(review): the members are private (class default) and nothing accesses them yet.
class RegCache
{
static const int NativeRegAllocOrder[];
static const int NativeRegsCount;
};
// A fetched guest instruction plus the prefetch state and decode info that
// belong to it.
struct FetchedInstr
{
    // 4-bit (ARM) register field starting at bit `pos` of the instruction word.
    u32 A_Reg(int pos) const
    {
        const u32 shifted = Instr >> pos;
        return shifted & 0xF;
    }

    // 3-bit (Thumb) register field starting at bit `pos` of the instruction word.
    u32 T_Reg(int pos) const
    {
        const u32 shifted = Instr >> pos;
        return shifted & 0x7;
    }

    // Top four bits of the instruction word (the ARM condition field).
    u32 Cond() const
    {
        const u32 condField = Instr >> 28;
        return condField;
    }

    u32 Instr;           // the instruction word itself
    u32 NextInstr[2];    // prefetch words that follow this instruction
    u8 CodeCycles;       // code fetch cycles recorded when it was fetched

    ARMInstrInfo::Info Info; // result of ARMInstrInfo::Decode for Instr
};
/*
Copied from DeSmuME.
Some names were changed to match the nomenclature of melonDS.
Since it isn't explained anywhere and at least I needed some time to understand it,
here's a summary of how it works:
more or less all memory locations from which code can be executed are
represented by an array of function pointers, each of which is either null or
points to a function which executes a block of instructions starting from there.
The most significant 4 bits of each address are ignored. This 28 bit space is
divided into 0x4000 16 KB blocks, each of which has a pointer to the relevant
place inside the aforementioned arrays. Only half of the bytes need to be
addressed (ARM addresses are aligned to 4 bytes, Thumb addresses to a 2 byte boundary).
In case a memory write hits mapped memory, the function pointer at this
address is set to null, so the block is recompiled the next time it's executed.
This method has disadvantages, namely that only a write to the
first instruction of a block marks it as invalid and that memory remapping
(SWRAM and VRAM) isn't taken into account.
*/
// Storage for all compiled block pointers.
// AddrMapping holds, per CPU and per 16 KB page of the 28-bit address space,
// a pointer into one of the arrays below (or null if the page is not backed).
// Every array has one entry per halfword of the memory region it covers.
struct BlockCache
{
    CompiledBlock* AddrMapping[2][0x4000] = {0};

    // NOTE(review): the main RAM slices are indexed through a 0x3FFFFF mask
    // (JIT_MASK in ARMJIT.cpp), so only the first 4 MB worth of entries look
    // reachable; the array could probably be shrunk - confirm before changing.
    CompiledBlock MainRAM[16*1024*1024/2];
    CompiledBlock SWRAM[0x8000/2]; // Shared working RAM
    CompiledBlock ARM9_ITCM[0x8000/2];
    // The ARM9 LCDC slice is indexed through a 0xFFFFF mask (JIT_MASK in
    // ARMJIT.cpp), i.e. up to 1 MB. The array must cover that whole range:
    // with only 0xA4000/2 entries the pages above 0x68A4000 pointed into the
    // arrays that follow and shared (and invalidated) their entries.
    CompiledBlock ARM9_LCDC[0x100000/2];
    CompiledBlock ARM9_BIOS[0x8000/2];
    CompiledBlock ARM7_BIOS[0x4000/2];
    CompiledBlock ARM7_WRAM[0x10000/2]; // dedicated ARM7 WRAM
    CompiledBlock ARM7_WIRAM[0x10000/2]; // Wifi
    CompiledBlock ARM7_WVRAM[0x40000/2]; // VRAM allocated as Working RAM
};
// The one block cache instance (defined in ARMJIT.cpp), used by the inline helpers below.
extern BlockCache cache;
// Tells whether the 16 KB page containing `addr` has a block table for CPU `num`.
inline bool IsMapped(u32 num, u32 addr)
{
    const u32 pageIndex = (addr & 0xFFFFFFF) >> 14;
    return cache.AddrMapping[num][pageIndex] != NULL;
}
// Returns the compiled block registered for `addr` on CPU `num`, or NULL if
// there is none. Addresses in pages without a block table also yield NULL
// instead of dereferencing the null page pointer.
inline CompiledBlock LookUpBlock(u32 num, u32 addr)
{
    CompiledBlock* page = cache.AddrMapping[num][(addr & 0xFFFFFFF) >> 14];
    if (page == NULL)
        return NULL;
    // one entry per halfword inside the 16 KB page
    return page[(addr & 0x3FFF) >> 1];
}
// Drops the block registered at the halfword containing `addr`, if the page is mapped.
inline void Invalidate16(u32 num, u32 addr)
{
    if (!IsMapped(num, addr))
        return;

    CompiledBlock* page = cache.AddrMapping[num][(addr & 0xFFFFFFF) >> 14];
    page[(addr & 0x3FFF) >> 1] = NULL;
}
// Drops the blocks registered at `addr` and at `addr + 2` (both halfword
// entries of a word), if the page containing `addr` is mapped.
inline void Invalidate32(u32 num, u32 addr)
{
    if (!IsMapped(num, addr))
        return;

    CompiledBlock* const page = cache.AddrMapping[num][(addr & 0xFFFFFFF) >> 14];
    const u32 lowerEntry = (addr & 0x3FFF) >> 1;
    const u32 upperEntry = ((addr + 2) & 0x3FFF) >> 1;
    page[lowerEntry] = NULL;
    page[upperEntry] = NULL;
}
// Registers `func` as the compiled block starting at `addr` for CPU `num`.
// If the page containing `addr` has no block table the block is simply not
// cached (it can still be executed by the caller); previously this wrote
// through a null page pointer.
inline void InsertBlock(u32 num, u32 addr, CompiledBlock func)
{
    CompiledBlock* page = cache.AddrMapping[num][(addr & 0xFFFFFFF) >> 14];
    if (page == NULL)
        return;
    page[(addr & 0x3FFF) >> 1] = func;
}
// Forgets every compiled block by zeroing all block arrays.
// The page mapping (AddrMapping) is left untouched.
inline void ResetBlocks()
{
    // `region` is a reference to the array, so sizeof yields the full array size
    auto clearRegion = [](auto& region) { memset(region, 0, sizeof(region)); };

    clearRegion(cache.MainRAM);
    clearRegion(cache.SWRAM);
    clearRegion(cache.ARM9_BIOS);
    clearRegion(cache.ARM9_ITCM);
    clearRegion(cache.ARM9_LCDC);
    clearRegion(cache.ARM7_BIOS);
    clearRegion(cache.ARM7_WIRAM);
    clearRegion(cache.ARM7_WRAM);
    clearRegion(cache.ARM7_WVRAM);
}
// Sets up the block cache mapping and creates the compiler.
void Init();
// Destroys the compiler created by Init().
void DeInit();
// Compiles the block starting at the CPU's current position, stores it in the
// block cache and returns it.
CompiledBlock CompileBlock(ARM* cpu);
}
#endif

View File

@ -0,0 +1,332 @@
#include "ARMJIT_Compiler.h"
#include "../ARMInterpreter.h"
#include <assert.h>
using namespace Gen;
namespace ARMJIT
{
// Host registers listed for the register cache (presumably in the order they
// are meant to be handed out - not used anywhere yet).
const int RegCache::NativeRegAllocOrder[] = {(int)RBX, (int)RSI, (int)RDI, (int)R12, (int)R13};
// Number of entries in NativeRegAllocOrder.
const int RegCache::NativeRegsCount = 5;
// Reserves the buffer the generated code is written into (4 MB).
Compiler::Compiler()
{
    const int codeSpaceBytes = 1024 * 1024 * 4;
    AllocCodeSpace(codeSpaceBytes);
}
// Member function of Compiler that emits code for one instruction kind.
typedef void (Compiler::*CompileFunc)();
// Signature of the interpreter's instruction handlers.
typedef void (*InterpretFunc)(ARM*);
void Compiler::LoadCPSR()
{
assert(!CPSRDirty);
MOV(32, R(RCPSR), MDisp(RCPU, offsetof(ARM, CPSR)));
}
// Emits a store of RCPSR back into the CPU object, but only if the cached
// copy was marked dirty; clears the dirty mark afterwards.
void Compiler::SaveCPSR()
{
    if (!CPSRDirty)
        return;

    MOV(32, MDisp(RCPU, offsetof(ARM, CPSR)), R(RCPSR));
    CPSRDirty = false;
}
// Emits host code for the given run of fetched instructions and returns its
// entry point.
//
// cpu         - CPU the block belongs to; its address is baked into the code
// instrs      - the fetched instructions, in execution order
// instrsCount - number of valid entries in instrs
//
// At this stage no instruction has a native implementation (comp is always
// NULL), so every instruction is executed by calling the matching interpreter
// handler. Before each call the interpreter-visible state (R15, CodeCycles,
// CurInstr and, for the last instruction, the prefetch words) is written to
// the CPU object. ARM condition codes are evaluated by the emitted code.
// The generated function returns RCycles + ConstantCycles.
CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrsCount)
{
    // out of code space: throw away every compiled block and start over
    if (IsAlmostFull())
    {
        ResetBlocks();
        ResetCodePtr();
    }

    CompiledBlock res = (CompiledBlock)GetWritableCodePtr();

    ConstantCycles = 0;
    Thumb = cpu->CPSR & 0x20;
    Num = cpu->Num;
    R15 = cpu->R[15];

    ABI_PushRegistersAndAdjustStack({ABI_ALL_CALLEE_SAVED}, 8, 0);

    MOV(64, R(RCPU), ImmPtr(cpu));
    XOR(32, R(RCycles), R(RCycles));

    LoadCPSR();

    for (int i = 0; i < instrsCount; i++)
    {
        R15 += Thumb ? 2 : 4;
        CurrentInstr = instrs[i];

        CompileFunc comp = NULL;

        if (comp == NULL || i == instrsCount - 1)
        {
            // make the state the interpreter handler relies on visible in memory
            MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(R15));
            MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(CurrentInstr.CodeCycles));
            MOV(32, MDisp(RCPU, offsetof(ARM, CurInstr)), Imm32(CurrentInstr.Instr));
            if (i == instrsCount - 1)
            {
                MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[0])), Imm32(CurrentInstr.NextInstr[0]));
                MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[1])), Imm32(CurrentInstr.NextInstr[1]));
            }

            SaveCPSR();
        }

        if (Thumb)
        {
            if (comp == NULL)
            {
                MOV(64, R(ABI_PARAM1), R(RCPU));

                u32 icode = (CurrentInstr.Instr >> 6) & 0x3FF;
                ABI_CallFunction(ARMInterpreter::THUMBInstrTable[icode]);
            }
            else
            {
                // native Thumb code generation is not implemented yet
            }
        }
        else
        {
            u32 cond = CurrentInstr.Cond();
            if (CurrentInstr.Info.Kind == ARMInstrInfo::ak_BLX_IMM)
            {
                MOV(64, R(ABI_PARAM1), R(RCPU));
                ABI_CallFunction(ARMInterpreter::A_BLX_IMM);
            }
            else if (cond == 0xF)
                AddCycles_C();
            else
            {
                FixupBranch skipExecute;
                if (cond < 0xE)
                {
                    if (cond >= 0x8)
                    {
                        // Conditions that combine several flags: test bit
                        // (CPSR >> 28) of the condition table entry.
                        // The variable shift count has to be in CL.
                        static_assert(RSCRATCH3 == ECX, "RSCRATCH3 must be ECX (variable shift count register)");
                        MOV(32, R(RSCRATCH3), R(RCPSR));
                        SHR(32, R(RSCRATCH3), Imm8(28));
                        MOV(32, R(RSCRATCH), Imm32(1));
                        SHL(32, R(RSCRATCH), R(RSCRATCH3));
                        TEST(32, R(RSCRATCH), Imm32(ARM::ConditionTable[cond]));

                        skipExecute = J_CC(CC_Z);
                    }
                    else
                    {
                        // Conditions 0-7 test a single flag. The bit index maps
                        // cond >> 1 = 0,1,2,3 to CPSR bits 30,29,31,28; the
                        // lowest bit of cond selects "set" vs "clear".
                        // could have used a LUT, but then where would be the fun?
                        BT(32, R(RCPSR), Imm8(28 + ((~(cond >> 1) & 1) << 1 | (cond >> 2 & 1) ^ (cond >> 1 & 1))));

                        skipExecute = J_CC(cond & 1 ? CC_C : CC_NC);
                    }
                }

                if (comp == NULL)
                {
                    MOV(64, R(ABI_PARAM1), R(RCPU));

                    u32 icode = ((CurrentInstr.Instr >> 4) & 0xF) | ((CurrentInstr.Instr >> 16) & 0xFF0);
                    ABI_CallFunction(ARMInterpreter::ARMInstrTable[icode]);
                }
                else
                {
                    // native ARM code generation is not implemented yet
                }

                FixupBranch skipFailed;
                if (CurrentInstr.Cond() < 0xE)
                {
                    // condition failed: only account for the code fetch
                    skipFailed = J();
                    SetJumpTarget(skipExecute);

                    AddCycles_C();

                    SetJumpTarget(skipFailed);
                }
            }
        }

        /*
            we don't need to collect the interpreted cycles,
            since all functions only add to it, the dispatcher
            can take care of it.
        */

        // the interpreter may have changed CPSR in memory: refresh the cached copy
        if (comp == NULL && i != instrsCount - 1)
            LoadCPSR();
    }

    SaveCPSR();

    LEA(32, RAX, MDisp(RCycles, ConstantCycles));

    ABI_PopRegistersAndAdjustStack({ABI_ALL_CALLEE_SAVED}, 8, 0);
    RET();

    return res;
}
// Placeholder for per-instruction code generation. It only sets up the two
// dispatch tables (one slot per ARM / Thumb instruction kind); every slot is
// still null and neither table is consulted yet.
void Compiler::Compile(RegCache& regs, const FetchedInstr& instr)
{
    // empty braces value-initialise every slot to a null member pointer
    const CompileFunc A_Comp[ARMInstrInfo::ak_Count] = {};
    const CompileFunc T_Comp[ARMInstrInfo::tk_Count] = {};
}
void Compiler::AddCycles_C()
{
s32 cycles = Num ?
NDS::ARM7MemTimings[CurrentInstr.CodeCycles][Thumb ? 1 : 3]
: ((R15 & 0x2) ? 0 : CurrentInstr.CodeCycles);
if (CurrentInstr.Cond() < 0xE)
ADD(32, R(RCycles), Imm8(cycles));
else
ConstantCycles += cycles;
}
// Emits code computing "rm shifted by an immediate amount" and returns the
// operand that holds the result.
//
// op        - shift type: 0 = LSL, 1 = LSR, 2 = ASR, 3 = ROR
// amount    - shift amount from the instruction; 0 selects the special forms
//             handled below (no shift for LSL, shift by 32 for LSR/ASR,
//             rotate through carry by one for ROR)
// rm        - host register holding the value to shift (left unmodified)
// S         - if set, the shifter carry-out is additionally produced in RSCRATCH2
// carryUsed - set to false when the operation produces no carry-out (LSL by 0)
//             or when `op` is invalid, true otherwise
//
// May use RSCRATCH for op2 and RSCRATCH2 for the carry value.
OpArg Compiler::Comp_ShiftRegImm(int op, int amount, Gen::X64Reg rm, bool S, bool& carryUsed)
{
    carryUsed = true;

    switch (op)
    {
    case 0: // LSL
        if (amount > 0)
        {
            MOV(32, R(RSCRATCH), R(rm));
            SHL(32, R(RSCRATCH), Imm8(amount));
            if (S)
                SETcc(CC_C, R(RSCRATCH2));

            return R(RSCRATCH);
        }
        else
        {
            carryUsed = false;
            return R(rm);
        }
    case 1: // LSR
        if (amount > 0)
        {
            MOV(32, R(RSCRATCH), R(rm));
            SHR(32, R(RSCRATCH), Imm8(amount));
            if (S)
                SETcc(CC_C, R(RSCRATCH2));
            return R(RSCRATCH);
        }
        else
        {
            // shift by 32: result is 0, carry-out is bit 31 of rm
            if (S)
            {
                MOV(32, R(RSCRATCH2), R(rm));
                SHR(32, R(RSCRATCH2), Imm8(31));
            }
            return Imm32(0);
        }
    case 2: // ASR
        // shift by 32 gives the same result as an arithmetic shift by 31
        MOV(32, R(RSCRATCH), R(rm));
        SAR(32, R(RSCRATCH), Imm8(amount ? amount : 31));
        if (S)
        {
            if (amount == 0)
            {
                MOV(32, R(RSCRATCH2), R(rm));
                SHR(32, R(RSCRATCH2), Imm8(31));
            }
            else
                SETcc(CC_C, R(RSCRATCH2));
        }
        return R(RSCRATCH);
    case 3: // ROR
        if (amount > 0)
        {
            MOV(32, R(RSCRATCH), R(rm));
            ROR_(32, R(RSCRATCH), Imm8(amount));
        }
        else
        {
            // rotate right by one through the guest carry flag (CPSR bit 29)
            BT(32, R(RCPSR), Imm8(29));
            MOV(32, R(RSCRATCH), R(rm));
            RCR(32, R(RSCRATCH), Imm8(1));
        }
        if (S)
            SETcc(CC_C, R(RSCRATCH2));
        return R(RSCRATCH);
    }

    // Only reachable with an invalid shift type. Previously control fell off
    // the end of the function here, which is undefined behaviour.
    assert(!"Comp_ShiftRegImm: invalid shift type");
    carryUsed = false;
    return R(rm);
}
// Stub: intended to emit native code for ARM ALU instructions; currently does nothing.
void Compiler::A_Comp_ALU(const FetchedInstr& instr)
{
}
}

View File

@ -0,0 +1,54 @@
#ifndef ARMJIT_COMPILER_H
#define ARMJIT_COMPILER_H
#include "../dolphin/x64Emitter.h"
#include "../ARMJIT.h"
namespace ARMJIT
{
// Fixed host register assignment used by the generated code.
// RCPU holds the pointer to the ARM object the block was compiled for.
const Gen::X64Reg RCPU = Gen::RBP;
// RCycles accumulates cycles added by emitted code; it is zeroed at block entry.
const Gen::X64Reg RCycles = Gen::R14;
// RCPSR holds the cached copy of the guest CPSR.
const Gen::X64Reg RCPSR = Gen::R15;
// Scratch registers. RSCRATCH3 has to stay ECX: it is used as a variable shift count.
const Gen::X64Reg RSCRATCH = Gen::EAX;
const Gen::X64Reg RSCRATCH2 = Gen::EDX;
const Gen::X64Reg RSCRATCH3 = Gen::ECX;
// x64 code generator: translates runs of fetched guest instructions into
// host code stored in the code block inherited from Gen::X64CodeBlock.
class Compiler : public Gen::X64CodeBlock
{
public:
    Compiler();

    // Emits code for instrsCount instructions and returns the entry point.
    CompiledBlock CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrsCount);

    // NOTE(review): StartBlock and FinaliseBlock are declared but have no
    // definition in the accompanying source file.
    void StartBlock(ARM* cpu);
    CompiledBlock FinaliseBlock();

    void Compile(RegCache& regs, const FetchedInstr& instr);
private:
    // Accounts for the code fetch cycles of CurrentInstr.
    void AddCycles_C();

    // Emits an immediate shift of rm; see the definition for details.
    Gen::OpArg Comp_ShiftRegImm(int op, int amount, Gen::X64Reg rm, bool S, bool& carryUsed);

    void A_Comp_ALU(const FetchedInstr& instr);

    // Load / write back the cached guest CPSR (RCPSR).
    void LoadCPSR();
    void SaveCPSR();

    // True while RCPSR holds changes that were not written back yet.
    bool CPSRDirty = false;

    // Instruction currently being compiled.
    FetchedInstr CurrentInstr;

    // Per-block state, set at the start of CompileBlock. Given defined
    // defaults (like CPSRDirty) so no helper can ever read indeterminate values.
    bool Thumb = false;       // block is Thumb code
    u32 Num = 0;              // CPU number of the block's CPU
    u32 R15 = 0;              // guest R15 value for the current instruction
    u32 ConstantCycles = 0;   // cycles known at compile time
};
}
#endif

376
src/ARM_InstrInfo.cpp Normal file
View File

@ -0,0 +1,376 @@
#include "ARM_InstrInfo.h"
#include <stdio.h>
namespace ARMInstrInfo
{
// Layout of an ARM table word: bits 0-12 hold the A_* flags below, the
// instruction kind (ak_*) is stored from bit 13 upwards.
#define ak(x) ((x) << 13)
// Flags describing how Decode() derives the register masks of an ARM
// instruction. The number in a name is the bit position of the register field
// inside the instruction word.
enum {
// register fields that are read
A_Read0 = 1 << 0,
A_Read16 = 1 << 1,
A_Read8 = 1 << 2,
A_Read12 = 1 << 3,
// register fields that are written
A_Write12 = 1 << 4,
A_Write16 = 1 << 5,
// the register at bits 16-19 is written if bit 21 of the instruction is set
A_MemWriteback = 1 << 6,
// R15 is always written, i.e. the instruction ends a block
A_BranchAlways = 1 << 7,
// for STRD/LDRD
// (the register at bits 12-15 and the one following it)
A_Read12Double = 1 << 8,
A_Write12Double = 1 << 9,
// R14 is written and R15 is read
A_Link = 1 << 10,
// block transfer: register list and writeback are inspected by Decode()
A_LDMSTM = 1 << 11,
// on any CPU other than number 0, Decode() additionally treats the
// instruction as A_BranchAlways | A_Link
A_ARM9Only = 1 << 12,
};
// Operand pattern of an ALU operation: A_BIOP ops also read the register at
// bits 16-19, A_MONOOP ops (MOV, MVN) do not.
#define A_BIOP A_Read16
#define A_MONOOP 0
// Defines the table words for all 18 encodings of the ALU operation x
// (immediate, register shifted by immediate, register shifted by register,
// each without and with the _S suffix). k is BIOP or MONOOP.
// Every form writes the register at bits 12-15; the register forms read the
// register at bits 0-3 and, when shifting by register, the one at bits 8-11.
#define A_IMPLEMENT_ALU_OP(x,k) \
const u32 A_##x##_IMM = A_Write12 | A_##k | ak(ak_##x##_IMM); \
const u32 A_##x##_REG_LSL_IMM = A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_LSL_IMM); \
const u32 A_##x##_REG_LSR_IMM = A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_LSR_IMM); \
const u32 A_##x##_REG_ASR_IMM = A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_ASR_IMM); \
const u32 A_##x##_REG_ROR_IMM = A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_ROR_IMM); \
const u32 A_##x##_REG_LSL_REG = A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSL_REG); \
const u32 A_##x##_REG_LSR_REG = A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSR_REG); \
const u32 A_##x##_REG_ASR_REG = A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ASR_REG); \
const u32 A_##x##_REG_ROR_REG = A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ROR_REG); \
\
const u32 A_##x##_IMM_S = A_Write12 | A_##k | ak(ak_##x##_IMM_S); \
const u32 A_##x##_REG_LSL_IMM_S = A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_LSL_IMM_S); \
const u32 A_##x##_REG_LSR_IMM_S = A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_LSR_IMM_S); \
const u32 A_##x##_REG_ASR_IMM_S = A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_ASR_IMM_S); \
const u32 A_##x##_REG_ROR_IMM_S = A_Write12 | A_##k | A_Read0 | ak(ak_##x##_REG_ROR_IMM_S); \
const u32 A_##x##_REG_LSL_REG_S = A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSL_REG_S); \
const u32 A_##x##_REG_LSR_REG_S = A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSR_REG_S); \
const u32 A_##x##_REG_ASR_REG_S = A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ASR_REG_S); \
const u32 A_##x##_REG_ROR_REG_S = A_Write12 | A_##k | A_Read0 | A_Read8 | ak(ak_##x##_REG_ROR_REG_S);
A_IMPLEMENT_ALU_OP(AND,BIOP)
A_IMPLEMENT_ALU_OP(EOR,BIOP)
A_IMPLEMENT_ALU_OP(SUB,BIOP)
A_IMPLEMENT_ALU_OP(RSB,BIOP)
A_IMPLEMENT_ALU_OP(ADD,BIOP)
A_IMPLEMENT_ALU_OP(ADC,BIOP)
A_IMPLEMENT_ALU_OP(SBC,BIOP)
A_IMPLEMENT_ALU_OP(RSC,BIOP)
A_IMPLEMENT_ALU_OP(ORR,BIOP)
A_IMPLEMENT_ALU_OP(MOV,MONOOP)
A_IMPLEMENT_ALU_OP(BIC,BIOP)
A_IMPLEMENT_ALU_OP(MVN,MONOOP)
// Alias: the _DBG variant carries the same flags and kind as the plain encoding.
const u32 A_MOV_REG_LSL_IMM_DBG = A_MOV_REG_LSL_IMM;
// Defines the table words for the nine encodings of the compare/test
// operation x. These operations write no register; they read the register at
// bits 16-19 and, in the register forms, the register at bits 0-3 (plus the
// one at bits 8-11 when shifting by register).
// The immediate form has no register at bits 0-3 (those bits belong to the
// immediate), so - like the _IMM form in A_IMPLEMENT_ALU_OP - it must not be
// flagged A_Read0.
#define A_IMPLEMENT_ALU_TEST(x) \
    const u32 A_##x##_IMM = A_Read16 | ak(ak_##x##_IMM); \
    const u32 A_##x##_REG_LSL_IMM = A_Read16 | A_Read0 | ak(ak_##x##_REG_LSL_IMM); \
    const u32 A_##x##_REG_LSR_IMM = A_Read16 | A_Read0 | ak(ak_##x##_REG_LSR_IMM); \
    const u32 A_##x##_REG_ASR_IMM = A_Read16 | A_Read0 | ak(ak_##x##_REG_ASR_IMM); \
    const u32 A_##x##_REG_ROR_IMM = A_Read16 | A_Read0 | ak(ak_##x##_REG_ROR_IMM); \
    const u32 A_##x##_REG_LSL_REG = A_Read16 | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSL_REG); \
    const u32 A_##x##_REG_LSR_REG = A_Read16 | A_Read0 | A_Read8 | ak(ak_##x##_REG_LSR_REG); \
    const u32 A_##x##_REG_ASR_REG = A_Read16 | A_Read0 | A_Read8 | ak(ak_##x##_REG_ASR_REG); \
    const u32 A_##x##_REG_ROR_REG = A_Read16 | A_Read0 | A_Read8 | ak(ak_##x##_REG_ROR_REG);

A_IMPLEMENT_ALU_TEST(TST)
A_IMPLEMENT_ALU_TEST(TEQ)
A_IMPLEMENT_ALU_TEST(CMP)
A_IMPLEMENT_ALU_TEST(CMN)
// Table words for the multiply family: the destination (or high half) is the
// register at bits 16-19, long forms also write the register at bits 12-15,
// accumulating forms read their accumulator registers.
const u32 A_MUL = A_Write16 | A_Read0 | A_Read8 | ak(ak_MUL);
const u32 A_MLA = A_Write16 | A_Read0 | A_Read8 | A_Read12 | ak(ak_MLA);
const u32 A_UMULL = A_Write16 | A_Write12 | A_Read0 | A_Read8 | ak(ak_UMULL);
const u32 A_UMLAL = A_Write16 | A_Write12 | A_Read16 | A_Read12 | A_Read0 | A_Read8 | ak(ak_UMLAL);
const u32 A_SMULL = A_Write16 | A_Write12 | A_Read0 | A_Read8 | ak(ak_SMULL);
const u32 A_SMLAL = A_Write16 | A_Write12 | A_Read16 | A_Read12 | A_Read0 | A_Read8 | ak(ak_SMLAL);
// SMLAxy has to carry its own kind; it used to be tagged ak_SMLALxy, which
// made it indistinguishable from the long variant below.
const u32 A_SMLAxy = A_Write16 | A_Read0 | A_Read8 | A_Read12 | ak(ak_SMLAxy);
const u32 A_SMLAWy = A_Write16 | A_Read0 | A_Read8 | A_Read12 | ak(ak_SMLAWy);
const u32 A_SMULWy = A_Write16 | A_Read0 | A_Read8 | ak(ak_SMULWy);
const u32 A_SMLALxy = A_Write16 | A_Write12 | A_Read16 | A_Read12 | A_Read0 | A_Read8 | ak(ak_SMLALxy);
const u32 A_SMULxy = A_Write16 | A_Read0 | A_Read8 | ak(ak_SMULxy);

// Count-leading-zeros and saturating arithmetic; flagged ARM9-only.
const u32 A_CLZ = A_Write12 | A_Read0 | A_ARM9Only | ak(ak_CLZ);
const u32 A_QADD = A_Write12 | A_Read0 | A_Read16 | A_ARM9Only | ak(ak_QADD);
const u32 A_QSUB = A_Write12 | A_Read0 | A_Read16 | A_ARM9Only | ak(ak_QSUB);
const u32 A_QDADD = A_Write12 | A_Read0 | A_Read16 | A_ARM9Only | ak(ak_QDADD);
const u32 A_QDSUB = A_Write12 | A_Read0 | A_Read16 | A_ARM9Only | ak(ak_QDSUB);
// Direction of a single-register transfer: loads write the register at
// bits 12-15, stores read it.
#define A_LDR A_Write12
#define A_STR A_Read12
// Defines the table words for the word/byte load/store x (k is LDR or STR).
// The base register at bits 16-19 is always read. The first five forms write
// it back only if bit 21 of the instruction is set (A_MemWriteback), the
// _POST forms always write it. Register-offset forms read the register at bits 0-3.
#define A_IMPLEMENT_WB_LDRSTR(x,k) \
const u32 A_##x##_IMM = A_##k | A_Read16 | A_MemWriteback | ak(ak_##x##_IMM); \
const u32 A_##x##_REG_LSL = A_##k | A_Read16 | A_MemWriteback | A_Read0 | ak(ak_##x##_REG_LSL); \
const u32 A_##x##_REG_LSR = A_##k | A_Read16 | A_MemWriteback | A_Read0 | ak(ak_##x##_REG_LSR); \
const u32 A_##x##_REG_ASR = A_##k | A_Read16 | A_MemWriteback | A_Read0 | ak(ak_##x##_REG_ASR); \
const u32 A_##x##_REG_ROR = A_##k | A_Read16 | A_MemWriteback | A_Read0 | ak(ak_##x##_REG_ROR); \
\
const u32 A_##x##_POST_IMM = A_##k | A_Read16 | A_Write16 | ak(ak_##x##_POST_IMM); \
const u32 A_##x##_POST_REG_LSL = A_##k | A_Read16 | A_Write16 | A_Read0 | ak(ak_##x##_POST_REG_LSL); \
const u32 A_##x##_POST_REG_LSR = A_##k | A_Read16 | A_Write16 | A_Read0 | ak(ak_##x##_POST_REG_LSR); \
const u32 A_##x##_POST_REG_ASR = A_##k | A_Read16 | A_Write16 | A_Read0 | ak(ak_##x##_POST_REG_ASR); \
const u32 A_##x##_POST_REG_ROR = A_##k | A_Read16 | A_Write16 | A_Read0 | ak(ak_##x##_POST_REG_ROR);
A_IMPLEMENT_WB_LDRSTR(STR,STR)
A_IMPLEMENT_WB_LDRSTR(STRB,STR)
A_IMPLEMENT_WB_LDRSTR(LDR,LDR)
A_IMPLEMENT_WB_LDRSTR(LDRB,LDR)
// Doubleword transfers touch the register at bits 12-15 and the one after it.
#define A_LDRD A_Write12Double
#define A_STRD A_Read12Double
// Defines the table words for the halfword/doubleword/signed load/store x
// (k is LDR, STR, LDRD or STRD). All four forms are flagged as both reading
// and writing the base register at bits 16-19.
#define A_IMPLEMENT_HD_LDRSTR(x,k) \
const u32 A_##x##_IMM = A_##k | A_Read16 | A_Write16 | ak(ak_##x##_IMM); \
const u32 A_##x##_REG = A_##k | A_Read16 | A_Write16 | A_Read0 | ak(ak_##x##_REG); \
const u32 A_##x##_POST_IMM = A_##k | A_Read16 | A_Write16 | ak(ak_##x##_POST_IMM); \
const u32 A_##x##_POST_REG = A_##k | A_Read16 | A_Write16 | A_Read0 | ak(ak_##x##_POST_REG);
A_IMPLEMENT_HD_LDRSTR(STRH,STR)
A_IMPLEMENT_HD_LDRSTR(LDRD,LDRD)
A_IMPLEMENT_HD_LDRSTR(STRD,STRD)
A_IMPLEMENT_HD_LDRSTR(LDRH,LDR)
A_IMPLEMENT_HD_LDRSTR(LDRSB,LDR)
A_IMPLEMENT_HD_LDRSTR(LDRSH,LDR)
// Swap instructions.
const u32 A_SWP = A_Write12 | A_Read16 | A_Read0 | ak(ak_SWP);
const u32 A_SWPB = A_Write12 | A_Read16 | A_Read0 | ak(ak_SWPB);
// Block transfers: only the base register is flagged here; Decode() looks at
// the instruction word itself for R15 in the register list and for writeback.
const u32 A_LDM = A_Read16 | A_LDMSTM | ak(ak_LDM);
const u32 A_STM = A_Read16 | A_LDMSTM | ak(ak_STM);
// Branches, unknown instructions and SVC are all flagged as writing R15, so they end a block.
const u32 A_B = A_BranchAlways | ak(ak_B);
const u32 A_BL = A_BranchAlways | A_Link | ak(ak_BL);
const u32 A_BLX_IMM = A_BranchAlways | A_Link | ak(ak_BLX_IMM);
const u32 A_BX = A_BranchAlways | A_Read0 | ak(ak_BX);
const u32 A_BLX_REG = A_BranchAlways | A_Link | A_Read0 | ak(ak_BLX_REG);
const u32 A_UNK = A_BranchAlways | A_Link | ak(ak_UNK);
// Status register and coprocessor accesses.
// NOTE(review): all five are flagged A_ARM9Only, so on the other CPU Decode()
// treats them as block-ending - confirm this is intended for MSR/MRS.
const u32 A_MSR_IMM = A_ARM9Only | ak(ak_MSR_IMM);
const u32 A_MSR_REG = A_Read0 | A_ARM9Only | ak(ak_MSR_REG);
const u32 A_MRS = A_Write12 | A_ARM9Only | ak(ak_MRS);
const u32 A_MCR = A_Read12 | A_ARM9Only | ak(ak_MCR);
const u32 A_MRC = A_Write12 | A_ARM9Only | ak(ak_MRC);
const u32 A_SVC = A_BranchAlways | A_Link | ak(ak_SVC);
// THUMB
// Layout of a Thumb table word: bits 0-15 hold the T_* flags below, the
// instruction kind (tk_*) is stored from bit 16 upwards.
#define tk(x) ((x) << 16)
// Flags describing which registers a Thumb instruction reads and writes.
// The number in a name is the bit position of the 3-bit register field inside
// the instruction word; the "Hi" variants denote 4-bit register numbers.
enum {
T_Read0 = 1 << 0,
T_Read3 = 1 << 1,
T_Read6 = 1 << 2,
T_Read8 = 1 << 3,
T_Write0 = 1 << 4,
T_Write8 = 1 << 5,
T_ReadHi0 = 1 << 6,
T_ReadHi3 = 1 << 7,
T_WriteHi0 = 1 << 8,
// fixed registers
T_ReadR13 = 1 << 9,
T_WriteR13 = 1 << 10,
T_ReadR15 = 1 << 11,
// R15 is always written, i.e. the instruction ends a block
T_BranchAlways = 1 << 12,
T_ReadR14 = 1 << 13,
T_WriteR14 = 1 << 14,
// R15 is written if bit 8 of the instruction is set
T_PopPC = 1 << 15
};
// Table words for every Thumb instruction kind: register usage flags combined
// with the kind. Two entries were corrected:
//  - CMP with immediate only reads its register (it was flagged as writing it)
//  - ADD SP,#imm reads R13 as well as writing it

// shifts by immediate and three-operand add/subtract
const u32 T_LSL_IMM = T_Write0 | T_Read3 | tk(tk_LSL_IMM);
const u32 T_LSR_IMM = T_Write0 | T_Read3 | tk(tk_LSR_IMM);
const u32 T_ASR_IMM = T_Write0 | T_Read3 | tk(tk_ASR_IMM);
const u32 T_ADD_REG_ = T_Write0 | T_Read3 | T_Read6 | tk(tk_ADD_REG_);
const u32 T_SUB_REG_ = T_Write0 | T_Read3 | T_Read6 | tk(tk_SUB_REG_);
const u32 T_ADD_IMM_ = T_Write0 | T_Read3 | tk(tk_ADD_IMM_);
const u32 T_SUB_IMM_ = T_Write0 | T_Read3 | tk(tk_SUB_IMM_);

// operations with an 8-bit immediate
const u32 T_MOV_IMM = T_Write8 | tk(tk_MOV_IMM);
const u32 T_CMP_IMM = T_Read8 | tk(tk_CMP_IMM);
const u32 T_ADD_IMM = T_Write8 | T_Read8 | tk(tk_ADD_IMM);
const u32 T_SUB_IMM = T_Write8 | T_Read8 | tk(tk_SUB_IMM);

// two-register ALU operations
const u32 T_AND_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_AND_REG);
const u32 T_EOR_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_EOR_REG);
const u32 T_LSL_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_LSL_REG);
const u32 T_LSR_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_LSR_REG);
const u32 T_ASR_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_ASR_REG);
const u32 T_ADC_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_ADC_REG);
const u32 T_SBC_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_SBC_REG);
const u32 T_ROR_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_ROR_REG);
const u32 T_TST_REG = T_Read0 | T_Read3 | tk(tk_TST_REG);
const u32 T_NEG_REG = T_Write0 | T_Read3 | tk(tk_NEG_REG);
const u32 T_CMP_REG = T_Read0 | T_Read3 | tk(tk_CMP_REG);
const u32 T_CMN_REG = T_Read0 | T_Read3 | tk(tk_CMN_REG);
const u32 T_ORR_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_ORR_REG);
const u32 T_MUL_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_MUL_REG);
const u32 T_BIC_REG = T_Write0 | T_Read0 | T_Read3 | tk(tk_BIC_REG);
const u32 T_MVN_REG = T_Write0 | T_Read3 | tk(tk_MVN_REG);

// operations on the full register set
const u32 T_ADD_HIREG = T_WriteHi0 | T_ReadHi0 | T_ReadHi3 | tk(tk_ADD_HIREG);
const u32 T_CMP_HIREG = T_ReadHi0 | T_ReadHi3 | tk(tk_CMP_HIREG);
const u32 T_MOV_HIREG = T_WriteHi0 | T_ReadHi3 | tk(tk_MOV_HIREG);

// PC/SP relative address generation and stack pointer adjustment
const u32 T_ADD_PCREL = T_Write8 | T_ReadR15 | tk(tk_ADD_PCREL);
const u32 T_ADD_SPREL = T_Write8 | T_ReadR13 | tk(tk_ADD_SPREL);
const u32 T_ADD_SP = T_WriteR13 | T_ReadR13 | tk(tk_ADD_SP);

// loads and stores
const u32 T_LDR_PCREL = T_Write8 | tk(tk_LDR_PCREL);
const u32 T_STR_REG = T_Read0 | T_Read3 | T_Read6 | tk(tk_STR_REG);
const u32 T_STRB_REG = T_Read0 | T_Read3 | T_Read6 | tk(tk_STRB_REG);
const u32 T_LDR_REG = T_Write0 | T_Read3 | T_Read6 | tk(tk_LDR_REG);
const u32 T_LDRB_REG = T_Write0 | T_Read3 | T_Read6 | tk(tk_LDRB_REG);
const u32 T_STRH_REG = T_Read0 | T_Read3 | T_Read6 | tk(tk_STRH_REG);
const u32 T_LDRSB_REG = T_Write0 | T_Read3 | T_Read6 | tk(tk_LDRSB_REG);
const u32 T_LDRH_REG = T_Write0 | T_Read3 | T_Read6 | tk(tk_LDRH_REG);
const u32 T_LDRSH_REG = T_Write0 | T_Read3 | T_Read6 | tk(tk_LDRSH_REG);
const u32 T_STR_IMM = T_Read0 | T_Read3 | tk(tk_STR_IMM);
const u32 T_LDR_IMM = T_Write0 | T_Read3 | tk(tk_LDR_IMM);
const u32 T_STRB_IMM = T_Read0 | T_Read3 | tk(tk_STRB_IMM);
const u32 T_LDRB_IMM = T_Write0 | T_Read3 | tk(tk_LDRB_IMM);
const u32 T_STRH_IMM = T_Read0 | T_Read3 | tk(tk_STRH_IMM);
const u32 T_LDRH_IMM = T_Write0 | T_Read3 | tk(tk_LDRH_IMM);
const u32 T_STR_SPREL = T_Read8 | T_ReadR13 | tk(tk_STR_SPREL);
const u32 T_LDR_SPREL = T_Write8 | T_ReadR13 | tk(tk_LDR_SPREL);

// stack and block transfers
// NOTE(review): PUSH is flagged T_ReadR15; the optional extra register a
// Thumb PUSH stores is R14 - left unchanged, confirm which one is intended.
const u32 T_PUSH = T_ReadR15 | T_ReadR13 | T_WriteR13 | tk(tk_PUSH);
const u32 T_POP = T_PopPC | T_ReadR13 | T_WriteR13 | tk(tk_POP);
const u32 T_LDMIA = T_Read8 | T_Write8 | tk(tk_LDMIA);
const u32 T_STMIA = T_Read8 | T_Write8 | tk(tk_STMIA);

// branches, unknown instructions and SVC: all end a block
const u32 T_BCOND = T_BranchAlways | tk(tk_BCOND);
const u32 T_BX = T_BranchAlways | T_ReadHi3 | tk(tk_BX);
const u32 T_BLX_REG = T_BranchAlways | T_ReadR15 | T_WriteR14 | T_ReadHi3 | tk(tk_BLX_REG);
const u32 T_B = T_BranchAlways | tk(tk_B);
const u32 T_BL_LONG_1 = T_WriteR14 | T_ReadR15 | tk(tk_BL_LONG_1);
const u32 T_BL_LONG_2 = T_BranchAlways | T_ReadR14 | T_WriteR14 | T_ReadR15 | tk(tk_BL_LONG_2);
const u32 T_UNK = T_BranchAlways | T_WriteR14 | tk(tk_UNK);
const u32 T_SVC = T_BranchAlways | T_WriteR14 | T_ReadR15 | tk(tk_SVC);
#define INSTRFUNC_PROTO(x) u32 x
#include "ARM_InstrTable.h"
#undef INSTRFUNC_PROTO
// Looks up the table word for an instruction and turns it into an Info:
// bit masks of the registers it reads (SrcRegs) and writes (DstRegs) plus its
// instruction kind.
//
// thumb - decode `instr` as a Thumb instruction (otherwise as ARM)
// num   - CPU number; on any CPU other than 0, instructions flagged
//         A_ARM9Only are additionally treated as branch-and-link
// instr - the instruction word
//
// The T_ReadR14 / T_WriteR14 flags are now translated into the masks as
// well; they were set in the Thumb table words but ignored here.
Info Decode(bool thumb, u32 num, u32 instr)
{
    Info res = {0};
    if (thumb)
    {
        u32 data = THUMBInstrTable[(instr >> 6) & 0x3FF];

        // low registers, 3-bit fields
        if (data & T_Read0)
            res.SrcRegs |= 1 << (instr & 0x7);
        if (data & T_Read3)
            res.SrcRegs |= 1 << ((instr >> 3) & 0x7);
        if (data & T_Read6)
            res.SrcRegs |= 1 << ((instr >> 6) & 0x7);
        if (data & T_Read8)
            res.SrcRegs |= 1 << ((instr >> 8) & 0x7);

        if (data & T_Write0)
            res.DstRegs |= 1 << (instr & 0x7);
        if (data & T_Write8)
            res.DstRegs |= 1 << ((instr >> 8) & 0x7);

        // full register set: bit 7 of the instruction extends the field at bits 0-2
        if (data & T_ReadHi0)
            res.SrcRegs |= 1 << ((instr & 0x7) | ((instr >> 4) & 0x8));
        if (data & T_ReadHi3)
            res.SrcRegs |= 1 << ((instr >> 3) & 0xF);
        if (data & T_WriteHi0)
            res.DstRegs |= 1 << ((instr & 0x7) | ((instr >> 4) & 0x8));

        // fixed registers
        if (data & T_ReadR13)
            res.SrcRegs |= (1 << 13);
        if (data & T_WriteR13)
            res.DstRegs |= (1 << 13);
        if (data & T_ReadR14)
            res.SrcRegs |= (1 << 14);
        if (data & T_WriteR14)
            res.DstRegs |= (1 << 14);
        if (data & T_ReadR15)
            res.SrcRegs |= (1 << 15);

        if (data & T_BranchAlways)
            res.DstRegs |= (1 << 15);

        // POP only writes R15 if bit 8 of the instruction is set
        if ((data & T_PopPC) && (instr & (1 << 8)))
            res.DstRegs |= 1 << 15;

        res.Kind = (data >> 16) & 0x3F;

        return res;
    }
    else
    {
        u32 data = ARMInstrTable[((instr >> 4) & 0xF) | ((instr >> 16) & 0xFF0)];

        // this bit pattern is always BLX (immediate), whatever the table says
        if ((instr & 0xFE000000) == 0xFA000000)
            data = A_BLX_IMM;

        if ((data & A_ARM9Only) && num != 0)
            data |= A_BranchAlways | A_Link;

        if (data & A_Read0)
            res.SrcRegs |= 1 << (instr & 0xF);
        if (data & A_Read16)
            res.SrcRegs |= 1 << ((instr >> 16) & 0xF);
        if (data & A_Read8)
            res.SrcRegs |= 1 << ((instr >> 8) & 0xF);
        if (data & A_Read12)
            res.SrcRegs |= 1 << ((instr >> 12) & 0xF);

        if (data & A_Write12)
            res.DstRegs |= 1 << ((instr >> 12) & 0xF);
        if (data & A_Write16)
            res.DstRegs |= 1 << ((instr >> 16) & 0xF);

        // base register writeback only if bit 21 is set
        if ((data & A_MemWriteback) && (instr & (1 << 21)))
            res.DstRegs |= 1 << ((instr >> 16) & 0xF);

        if (data & A_BranchAlways)
            res.DstRegs |= 1 << 15;

        // doubleword transfers: the register at bits 12-15 and its successor
        if (data & A_Read12Double)
        {
            res.SrcRegs |= 1 << ((instr >> 12) & 0xF);
            res.SrcRegs |= 1 << (((instr >> 12) & 0xF) + 1);
        }
        if (data & A_Write12Double)
        {
            res.DstRegs |= 1 << ((instr >> 12) & 0xF);
            res.DstRegs |= 1 << (((instr >> 12) & 0xF) + 1);
        }

        if (data & A_Link)
        {
            res.DstRegs |= 1 << 14;
            res.SrcRegs |= 1 << 15;
        }

        if (data & A_LDMSTM)
        {
            // of the register list only R15 is recorded, and only if
            // bit 20 of the instruction is set
            res.DstRegs |= instr & (!!(instr & (1 << 20)) << 15);
            if (instr & (1 << 21))
                res.DstRegs |= 1 << ((instr >> 16) & 0xF);
        }

        res.Kind = (data >> 13) & 0x1FF;

        return res;
    }
}
}

232
src/ARM_InstrInfo.h Normal file
View File

@ -0,0 +1,232 @@
#ifndef ARMINSTRINFO_H
#define ARMINSTRINFO_H
#include "types.h"
namespace ARMInstrInfo
{
// Instruction kinds, for faster dispatch
// Expands to the 18 enumerators of ALU operation n, in table order:
// register shifted by immediate (4), register shifted by register (4),
// immediate (1), followed by the same nine again with the _S suffix.
// The last line must not end in a backslash, otherwise whatever follows the
// macro gets spliced into its body.
#define ak_ALU(n) \
    ak_##n##_REG_LSL_IMM, \
    ak_##n##_REG_LSR_IMM, \
    ak_##n##_REG_ASR_IMM, \
    ak_##n##_REG_ROR_IMM, \
    \
    ak_##n##_REG_LSL_REG, \
    ak_##n##_REG_LSR_REG, \
    ak_##n##_REG_ASR_REG, \
    ak_##n##_REG_ROR_REG, \
    \
    ak_##n##_IMM, \
    \
    ak_##n##_REG_LSL_IMM_S, \
    ak_##n##_REG_LSR_IMM_S, \
    ak_##n##_REG_ASR_IMM_S, \
    ak_##n##_REG_ROR_IMM_S, \
    \
    ak_##n##_REG_LSL_REG_S, \
    ak_##n##_REG_LSR_REG_S, \
    ak_##n##_REG_ASR_REG_S, \
    ak_##n##_REG_ROR_REG_S, \
    \
    ak_##n##_IMM_S
#define ak_Test(n) \
ak_##n##_REG_LSL_IMM, \
ak_##n##_REG_LSR_IMM, \
ak_##n##_REG_ASR_IMM, \
ak_##n##_REG_ROR_IMM, \
\
ak_##n##_REG_LSL_REG, \
ak_##n##_REG_LSR_REG, \
ak_##n##_REG_ASR_REG, \
ak_##n##_REG_ROR_REG, \
\
ak_##n##_IMM
#define ak_WB_LDRSTR(n) \
ak_##n##_REG_LSL, \
ak_##n##_REG_LSR, \
ak_##n##_REG_ASR, \
ak_##n##_REG_ROR, \
\
ak_##n##_IMM, \
\
ak_##n##_POST_REG_LSL, \
ak_##n##_POST_REG_LSR, \
ak_##n##_POST_REG_ASR, \
ak_##n##_POST_REG_ROR, \
\
ak_##n##_POST_IMM
// Expands to four enumerators for load/store opcode `n`: register and
// immediate forms, each with and without the POST_ prefix.
#define ak_HD_LDRSTR(n) \
ak_##n##_REG, \
ak_##n##_IMM, \
\
ak_##n##_POST_REG, \
ak_##n##_POST_IMM
// Instruction kind identifiers. Two independent numbering sequences share
// this enum: the ak_* values start at 0 and end with ak_Count, then
// tk_LSL_IMM is reset to 0 so the tk_* values overlap the ak_* range.
// A kind value is therefore only meaningful together with the instruction
// set it was decoded for (presumably ak_ = ARM, tk_ = Thumb - TODO confirm).
// The order of the entries defines their values; do not reorder.
enum
{
// data processing: 18 kinds per opcode, see ak_ALU
ak_ALU(AND),
ak_ALU(EOR),
ak_ALU(SUB),
ak_ALU(RSB),
ak_ALU(ADD),
ak_ALU(ADC),
ak_ALU(SBC),
ak_ALU(RSC),
ak_ALU(ORR),
ak_ALU(MOV),
ak_ALU(BIC),
ak_ALU(MVN),
ak_ALU(TST),
ak_ALU(TEQ),
ak_ALU(CMP),
ak_ALU(CMN),
// multiplies and saturating arithmetic
ak_MUL,
ak_MLA,
ak_UMULL,
ak_UMLAL,
ak_SMULL,
ak_SMLAL,
ak_SMLAxy,
ak_SMLAWy,
ak_SMULWy,
ak_SMLALxy,
ak_SMULxy,
ak_CLZ,
ak_QADD,
ak_QSUB,
ak_QDADD,
ak_QDSUB,
// loads and stores: 10 kinds per opcode (ak_WB_LDRSTR) or 4 (ak_HD_LDRSTR)
ak_WB_LDRSTR(STR),
ak_WB_LDRSTR(STRB),
ak_WB_LDRSTR(LDR),
ak_WB_LDRSTR(LDRB),
ak_HD_LDRSTR(STRH),
ak_HD_LDRSTR(LDRD),
ak_HD_LDRSTR(STRD),
ak_HD_LDRSTR(LDRH),
ak_HD_LDRSTR(LDRSB),
ak_HD_LDRSTR(LDRSH),
ak_SWP,
ak_SWPB,
ak_LDM,
ak_STM,
// branches
ak_B,
ak_BL,
ak_BLX_IMM,
ak_BX,
ak_BLX_REG,
// everything else
ak_UNK,
ak_MSR_IMM,
ak_MSR_REG,
ak_MRS,
ak_MCR,
ak_MRC,
ak_SVC,
// number of ak_* kinds
ak_Count,
// second sequence, restarted at 0
tk_LSL_IMM = 0,
tk_LSR_IMM,
tk_ASR_IMM,
tk_ADD_REG_,
tk_SUB_REG_,
tk_ADD_IMM_,
tk_SUB_IMM_,
tk_MOV_IMM,
tk_CMP_IMM,
tk_ADD_IMM,
tk_SUB_IMM,
tk_AND_REG,
tk_EOR_REG,
tk_LSL_REG,
tk_LSR_REG,
tk_ASR_REG,
tk_ADC_REG,
tk_SBC_REG,
tk_ROR_REG,
tk_TST_REG,
tk_NEG_REG,
tk_CMP_REG,
tk_CMN_REG,
tk_ORR_REG,
tk_MUL_REG,
tk_BIC_REG,
tk_MVN_REG,
tk_ADD_HIREG,
tk_CMP_HIREG,
tk_MOV_HIREG,
tk_ADD_PCREL,
tk_ADD_SPREL,
tk_ADD_SP,
tk_LDR_PCREL,
tk_STR_REG,
tk_STRB_REG,
tk_LDR_REG,
tk_LDRB_REG,
tk_STRH_REG,
tk_LDRSB_REG,
tk_LDRH_REG,
tk_LDRSH_REG,
tk_STR_IMM,
tk_LDR_IMM,
tk_STRB_IMM,
tk_LDRB_IMM,
tk_STRH_IMM,
tk_LDRH_IMM,
tk_STR_SPREL,
tk_LDR_SPREL,
tk_PUSH,
tk_POP,
tk_LDMIA,
tk_STMIA,
tk_BCOND,
tk_BX,
tk_BLX_REG,
tk_B,
tk_BL_LONG_1,
tk_BL_LONG_2,
tk_UNK,
tk_SVC,
// number of tk_* kinds
tk_Count
};
// Summary of one decoded instruction, as returned by Decode().
struct Info
{
    // Register sets as bitmasks: bit N set means register N is written
    // (DstRegs) or read (SrcRegs) by the instruction.
    u16 DstRegs, SrcRegs;
    // Instruction kind; presumably one of the ak_*/tk_* enumerators.
    u16 Kind;

    // True when register 15 is in the destination set, i.e. the instruction
    // writes the program counter. Does not modify the object, so it is
    // callable on const Info values as well.
    bool Branches() const
    {
        return (DstRegs & (1 << 15)) != 0;
    }
};
// Decodes the raw instruction word `instr` into its register usage and kind.
// `thumb` presumably selects the Thumb decoder (tk_* kinds) over the ARM one
// (ak_* kinds); the role of `num` is not visible from this header -
// presumably the CPU index (0 = ARM9, 1 = ARM7). TODO confirm both.
Info Decode(bool thumb, u32 num, u32 instr);
}
#endif

View File

@ -1,5 +1,7 @@
project(core)
set (CMAKE_CXX_STANDARD 14)
add_library(core STATIC
ARCodeList.cpp
AREngine.cpp
@ -8,6 +10,7 @@ add_library(core STATIC
ARMInterpreter_ALU.cpp
ARMInterpreter_Branch.cpp
ARMInterpreter_LoadStore.cpp
ARM_InstrInfo.cpp
Config.cpp
CP15.cpp
CRC32.cpp
@ -27,6 +30,15 @@ add_library(core STATIC
SPU.cpp
Wifi.cpp
WifiAP.cpp
ARMJIT.cpp
ARMJIT_x64/ARMJIT_Compiler.cpp
dolphin/CommonFuncs.cpp
dolphin/x64ABI.cpp
dolphin/x64CPUDetect.cpp
dolphin/x64Emitter.cpp
dolphin/MemoryUtil.cpp
)
if (WIN32)

View File

@ -20,6 +20,7 @@
#include <string.h>
#include "NDS.h"
#include "ARM.h"
#include "ARMJIT.h"
// access timing for cached regions
@ -811,6 +812,7 @@ void ARMv5::DataWrite8(u32 addr, u8 val)
{
DataCycles = 1;
*(u8*)&ITCM[addr & 0x7FFF] = val;
ARMJIT::cache.ARM9_ITCM[(addr & 0x7FFF) >> 1] = NULL;
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
@ -832,6 +834,7 @@ void ARMv5::DataWrite16(u32 addr, u16 val)
{
DataCycles = 1;
*(u16*)&ITCM[addr & 0x7FFF] = val;
ARMJIT::cache.ARM9_ITCM[(addr & 0x7FFF) >> 1] = NULL;
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
@ -853,6 +856,8 @@ void ARMv5::DataWrite32(u32 addr, u32 val)
{
DataCycles = 1;
*(u32*)&ITCM[addr & 0x7FFF] = val;
ARMJIT::cache.ARM9_ITCM[(addr & 0x7FFF) >> 1] = NULL;
ARMJIT::cache.ARM9_ITCM[((addr + 2) & 0x7FFF) >> 1] = NULL;
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
@ -874,6 +879,8 @@ void ARMv5::DataWrite32S(u32 addr, u32 val)
{
DataCycles += 1;
*(u32*)&ITCM[addr & 0x7FFF] = val;
ARMJIT::cache.ARM9_ITCM[(addr & 0x7FFF) / 2] = NULL;
ARMJIT::cache.ARM9_ITCM[(addr & 0x7FFF) / 2 + 1] = NULL;
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))

View File

@ -32,6 +32,7 @@
#include "Wifi.h"
#include "AREngine.h"
#include "Platform.h"
#include "ARMJIT.h"
namespace NDS
@ -161,6 +162,8 @@ bool Init()
ARM9 = new ARMv5();
ARM7 = new ARMv4();
ARMJIT::Init();
DMAs[0] = new DMA(0, 0);
DMAs[1] = new DMA(0, 1);
DMAs[2] = new DMA(0, 2);
@ -191,6 +194,8 @@ void DeInit()
delete ARM9;
delete ARM7;
ARMJIT::DeInit();
for (int i = 0; i < 8; i++)
delete DMAs[i];
@ -1822,6 +1827,8 @@ u32 ARM9Read32(u32 addr)
void ARM9Write8(u32 addr, u8 val)
{
ARMJIT::Invalidate16(0, addr);
switch (addr & 0xFF000000)
{
case 0x02000000:
@ -1872,6 +1879,8 @@ void ARM9Write8(u32 addr, u8 val)
void ARM9Write16(u32 addr, u16 val)
{
ARMJIT::Invalidate16(0, addr);
switch (addr & 0xFF000000)
{
case 0x02000000:
@ -1938,6 +1947,8 @@ void ARM9Write16(u32 addr, u16 val)
void ARM9Write32(u32 addr, u32 val)
{
ARMJIT::Invalidate32(0, addr);
switch (addr & 0xFF000000)
{
case 0x02000000:
@ -2231,6 +2242,8 @@ u32 ARM7Read32(u32 addr)
void ARM7Write8(u32 addr, u8 val)
{
ARMJIT::Invalidate16(1, addr);
switch (addr & 0xFF800000)
{
case 0x02000000:
@ -2290,6 +2303,8 @@ void ARM7Write8(u32 addr, u8 val)
void ARM7Write16(u32 addr, u16 val)
{
ARMJIT::Invalidate16(1, addr);
switch (addr & 0xFF800000)
{
case 0x02000000:
@ -2359,6 +2374,8 @@ void ARM7Write16(u32 addr, u16 val)
void ARM7Write32(u32 addr, u32 val)
{
ARMJIT::Invalidate32(1, addr);
switch (addr & 0xFF800000)
{
case 0x02000000:

47
src/dolphin/Assert.h Normal file
View File

@ -0,0 +1,47 @@
// Copyright 2015 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license_dolphin.txt file included.
#pragma once
#include <assert.h>
// Dolphin reports a failed ASSERT_MSG through PanicYesNo()/Crash(); this port
// forwards the condition `_a_` to the C assert() instead. The log type `_t_`
// and the format string with its arguments are accepted for source
// compatibility but are neither evaluated nor printed.
#define ASSERT_MSG(_t_, _a_, _fmt_, ...) assert(_a_)
// Dolphin only checks DEBUG_ASSERT_MSG at debug log levels and reports through
// ERROR_LOG/PanicYesNo/Crash; this port forwards the condition `_a_` to the C
// assert() instead. The log type `_t_` and the message with its arguments are
// accepted for source compatibility but ignored.
// The expansion carries no trailing semicolon (the caller supplies it), so the
// macro behaves like a single statement inside if/else, matching ASSERT_MSG.
#define DEBUG_ASSERT_MSG(_t_, _a_, _msg_, ...) assert(_a_)
// Plain assertion: forwards `_a_` to the C assert(). Dolphin's interactive
// "Ignore and continue?" dialog is not available in this port.
#define ASSERT(_a_) assert(_a_)
// Debug assertion: forwards `_a_` to the C assert(). Dolphin gates this on
// its log level; here it is active whenever assert() itself is.
#define DEBUG_ASSERT(_a_) assert(_a_)

218
src/dolphin/BitSet.h Normal file
View File

@ -0,0 +1,218 @@
// This file is under the public domain.
#pragma once
#include <cstddef>
#include <initializer_list>
#include <type_traits>
#include "../types.h"
#ifdef _WIN32
#include <intrin.h>
namespace Common
{
// Portable population count for an unsigned integer type T, using the
// parallel bit-summing scheme from
// https://graphics.stanford.edu/~seander/bithacks.html
// GCC has this built in, but MSVC's intrinsic will only emit the actual
// POPCNT instruction, which we're not depending on.
template <typename T>
constexpr int CountSetBits(T v)
{
  // Repeating bit patterns scaled to the width of T.
  const T all_ones = (T) ~(T)0;
  const T every_other_bit = all_ones / 3;        // 0x55...
  const T low_bit_pairs = all_ones / 15 * 3;     // 0x33...
  const T low_nibbles = all_ones / 255 * 15;     // 0x0F...
  const T low_bit_of_bytes = all_ones / 255;     // 0x01...

  // Sum neighbouring bits into 2-bit, then 4-bit, then 8-bit counters.
  v = v - ((v >> 1) & every_other_bit);
  v = (v & low_bit_pairs) + ((v >> 2) & low_bit_pairs);
  v = (v + (v >> 4)) & low_nibbles;
  // The multiply accumulates every byte counter into the top byte.
  return (T)(v * low_bit_of_bytes) >> (sizeof(T) - 1) * 8;
}
// MSVC flavour: index of the lowest set bit of `val`, via _BitScanForward
// (_BitScanForward64 for u64). The intrinsic's return value is not checked,
// so `index` is returned uninitialised if the intrinsic does not write it -
// presumably the case for val == 0; callers must pass a non-zero value.
inline int LeastSignificantSetBit(u8 val)
{
unsigned long index;
_BitScanForward(&index, val);
return (int)index;
}
inline int LeastSignificantSetBit(u16 val)
{
unsigned long index;
_BitScanForward(&index, val);
return (int)index;
}
inline int LeastSignificantSetBit(u32 val)
{
unsigned long index;
_BitScanForward(&index, val);
return (int)index;
}
inline int LeastSignificantSetBit(u64 val)
{
unsigned long index;
_BitScanForward64(&index, val);
return (int)index;
}
#else
namespace Common
{
// GCC/Clang flavour: number of set bits in `val`, delegated to the compiler
// builtins (__builtin_popcount for up to 32 bits, __builtin_popcountll for u64).
constexpr int CountSetBits(u8 val)
{
return __builtin_popcount(val);
}
constexpr int CountSetBits(u16 val)
{
return __builtin_popcount(val);
}
constexpr int CountSetBits(u32 val)
{
return __builtin_popcount(val);
}
constexpr int CountSetBits(u64 val)
{
return __builtin_popcountll(val);
}
// GCC/Clang flavour: index of the lowest set bit of `val`, delegated to
// __builtin_ctz (__builtin_ctzll for u64). No zero check is made here;
// NOTE(review): GCC documents the builtin's result as undefined for 0, so
// callers must pass a non-zero value.
inline int LeastSignificantSetBit(u8 val)
{
return __builtin_ctz(val);
}
inline int LeastSignificantSetBit(u16 val)
{
return __builtin_ctz(val);
}
inline int LeastSignificantSetBit(u32 val)
{
return __builtin_ctz(val);
}
inline int LeastSignificantSetBit(u64 val)
{
return __builtin_ctzll(val);
}
#endif
// Similar to std::bitset, this is a class which encapsulates a bitset, i.e.
// using the set bits of an integer to represent a set of integers. Like that
// class, it acts like an array of bools:
// BitSet32 bs;
// bs[1] = true;
// but also like the underlying integer ([0] = least significant bit):
// BitSet32 bs2 = ...;
// bs = (bs ^ bs2) & BitSet32(0xffff);
// The following additional functionality is provided:
// - Construction using an initializer list.
// BitSet bs { 1, 2, 4, 8 };
// - Efficiently iterating through the set bits:
// for (int i : bs)
// [i is the *index* of a set bit]
// (This uses the appropriate CPU instruction to find the next set bit in one
// operation.)
// - Counting set bits using .Count() - see comment on that method.
// TODO: use constexpr when MSVC gets out of the Dark Ages
// Set of small integers stored as the bits of one unsigned integer IntTy
// (bit 0 is the least significant bit). m_val is public so the raw mask can
// be read or written directly.
template <typename IntTy>
class BitSet
{
  static_assert(!std::is_signed<IntTy>::value, "BitSet should not be used with signed types");

public:
  // A reference to a particular bit, returned from operator[].
  class Ref
  {
  public:
    constexpr Ref(Ref&& other) : m_bs(other.m_bs), m_mask(other.m_mask) {}
    constexpr Ref(BitSet* bs, IntTy mask) : m_bs(bs), m_mask(mask) {}
    // Reads the referenced bit.
    constexpr operator bool() const { return (m_bs->m_val & m_mask) != 0; }
    // Writes the referenced bit and returns the value written.
    bool operator=(bool set)
    {
      m_bs->m_val = (m_bs->m_val & ~m_mask) | (set ? m_mask : 0);
      return set;
    }

  private:
    BitSet* m_bs;
    IntTy m_mask;
  };

  // A STL-like iterator is required to be able to use range-based for loops.
  // m_val holds the bits not yet visited, m_bit the index currently pointed
  // at; m_bit == -1 marks the end. Equality compares m_bit only.
  class Iterator
  {
  public:
    constexpr Iterator(const Iterator& other) : m_val(other.m_val), m_bit(other.m_bit) {}
    constexpr Iterator(IntTy val, int bit) : m_val(val), m_bit(bit) {}
    Iterator& operator=(Iterator other)
    {
      // Plain member-wise copy; both members are assignable, so no
      // placement-new re-construction (and no <new>) is needed.
      m_val = other.m_val;
      m_bit = other.m_bit;
      return *this;
    }
    // Advances to the next set bit, or to the end marker when none is left.
    Iterator& operator++()
    {
      if (m_val == 0)
      {
        m_bit = -1;
      }
      else
      {
        int bit = LeastSignificantSetBit(m_val);
        // Build the mask in IntTy: an `int` one shifted by `bit` would be
        // wrong for bits that do not fit in int (e.g. bit >= 31 of a
        // 64-bit set), leaving the bit uncleared.
        m_val &= ~((IntTy)1 << bit);
        m_bit = bit;
      }
      return *this;
    }
    Iterator operator++(int)
    {
      Iterator other(*this);
      ++*this;
      return other;
    }
    constexpr int operator*() const { return m_bit; }
    constexpr bool operator==(Iterator other) const { return m_bit == other.m_bit; }
    constexpr bool operator!=(Iterator other) const { return m_bit != other.m_bit; }

  private:
    IntTy m_val;
    int m_bit;
  };

  constexpr BitSet() : m_val(0) {}
  constexpr explicit BitSet(IntTy val) : m_val(val) {}
  // Builds the set from a list of bit indices.
  BitSet(std::initializer_list<int> init)
  {
    m_val = 0;
    for (int bit : init)
      m_val |= (IntTy)1 << bit;
  }

  // Set with the lowest `count` bits set; a full-width count is special-cased
  // because shifting by the full width is not allowed.
  constexpr static BitSet AllTrue(size_t count)
  {
    return BitSet(count == sizeof(IntTy) * 8 ? ~(IntTy)0 : (((IntTy)1 << count) - 1));
  }

  Ref operator[](size_t bit) { return Ref(this, (IntTy)1 << bit); }
  constexpr const Ref operator[](size_t bit) const { return (*const_cast<BitSet*>(this))[bit]; }
  constexpr bool operator==(BitSet other) const { return m_val == other.m_val; }
  constexpr bool operator!=(BitSet other) const { return m_val != other.m_val; }
  constexpr bool operator<(BitSet other) const { return m_val < other.m_val; }
  constexpr bool operator>(BitSet other) const { return m_val > other.m_val; }
  constexpr BitSet operator|(BitSet other) const { return BitSet(m_val | other.m_val); }
  constexpr BitSet operator&(BitSet other) const { return BitSet(m_val & other.m_val); }
  constexpr BitSet operator^(BitSet other) const { return BitSet(m_val ^ other.m_val); }
  constexpr BitSet operator~() const { return BitSet(~m_val); }
  constexpr BitSet operator<<(IntTy shift) const { return BitSet(m_val << shift); }
  constexpr BitSet operator>>(IntTy shift) const { return BitSet(m_val >> shift); }
  constexpr explicit operator bool() const { return m_val != 0; }
  BitSet& operator|=(BitSet other) { return *this = *this | other; }
  BitSet& operator&=(BitSet other) { return *this = *this & other; }
  BitSet& operator^=(BitSet other) { return *this = *this ^ other; }
  BitSet& operator<<=(IntTy shift) { return *this = *this << shift; }
  BitSet& operator>>=(IntTy shift) { return *this = *this >> shift; }
  // Warning: Even though on modern CPUs this is a single fast instruction,
  // Dolphin's official builds do not currently assume POPCNT support on x86,
  // so slower explicit bit twiddling is generated. Still should generally
  // be faster than a loop.
  constexpr unsigned int Count() const { return CountSetBits(m_val); }
  // begin() starts from a dummy position and advances once so that it points
  // at the first set bit (or at the end marker for an empty set).
  constexpr Iterator begin() const { return ++Iterator(m_val, 0); }
  constexpr Iterator end() const { return Iterator(m_val, -1); }
  IntTy m_val;
};
} // namespace Common
// Global-namespace shorthands for the four supported widths.
using BitSet8 = Common::BitSet<u8>;
using BitSet16 = Common::BitSet<u16>;
using BitSet32 = Common::BitSet<u32>;
using BitSet64 = Common::BitSet<u64>;

76
src/dolphin/CPUDetect.h Normal file
View File

@ -0,0 +1,76 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license_dolphin.txt file included.
// Detect the CPU, so we'll know which optimizations to use
#pragma once
#include <string>
// Host CPU manufacturer/family as recorded in CPUInfo::vendor.
enum class CPUVendor
{
Intel,
AMD,
ARM,
Other,
};
// Record of host CPU properties. Every field has an in-class default, so
// until detection fills it in the object reads as "nothing detected"
// (feature flags false, counts 0, strings empty, vendor Intel).
// The b* flags are named after instruction-set extensions; presumably each
// is set when the host supports that extension - the detection code is not
// part of this header.
struct CPUInfo
{
CPUVendor vendor = CPUVendor::Intel;
// Zero-initialised character buffers for the CPU identification strings.
char cpu_string[0x41] = {};
char brand_string[0x21] = {};
bool OS64bit = false;
bool CPU64bit = false;
bool Mode64bit = false;
bool HTT = false;
int num_cores = 0;
int logical_cpu_count = 0;
bool bSSE = false;
bool bSSE2 = false;
bool bSSE3 = false;
bool bSSSE3 = false;
bool bPOPCNT = false;
bool bSSE4_1 = false;
bool bSSE4_2 = false;
bool bLZCNT = false;
bool bSSE4A = false;
bool bAVX = false;
bool bAVX2 = false;
bool bBMI1 = false;
bool bBMI2 = false;
bool bFMA = false;
bool bFMA4 = false;
bool bAES = false;
// FXSAVE/FXRSTOR
bool bFXSR = false;
bool bMOVBE = false;
// This flag indicates that the hardware supports some mode
// in which denormal inputs _and_ outputs are automatically set to (signed) zero.
bool bFlushToZero = false;
bool bLAHFSAHF64 = false;
bool bLongMode = false;
bool bAtom = false;
// ARMv8 specific
bool bFP = false;
bool bASIMD = false;
bool bCRC32 = false;
bool bSHA1 = false;
bool bSHA2 = false;
// Call Detect()
explicit CPUInfo();
// Turn the CPU info into a string we can show
std::string Summarize();
private:
// Detects the various CPU features
void Detect();
};
extern CPUInfo cpu_info;

121
src/dolphin/CodeBlock.h Normal file
View File

@ -0,0 +1,121 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license_dolphin.txt file included.
#pragma once
#include <cstddef>
#include <vector>
#include "Assert.h"
#include "../types.h"
#include "MemoryUtil.h"
namespace Common
{
// Everything that needs to generate code should inherit from this.
// You get memory management for free, plus, you can use all emitter functions without
// having to prefix them with gen-> or something similar.
// Example implementation:
// class JIT : public CodeBlock<ARMXEmitter> {}
// Owns one region of executable memory and exposes it to the emitter base
// class T through T::SetCodePtr / T::GetCodePtr. A block may hand out
// sub-regions ("children") carved from the end of its own region.
// Not copyable or movable.
template <class T>
class CodeBlock : public T
{
private:
// A privately used function to set the executable RAM space to something invalid.
// For debugging usefulness it should be used to set the RAM to a host specific breakpoint
// instruction
virtual void PoisonMemory() = 0;
protected:
// Start of the region; nullptr while nothing is allocated.
u8* region = nullptr;
// Size of region we can use.
size_t region_size = 0;
// Original size of the region we allocated.
size_t total_region_size = 0;
// True for blocks whose region was assigned by a parent via AddChildCodeSpace.
bool m_is_child = false;
// Children registered through AddChildCodeSpace; not owned.
std::vector<CodeBlock*> m_children;
public:
CodeBlock() = default;
// Frees the region if one is still held. Note that FreeCodeSpace asserts
// that this block is not a child.
virtual ~CodeBlock()
{
if (region)
FreeCodeSpace();
}
CodeBlock(const CodeBlock&) = delete;
CodeBlock& operator=(const CodeBlock&) = delete;
CodeBlock(CodeBlock&&) = delete;
CodeBlock& operator=(CodeBlock&&) = delete;
// Call this before you generate any code.
// Allocates `size` bytes of executable memory and points the emitter at
// the start of it.
void AllocCodeSpace(size_t size)
{
region_size = size;
total_region_size = size;
region = static_cast<u8*>(Common::AllocateExecutableMemory(total_region_size));
T::SetCodePtr(region);
}
// Always clear code space with breakpoints, so that if someone accidentally executes
// uninitialized, it just breaks into the debugger.
void ClearCodeSpace()
{
PoisonMemory();
ResetCodePtr();
}
// Call this when shutting down. Don't rely on the destructor, even though it'll do the job.
// Releases the whole original allocation and resets this block and every
// registered child to the "nothing allocated" state.
void FreeCodeSpace()
{
ASSERT(!m_is_child);
Common::FreeMemoryPages(region, total_region_size);
region = nullptr;
region_size = 0;
total_region_size = 0;
for (CodeBlock* child : m_children)
{
child->region = nullptr;
child->region_size = 0;
child->total_region_size = 0;
}
}
// True when `ptr` lies inside the usable part of the region.
bool IsInSpace(const u8* ptr) const { return ptr >= region && ptr < (region + region_size); }
// Cannot currently be undone. Will write protect the entire code region.
// Start over if you need to change the code (call FreeCodeSpace(), AllocCodeSpace()).
void WriteProtect() { Common::WriteProtectMemory(region, region_size, true); }
// Moves the emitter's write position back to the start of the region.
void ResetCodePtr() { T::SetCodePtr(region); }
// Bytes between the emitter's current position and the end of the usable
// region. The assertion requires the position to be strictly inside it.
size_t GetSpaceLeft() const
{
ASSERT(static_cast<size_t>(T::GetCodePtr() - region) < region_size);
return region_size - (T::GetCodePtr() - region);
}
bool IsAlmostFull() const
{
// This should be bigger than the biggest block ever.
return GetSpaceLeft() < 0x10000;
}
// Child allocations shrink region_size but not total_region_size, so a
// difference between the two means space has been handed out.
bool HasChildren() const { return region_size != total_region_size; }
// Reserves `child_size` bytes at the end of the usable region and returns
// their start address; the usable region shrinks accordingly.
u8* AllocChildCodeSpace(size_t child_size)
{
ASSERT_MSG(DYNA_REC, child_size < GetSpaceLeft(), "Insufficient space for child allocation.");
u8* child_region = region + region_size - child_size;
region_size -= child_size;
return child_region;
}
// Carves `child_size` bytes out of this block, assigns them to `child`,
// resets the child's write position and registers it in m_children.
void AddChildCodeSpace(CodeBlock* child, size_t child_size)
{
u8* child_region = AllocChildCodeSpace(child_size);
child->m_is_child = true;
child->region = child_region;
child->region_size = child_size;
child->total_region_size = child_size;
child->ResetCodePtr();
m_children.emplace_back(child);
}
};
} // namespace Common

View File

@ -0,0 +1,52 @@
// Copyright 2009 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license_dolphin.txt file included.
#include <cstddef>
#include <cstring>
#include <errno.h>
#include <type_traits>
#include "CommonFuncs.h"
#ifdef _WIN32
#include <windows.h>
#define strerror_r(err, buf, len) strerror_s(buf, len, err)
#endif
// Size in bytes of the scratch buffers used to fetch OS error messages.
constexpr size_t BUFFER_SIZE = 256;

// Returns the message text for the current value of errno.
// Calling this may itself change errno.
std::string LastStrerrorString()
{
  char scratch[BUFFER_SIZE];

  // strerror_r comes in two flavours. The GNU one returns a pointer to the
  // message, which may or may not live in the buffer that was passed in; the
  // XSI one fills the buffer and returns an int status (0 on success).
  // The feature-test macros below pick the flavour, and assigning the result
  // to a typed variable makes the compiler verify that the pick was right.
#if defined(__GLIBC__) && (_GNU_SOURCE || (_POSIX_C_SOURCE < 200112L && _XOPEN_SOURCE < 600))
  const char* const text = strerror_r(errno, scratch, BUFFER_SIZE);
  return std::string(text);
#else
  const int status = strerror_r(errno, scratch, BUFFER_SIZE);
  if (status != 0)
    return "";
  return std::string(scratch);
#endif
}
#ifdef _WIN32
// Wrapper function to get GetLastError() string.
// This function might change the error code.
std::string GetLastErrorString()
{
char error_message[BUFFER_SIZE];
FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, nullptr, GetLastError(),
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), error_message, BUFFER_SIZE, nullptr);
return std::string(error_message);
}
#endif

58
src/dolphin/CommonFuncs.h Normal file
View File

@ -0,0 +1,58 @@
// Copyright 2009 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license_dolphin.txt file included.
#pragma once
#include <cstddef>
#include <string>
#include "../types.h"
// Will fail to compile on a non-array:
// Yields the element count N of a built-in array at compile time. The
// parameter is only there for deduction, so it is left unnamed.
template <typename T, size_t N>
constexpr size_t ArraySize(T (&)[N])
{
  return N;
}
#ifndef _WIN32
// go to debugger mode
// Stops execution via __builtin_trap(). Wrapped in do/while(false) so that
// `Crash();` is exactly one statement and can be used as the body of an
// if that has an else branch.
#define Crash() \
  do \
  { \
    __builtin_trap(); \
  } while (false)
#else // WIN32
// Function Cross-Compatibility
#define strcasecmp _stricmp
#define strncasecmp _strnicmp
#define unlink _unlink
#define vscprintf _vscprintf
// 64 bit offsets for Windows
#define fseeko _fseeki64
#define ftello _ftelli64
#define atoll _atoi64
#define stat _stat64
#define fstat _fstat64
#define fileno _fileno
extern "C" {
__declspec(dllimport) void __stdcall DebugBreak(void);
}
// Breaks into the debugger via DebugBreak(). Wrapped in do/while(false) so
// that `Crash();` is exactly one statement and can be used as the body of
// an if that has an else branch.
#define Crash() \
  do \
  { \
    DebugBreak(); \
  } while (false)
#endif // WIN32 ndef
// Wrapper function to get last strerror(errno) string.
// This function might change the error code.
std::string LastStrerrorString();
#ifdef _WIN32
// Wrapper function to get GetLastError() string.
// This function might change the error code.
std::string GetLastErrorString();
#endif

72
src/dolphin/Intrinsics.h Normal file
View File

@ -0,0 +1,72 @@
// Copyright 2015 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license_dolphin.txt file included.
#pragma once
#if defined(_M_X86)
/**
* It is assumed that all compilers used to build Dolphin support intrinsics up to and including
* SSE 4.2 on x86/x64.
*/
#if defined(__GNUC__) || defined(__clang__)
/**
* Due to limitations in GCC, SSE intrinsics are only available when compiling with the
* corresponding instruction set enabled. However, using the target attribute, we can compile
* single functions with a different target instruction set, while still creating a generic build.
*
* Since this instruction set is enabled per-function, any callers should verify that the
* instruction set is supported at runtime before calling it, and provide a fallback implementation
* when not supported.
*
* When building with -march=native, or enabling the instruction sets in the compile flags, permit
* usage of the intrinsics without any function attributes. If the command-line architecture does
* not support this instruction set, enable it via function targeting.
*/
#include <x86intrin.h>
#ifndef __SSE4_2__
#define FUNCTION_TARGET_SSE42 [[gnu::target("sse4.2")]]
#endif
#ifndef __SSE4_1__
#define FUNCTION_TARGET_SSR41 [[gnu::target("sse4.1")]]
#endif
#ifndef __SSSE3__
#define FUNCTION_TARGET_SSSE3 [[gnu::target("ssse3")]]
#endif
#ifndef __SSE3__
#define FUNCTION_TARGET_SSE3 [[gnu::target("sse3")]]
#endif
#elif defined(_MSC_VER) || defined(__INTEL_COMPILER)
/**
* MSVC and ICC support intrinsics for any instruction set without any function attributes.
*/
#include <intrin.h>
#endif // defined(_MSC_VER) || defined(__INTEL_COMPILER)
#endif // _M_X86
/**
* Define the FUNCTION_TARGET macros to nothing if they are not needed, or not on an X86 platform.
* This way when a function is defined with FUNCTION_TARGET you don't need to define a second
* version without the macro around a #ifdef guard. Be careful when using intrinsics, as all use
* should still be placed around a #ifdef _M_X86 if the file is compiled on all architectures.
*/
#ifndef FUNCTION_TARGET_SSE42
#define FUNCTION_TARGET_SSE42
#endif
#ifndef FUNCTION_TARGET_SSR41
#define FUNCTION_TARGET_SSR41
#endif
#ifndef FUNCTION_TARGET_SSSE3
#define FUNCTION_TARGET_SSSE3
#endif
#ifndef FUNCTION_TARGET_SSE3
#define FUNCTION_TARGET_SSE3
#endif

20
src/dolphin/Log.h Normal file
View File

@ -0,0 +1,20 @@
#pragma once
#include "CommonFuncs.h"
#include <stdio.h>
// Prints `msg` followed by a newline to stdout, then stops via Crash().
// `msg` is passed to printf as a %s argument, so it must be a C string and
// is not itself interpreted as a format string.
#define PanicAlert(msg) \
do \
{ \
printf("%s\n", msg); \
Crash(); \
} while (false)
// Log category placeholder; ERROR_LOG below ignores its category argument.
#define DYNA_REC 0
// Prints a printf-style message plus a newline to stdout. `which` is
// ignored. `fmt` must be a string literal because "\n" is appended to it by
// literal concatenation.
#define ERROR_LOG(which, fmt, ...) \
do \
{ \
printf(fmt "\n", ## __VA_ARGS__); \
} while (false)

193
src/dolphin/MemoryUtil.cpp Normal file
View File

@ -0,0 +1,193 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license_dolphin.txt file included.
#include <cstddef>
#include <cstdlib>
#include <string>
// File-local fatal-error helper: prints a printf-style message plus a
// newline to stdout and then calls abort(). `fmt` must be a string literal
// because "\n" is appended to it by literal concatenation.
#define PanicAlert(fmt, ...) \
do \
{ \
printf(fmt "\n", ## __VA_ARGS__); \
abort(); \
} while (false)
#include "../types.h"
#include "CommonFuncs.h"
#ifdef _WIN32
#include <windows.h>
//#include "Common/StringUtil.h"
#else
#include <stdio.h>
#include <sys/mman.h>
#include <sys/types.h>
#if defined __APPLE__ || defined __FreeBSD__ || defined __OpenBSD__
#include <sys/sysctl.h>
#elif defined __HAIKU__
#include <OS.h>
#else
#include <sys/sysinfo.h>
#endif
#endif
namespace Common
{
// This is purposely not a full wrapper for virtualalloc/mmap, but it
// provides exactly the primitive operations that Dolphin needs.
// Allocates `size` bytes of memory that is readable, writable and
// executable. Failure is reported through PanicAlert, which aborts in this
// file, so callers do not need to check for nullptr.
void* AllocateExecutableMemory(size_t size)
{
#if defined(_WIN32)
  void* ptr = VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
#else
  void* ptr =
      mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0);

  // mmap signals failure with MAP_FAILED rather than a null pointer;
  // normalise it so both platforms share the check below.
  if (ptr == MAP_FAILED)
    ptr = nullptr;
#endif

  if (ptr == nullptr)
    PanicAlert("Failed to allocate executable memory");

  return ptr;
}
// Allocates `size` bytes of readable and writable (non-executable) memory.
// Failure is reported through PanicAlert.
void* AllocateMemoryPages(size_t size)
{
#ifdef _WIN32
  void* pages = VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_READWRITE);
#else
  void* const mapped = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
  // Translate mmap's failure sentinel into a null pointer.
  void* pages = (mapped == MAP_FAILED) ? nullptr : mapped;
#endif

  if (!pages)
    PanicAlert("Failed to allocate raw memory");

  return pages;
}
// Allocates `size` bytes aligned to `alignment`; release the result with
// FreeAlignedMemory. Failure is reported through PanicAlert.
void* AllocateAlignedMemory(size_t size, size_t alignment)
{
#ifdef _WIN32
  void* ptr = _aligned_malloc(size, alignment);
#else
  void* ptr = nullptr;
  // posix_memalign reports failure through its return value; reset the
  // pointer explicitly so the shared check below fires on every
  // implementation. The message is printed once, by PanicAlert.
  if (posix_memalign(&ptr, alignment, size) != 0)
    ptr = nullptr;
#endif

  if (ptr == nullptr)
    PanicAlert("Failed to allocate aligned memory");

  return ptr;
}
// Releases pages obtained from AllocateMemoryPages or
// AllocateExecutableMemory. A null `ptr` is ignored. `size` is only used by
// the munmap path; the Windows path releases the whole reservation.
void FreeMemoryPages(void* ptr, size_t size)
{
  if (!ptr)
    return;

#ifdef _WIN32
  const bool released = VirtualFree(ptr, 0, MEM_RELEASE) != 0;
  if (!released)
    PanicAlert("FreeMemoryPages failed!\nVirtualFree: %s", GetLastErrorString().c_str());
#else
  const int status = munmap(ptr, size);
  if (status != 0)
    PanicAlert("FreeMemoryPages failed!\nmunmap: %s", LastStrerrorString().c_str());
#endif
}
// Releases memory obtained from AllocateAlignedMemory. A null `ptr` is
// ignored.
void FreeAlignedMemory(void* ptr)
{
  if (ptr == nullptr)
    return;

#ifdef _WIN32
  _aligned_free(ptr);
#else
  free(ptr);
#endif
}
// Removes all access rights (read, write, execute) from the given range.
// Failure is reported through PanicAlert.
void ReadProtectMemory(void* ptr, size_t size)
{
#ifdef _WIN32
  DWORD previous;
  const bool changed = VirtualProtect(ptr, size, PAGE_NOACCESS, &previous) != 0;
  if (!changed)
    PanicAlert("ReadProtectMemory failed!\nVirtualProtect: %s", GetLastErrorString().c_str());
#else
  const int status = mprotect(ptr, size, PROT_NONE);
  if (status != 0)
    PanicAlert("ReadProtectMemory failed!\nmprotect: %s", LastStrerrorString().c_str());
#endif
}
void WriteProtectMemory(void* ptr, size_t size, bool allowExecute)
{
#ifdef _WIN32
DWORD oldValue;
if (!VirtualProtect(ptr, size, allowExecute ? PAGE_EXECUTE_READ : PAGE_READONLY, &oldValue))
PanicAlert("WriteProtectMemory failed!\nVirtualProtect: %s", GetLastErrorString().c_str());
#else
if (mprotect(ptr, size, allowExecute ? (PROT_READ | PROT_EXEC) : PROT_READ) != 0)
PanicAlert("WriteProtectMemory failed!\nmprotect: %s", LastStrerrorString().c_str());
#endif
}
void UnWriteProtectMemory(void* ptr, size_t size, bool allowExecute)
{
#ifdef _WIN32
DWORD oldValue;
if (!VirtualProtect(ptr, size, allowExecute ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE, &oldValue))
PanicAlert("UnWriteProtectMemory failed!\nVirtualProtect: %s", GetLastErrorString().c_str());
#else
if (mprotect(ptr, size,
allowExecute ? (PROT_READ | PROT_WRITE | PROT_EXEC) : PROT_WRITE | PROT_READ) != 0)
{
PanicAlert("UnWriteProtectMemory failed!\nmprotect: %s", LastStrerrorString().c_str());
}
#endif
}
size_t MemPhysical()
{
#ifdef _WIN32
MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
GlobalMemoryStatusEx(&memInfo);
return memInfo.ullTotalPhys;
#elif defined __APPLE__ || defined __FreeBSD__ || defined __OpenBSD__
int mib[2];
size_t physical_memory;
mib[0] = CTL_HW;
#ifdef __APPLE__
mib[1] = HW_MEMSIZE;
#elif defined __FreeBSD__
mib[1] = HW_REALMEM;
#elif defined __OpenBSD__
mib[1] = HW_PHYSMEM;
#endif
size_t length = sizeof(size_t);
sysctl(mib, 2, &physical_memory, &length, NULL, 0);
return physical_memory;
#elif defined __HAIKU__
system_info sysinfo;
get_system_info(&sysinfo);
return static_cast<size_t>(sysinfo.max_pages * B_PAGE_SIZE);
#else
struct sysinfo memInfo;
sysinfo(&memInfo);
return (size_t)memInfo.totalram * memInfo.mem_unit;
#endif
}
} // namespace Common

22
src/dolphin/MemoryUtil.h Normal file
View File

@ -0,0 +1,22 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license_dolphin.txt file included.
#pragma once
#include <cstddef>
#include <string>
// Thin wrappers over the host OS memory primitives (VirtualAlloc/
// VirtualProtect on Windows, mmap/mprotect elsewhere). The implementations
// report failures through PanicAlert rather than through return values.
namespace Common
{
// Allocates `size` bytes that are readable, writable and executable.
void* AllocateExecutableMemory(size_t size);
// Allocates `size` bytes that are readable and writable.
void* AllocateMemoryPages(size_t size);
// Releases memory from the two allocators above; a null `ptr` is ignored.
void FreeMemoryPages(void* ptr, size_t size);
// Allocates `size` bytes aligned to `alignment`.
void* AllocateAlignedMemory(size_t size, size_t alignment);
// Releases memory from AllocateAlignedMemory; a null `ptr` is ignored.
void FreeAlignedMemory(void* ptr);
// Removes all access rights from the range.
void ReadProtectMemory(void* ptr, size_t size);
// Makes the range read-only, optionally keeping it executable.
void WriteProtectMemory(void* ptr, size_t size, bool executable = false);
// Makes the range readable and writable again, optionally executable.
void UnWriteProtectMemory(void* ptr, size_t size, bool allowExecute = false);
// Physical memory of the host in bytes.
size_t MemPhysical();
} // namespace Common

View File

@ -0,0 +1,339 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.

119
src/dolphin/x64ABI.cpp Normal file
View File

@ -0,0 +1,119 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license_dolphin.txt file included.
#include "../types.h"
#include "x64ABI.h"
#include "x64Emitter.h"
using namespace Gen;
// Shared code between Win64 and Unix64
// Computes the stack-frame layout used by the push/pop helpers for a given register set.
//
// mask              - registers to save; GPRs are selected by ABI_ALL_GPRS, XMMs by ABI_ALL_FPRS.
// rsp_alignment     - caller-supplied stack alignment value; only its low four bits end up
//                     influencing the padding that is added.
// needed_frame_size - extra bytes the caller wants reserved in the frame.
// shadowp           - out: shadow-space size (0x20 when built for _WIN32, otherwise 0).
// subtractionp      - out: total number of bytes to subtract from RSP after the GPR pushes.
// xmm_offsetp       - out: RSP-relative offset at which the first saved XMM register is stored.
void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size,
                                      size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp)
{
#if defined(_WIN32)
  const size_t shadow_bytes = 0x20;
#else
  const size_t shadow_bytes = 0;
#endif

  const int gpr_total = (mask & ABI_ALL_GPRS).Count();
  const int xmm_total = (mask & ABI_ALL_FPRS).Count();

  // Each pushed GPR moves RSP by eight bytes.
  rsp_alignment -= gpr_total * 8;

  // If we have any XMMs to save, we must align the stack here.
  size_t frame_bytes = (xmm_total != 0) ? (rsp_alignment & 0xf) : 0;
  frame_bytes += 16 * xmm_total;
  const size_t xmm_area_bytes = frame_bytes;

  frame_bytes += needed_frame_size;
  frame_bytes += shadow_bytes;

  // Final alignment.
  rsp_alignment -= frame_bytes;
  frame_bytes += rsp_alignment & 0xf;

  *shadowp = shadow_bytes;
  *subtractionp = frame_bytes;
  *xmm_offsetp = frame_bytes - xmm_area_bytes;
}
// Emits the code that saves the registers in `mask` and reserves the stack frame computed by
// ABI_CalculateFrameSize: GPRs are PUSHed, RSP is lowered, then XMM registers are stored with
// MOVAPD into consecutive 16-byte slots. Returns the shadow-space size reported by
// ABI_CalculateFrameSize.
size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment,
                                                 size_t needed_frame_size)
{
  size_t shadow_bytes, frame_bytes, xmm_slot;
  ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow_bytes, &frame_bytes,
                         &xmm_slot);

  for (int gpr : mask & ABI_ALL_GPRS)
  {
    PUSH(static_cast<X64Reg>(gpr));
  }

  if (frame_bytes != 0)
  {
    // Values below 0x80 use the 8-bit immediate form; anything larger uses the 32-bit form.
    if (frame_bytes >= 0x80)
      SUB(64, R(RSP), Imm32(static_cast<u32>(frame_bytes)));
    else
      SUB(64, R(RSP), Imm8(static_cast<u8>(frame_bytes)));
  }

  // XMM registers occupy bits 16..31 of the mask, hence the -16 to get the register number.
  for (int fpr : mask & ABI_ALL_FPRS)
  {
    MOVAPD(MDisp(RSP, static_cast<int>(xmm_slot)), static_cast<X64Reg>(fpr - 16));
    xmm_slot += 16;
  }

  return shadow_bytes;
}
// Emits the inverse of ABI_PushRegistersAndAdjustStack: XMM registers are reloaded with MOVAPD,
// RSP is raised by the frame size, and the GPRs in `mask` are POPped from register 15 down to 0.
// The arguments are fed to ABI_CalculateFrameSize again to recompute the layout.
void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment,
                                              size_t needed_frame_size)
{
  size_t shadow_bytes, frame_bytes, xmm_slot;
  ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow_bytes, &frame_bytes,
                         &xmm_slot);

  // XMM registers occupy bits 16..31 of the mask, hence the -16 to get the register number.
  for (int fpr : mask & ABI_ALL_FPRS)
  {
    MOVAPD(static_cast<X64Reg>(fpr - 16), MDisp(RSP, static_cast<int>(xmm_slot)));
    xmm_slot += 16;
  }

  if (frame_bytes != 0)
  {
    // Values below 0x80 use the 8-bit immediate form; anything larger uses the 32-bit form.
    if (frame_bytes >= 0x80)
      ADD(64, R(RSP), Imm32(static_cast<u32>(frame_bytes)));
    else
      ADD(64, R(RSP), Imm8(static_cast<u8>(frame_bytes)));
  }

  // Walk the GPR bits from highest to lowest register number.
  int gpr = 16;
  while (gpr-- > 0)
  {
    if (mask[gpr])
      POP(static_cast<X64Reg>(gpr));
  }
}
// Emits code performing two register moves at once: dst1 = src1 + offset1 and dst2 = src2.
// The emission order is chosen so that neither move overwrites a source the other still needs;
// when the two moves form an exact swap, XCHG is used instead.
void XEmitter::MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, s32 offset1, Gen::X64Reg dst2,
                      Gen::X64Reg src2)
{
  // dst1 = src1 + offset1: ADD in place, LEA for move-with-offset, plain MOV otherwise.
  const auto emit_first = [&]() {
    if (dst1 == src1)
    {
      if (offset1)
        ADD(bits, R(dst1), Imm32(offset1));
    }
    else if (offset1)
    {
      LEA(bits, dst1, MDisp(src1, offset1));
    }
    else
    {
      MOV(bits, R(dst1), R(src1));
    }
  };

  // dst2 = src2, skipped when it would be a no-op.
  const auto emit_second = [&]() {
    if (dst2 != src2)
      MOV(bits, R(dst2), R(src2));
  };

  const bool is_swap = (dst1 == src2) && (dst2 == src1);
  if (is_swap)
  {
    XCHG(bits, R(src1), R(src2));
    if (offset1)
      ADD(bits, R(dst1), Imm32(offset1));
    return;
  }

  if (src2 != dst1)
  {
    // Writing dst1 first cannot destroy src2.
    emit_first();
    emit_second();
  }
  else
  {
    // dst1 aliases src2: read src2 out before dst1 is written.
    emit_second();
    emit_first();
  }
}

57
src/dolphin/x64ABI.h Normal file
View File

@ -0,0 +1,57 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license_dolphin.txt file included.
#pragma once
#include "BitSet.h"
#include "x64Reg.h"
// x64 ABI:s, and helpers to help follow them when JIT-ing code.
// All conventions return values in EAX (+ possibly EDX).
// Windows 64-bit
// * 4-reg "fastcall" variant, very new-skool stack handling
// * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself
// calls_
// * Parameters passed in RCX, RDX, ... further parameters are MOVed into the allocated stack space.
// Scratch: RAX RCX RDX R8 R9 R10 R11
// Callee-save: RBX RSI RDI RBP R12 R13 R14 R15
// Parameters: RCX RDX R8 R9, further MOV-ed
// Linux 64-bit
// * 6-reg "fastcall" variant, old skool stack handling (parameters are pushed)
// Scratch: RAX RCX RDX RSI RDI R8 R9 R10 R11
// Callee-save: RBX RBP R12 R13 R14 R15
// Parameters: RDI RSI RDX RCX R8 R9
// Register-set masks over a BitSet32: the low 16 bits select general-purpose registers, the
// high 16 bits select the XMM registers.
#define ABI_ALL_FPRS BitSet32(0xffff0000)
#define ABI_ALL_GPRS BitSet32(0x0000ffff)
// Per-platform parameter registers and caller-saved register sets.
#ifdef _WIN32 // 64-bit Windows - the really exotic calling convention
#define ABI_PARAM1 RCX
#define ABI_PARAM2 RDX
#define ABI_PARAM3 R8
#define ABI_PARAM4 R9
// xmm0-xmm15 use the upper 16 bits in the functions that push/pop registers.
// NOTE(review): unlike the Unix definition below, this set contains no XMM registers.
#define ABI_ALL_CALLER_SAVED \
(BitSet32{RAX, RCX, RDX, R8, R9, R10, R11})
#else // 64-bit Unix / OS X
#define ABI_PARAM1 RDI
#define ABI_PARAM2 RSI
#define ABI_PARAM3 RDX
#define ABI_PARAM4 RCX
#define ABI_PARAM5 R8
#define ABI_PARAM6 R9
// FIXME: avoid pushing all 16 XMM registers when possible? most functions we call probably
// don't actually clobber them.
#define ABI_ALL_CALLER_SAVED (BitSet32{RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11} | ABI_ALL_FPRS)
#endif // WIN32
// Everything that is not caller-saved is treated as callee-saved (bitwise complement).
#define ABI_ALL_CALLEE_SAVED (~ABI_ALL_CALLER_SAVED)
// Register holding the function return value.
#define ABI_RETURN RAX

View File

@ -0,0 +1,274 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license_dolphin.txt file included.
#include <cstring>
#include <string>
#include "CPUDetect.h"
#include "../types.h"
#include "Intrinsics.h"
// Compiler selection: MSVC predefines _MSC_VER (there is no _MSVC_VER macro, so testing that name
// always selected the inline-assembly paths). With MSVC, __cpuid/__cpuidex/_xgetbv are expected
// to be provided by the compiler's intrinsics header; every other compiler (GCC/Clang, including
// MinGW on Windows) gets the fallbacks defined here.
#ifndef _MSC_VER
#ifdef __FreeBSD__
#include <unistd.h>
#include <machine/cpufunc.h>
#include <sys/types.h>
#endif
// Executes CPUID with EAX = function_id and ECX = subfunction_id.
// On return: info[0] = EAX, info[1] = EBX, info[2] = ECX, info[3] = EDX.
static inline void __cpuidex(int info[4], int function_id, int subfunction_id)
{
#ifdef __FreeBSD__
  // Despite the name, this is just do_cpuid() with ECX as second input.
  cpuid_count((u_int)function_id, (u_int)subfunction_id, (u_int*)info);
#else
  info[0] = function_id;     // eax
  info[2] = subfunction_id;  // ecx
  __asm__("cpuid"
          : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
          : "a"(function_id), "c"(subfunction_id));
#endif
}
// Executes CPUID for function_id with sub-function (ECX) 0.
static inline void __cpuid(int info[4], int function_id)
{
  return __cpuidex(info, function_id, 0);
}
#endif  // ifndef _MSC_VER
#ifdef _MSC_VER
// Reads the extended control register selected by `index` via the compiler intrinsic.
static u64 xgetbv(u32 index)
{
  return _xgetbv(index);
}
constexpr u32 XCR_XFEATURE_ENABLED_MASK = _XCR_XFEATURE_ENABLED_MASK;
#else
// Reads the extended control register selected by `index` (passed in ECX); XGETBV returns the
// value in EDX:EAX, which is recombined into a single 64-bit result.
static u64 xgetbv(u32 index)
{
  u32 eax, edx;
  __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
  return ((u64)edx << 32) | eax;
}
constexpr u32 XCR_XFEATURE_ENABLED_MASK = 0;
#endif  // ifdef _MSC_VER
// Global CPU description. Because the constructor below calls Detect(), the CPUID probing runs
// when this object is constructed.
CPUInfo cpu_info;
// Constructing a CPUInfo immediately probes the host CPU.
CPUInfo::CPUInfo()
{
Detect();
}
// Detects the various CPU features.
// Fills in this CPUInfo by querying CPUID (plus XGETBV for the AVX check): the vendor string
// (brand_string), the model string (cpu_string), the vendor enum, the b* feature flags, HTT,
// logical_cpu_count and num_cores. Takes no parameters and returns nothing; all results are
// stored in members.
void CPUInfo::Detect()
{
#ifdef _M_X86_64
Mode64bit = true;
OS64bit = true;
#endif
num_cores = 1;
// Set obvious defaults, for extra safety
if (Mode64bit)
{
bSSE = true;
bSSE2 = true;
bLongMode = true;
}
// Assume CPU supports the CPUID instruction. Those that don't can barely
// boot modern OS:es anyway.
int cpu_id[4];
// Detect CPU's CPUID capabilities, and grab CPU string
__cpuid(cpu_id, 0x00000000);
u32 max_std_fn = cpu_id[0]; // EAX
// The 12-byte vendor ID is assembled from EBX, EDX, ECX (in that order), hence indices 1, 3, 2.
std::memcpy(&brand_string[0], &cpu_id[1], sizeof(int));
std::memcpy(&brand_string[4], &cpu_id[3], sizeof(int));
std::memcpy(&brand_string[8], &cpu_id[2], sizeof(int));
__cpuid(cpu_id, 0x80000000);
u32 max_ex_fn = cpu_id[0];
// NOTE(review): strcmp/strcpy below assume brand_string is NUL-terminated after the 12 copied
// bytes — presumably it is zero-initialized in the class declaration; confirm.
if (!strcmp(brand_string, "GenuineIntel"))
vendor = CPUVendor::Intel;
else if (!strcmp(brand_string, "AuthenticAMD"))
vendor = CPUVendor::AMD;
else
vendor = CPUVendor::Other;
// Set reasonable default brand string even if brand string not available.
strcpy(cpu_string, brand_string);
// Detect family and other misc stuff.
bool ht = false;
HTT = ht;
logical_cpu_count = 1;
if (max_std_fn >= 1)
{
__cpuid(cpu_id, 0x00000001);
// Family = EAX bits 8-11 plus bits 20-27; model = EAX bits 4-7 with bits 16-19 as the high nibble.
int family = ((cpu_id[0] >> 8) & 0xf) + ((cpu_id[0] >> 20) & 0xff);
int model = ((cpu_id[0] >> 4) & 0xf) + ((cpu_id[0] >> 12) & 0xf0);
// Detect people unfortunate enough to be running Dolphin on an Atom
if (family == 6 &&
(model == 0x1C || model == 0x26 || model == 0x27 || model == 0x35 || model == 0x36 ||
model == 0x37 || model == 0x4A || model == 0x4D || model == 0x5A || model == 0x5D))
bAtom = true;
logical_cpu_count = (cpu_id[1] >> 16) & 0xFF;
ht = (cpu_id[3] >> 28) & 1;
// Feature bits: cpu_id[3] is EDX, cpu_id[2] is ECX of leaf 1.
if ((cpu_id[3] >> 25) & 1)
bSSE = true;
if ((cpu_id[3] >> 26) & 1)
bSSE2 = true;
if ((cpu_id[2]) & 1)
bSSE3 = true;
if ((cpu_id[2] >> 9) & 1)
bSSSE3 = true;
if ((cpu_id[2] >> 19) & 1)
bSSE4_1 = true;
if ((cpu_id[2] >> 20) & 1)
bSSE4_2 = true;
if ((cpu_id[2] >> 22) & 1)
bMOVBE = true;
if ((cpu_id[2] >> 25) & 1)
bAES = true;
if ((cpu_id[3] >> 24) & 1)
{
// We can use FXSAVE.
bFXSR = true;
}
// AVX support requires 3 separate checks:
// - Is the AVX bit set in CPUID?
// - Is the XSAVE bit set in CPUID?
// - XGETBV result has the XCR bit set.
if (((cpu_id[2] >> 28) & 1) && ((cpu_id[2] >> 27) & 1))
{
if ((xgetbv(XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6)
{
bAVX = true;
// FMA is only reported when the AVX checks above have passed.
if ((cpu_id[2] >> 12) & 1)
bFMA = true;
}
}
if (max_std_fn >= 7)
{
__cpuidex(cpu_id, 0x00000007, 0x00000000);
// careful; we can't enable AVX2 unless the XSAVE/XGETBV checks above passed
if ((cpu_id[1] >> 5) & 1)
bAVX2 = bAVX;
if ((cpu_id[1] >> 3) & 1)
bBMI1 = true;
if ((cpu_id[1] >> 8) & 1)
bBMI2 = true;
}
}
bFlushToZero = bSSE;
if (max_ex_fn >= 0x80000004)
{
// Extract CPU model string
// Three leaves of 16 bytes each are copied back to back (48 bytes total).
// NOTE(review): no terminator is written here — assumes cpu_string is large enough and ends up
// NUL-terminated (by the CPUID data or zero-initialization); confirm.
__cpuid(cpu_id, 0x80000002);
memcpy(cpu_string, cpu_id, sizeof(cpu_id));
__cpuid(cpu_id, 0x80000003);
memcpy(cpu_string + 16, cpu_id, sizeof(cpu_id));
__cpuid(cpu_id, 0x80000004);
memcpy(cpu_string + 32, cpu_id, sizeof(cpu_id));
}
if (max_ex_fn >= 0x80000001)
{
// Check for more features.
__cpuid(cpu_id, 0x80000001);
if (cpu_id[2] & 1)
bLAHFSAHF64 = true;
if ((cpu_id[2] >> 5) & 1)
bLZCNT = true;
if ((cpu_id[2] >> 16) & 1)
bFMA4 = true;
if ((cpu_id[3] >> 29) & 1)
bLongMode = true;
}
// Default core count: the logical CPU count from leaf 1 (or 1 if that was reported as 0).
num_cores = (logical_cpu_count == 0) ? 1 : logical_cpu_count;
if (max_ex_fn >= 0x80000008)
{
// Get number of cores. This is a bit complicated. Following AMD manual here.
__cpuid(cpu_id, 0x80000008);
int apic_id_core_id_size = (cpu_id[2] >> 12) & 0xF;
if (apic_id_core_id_size == 0)
{
if (ht)
{
// New mechanism for modern Intel CPUs.
if (vendor == CPUVendor::Intel)
{
__cpuidex(cpu_id, 0x00000004, 0x00000000);
int cores_x_package = ((cpu_id[0] >> 26) & 0x3F) + 1;
HTT = (cores_x_package < logical_cpu_count);
// Only trust cores_x_package when it evenly divides the logical CPU count.
cores_x_package = ((logical_cpu_count % cores_x_package) == 0) ? cores_x_package : 1;
num_cores = (cores_x_package > 1) ? cores_x_package : num_cores;
logical_cpu_count /= cores_x_package;
}
}
}
else
{
// Use AMD's new method.
num_cores = (cpu_id[2] & 0xFF) + 1;
}
}
}
// Builds a human-readable one-line description of the detected CPU:
// "<cpu_string> (<brand_string>)" followed by a ", NAME" entry for each feature flag that is set.
std::string CPUInfo::Summarize()
{
  std::string summary(cpu_string);
  summary += " (";
  summary += brand_string;
  summary += ")";

  // Appends `label` only when the corresponding capability was detected.
  const auto add_if = [&summary](bool present, const char* label) {
    if (present)
      summary += label;
  };

  add_if(bSSE, ", SSE");
  add_if(bSSE2, ", SSE2");
  // The DAZ remark is attached to the SSE2 entry, so it is only emitted alongside it.
  add_if(bSSE2 && !bFlushToZero, " (but not DAZ!)");
  add_if(bSSE3, ", SSE3");
  add_if(bSSSE3, ", SSSE3");
  add_if(bSSE4_1, ", SSE4.1");
  add_if(bSSE4_2, ", SSE4.2");
  add_if(HTT, ", HTT");
  add_if(bAVX, ", AVX");
  add_if(bAVX2, ", AVX2");
  add_if(bBMI1, ", BMI1");
  add_if(bBMI2, ", BMI2");
  add_if(bFMA, ", FMA");
  add_if(bAES, ", AES");
  add_if(bMOVBE, ", MOVBE");
  add_if(bLongMode, ", 64-bit support");

  return summary;
}

3398
src/dolphin/x64Emitter.cpp Normal file

File diff suppressed because it is too large Load Diff

1180
src/dolphin/x64Emitter.h Normal file

File diff suppressed because it is too large Load Diff

96
src/dolphin/x64Reg.h Normal file
View File

@ -0,0 +1,96 @@
// Copyright 2016 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license_dolphin.txt file included.
#pragma once
namespace Gen
{
// Register identifiers used by the x64 emitter. Several names share the same numeric value:
// the 32/64/16/8-bit views of a general-purpose register all map to one number, and the
// XMM/YMM registers reuse the 0-15 range as well.
enum X64Reg
{
// 32-bit general-purpose registers.
EAX = 0,
EBX = 3,
ECX = 1,
EDX = 2,
ESI = 6,
EDI = 7,
EBP = 5,
ESP = 4,
// 64-bit general-purpose registers.
RAX = 0,
RBX = 3,
RCX = 1,
RDX = 2,
RSI = 6,
RDI = 7,
RBP = 5,
RSP = 4,
R8 = 8,
R9 = 9,
R10 = 10,
R11 = 11,
R12 = 12,
R13 = 13,
R14 = 14,
R15 = 15,
// Low 8-bit registers.
AL = 0,
BL = 3,
CL = 1,
DL = 2,
SIL = 6,
DIL = 7,
BPL = 5,
SPL = 4,
// High 8-bit registers; given values above 0xFF so they do not collide with the ones above.
AH = 0x104,
BH = 0x107,
CH = 0x105,
DH = 0x106,
// 16-bit registers.
AX = 0,
BX = 3,
CX = 1,
DX = 2,
SI = 6,
DI = 7,
BP = 5,
SP = 4,
// 128-bit SSE registers (numbered 0-15).
XMM0 = 0,
XMM1,
XMM2,
XMM3,
XMM4,
XMM5,
XMM6,
XMM7,
XMM8,
XMM9,
XMM10,
XMM11,
XMM12,
XMM13,
XMM14,
XMM15,
// 256-bit AVX registers (numbered 0-15, same values as the XMM names).
YMM0 = 0,
YMM1,
YMM2,
YMM3,
YMM4,
YMM5,
YMM6,
YMM7,
YMM8,
YMM9,
YMM10,
YMM11,
YMM12,
YMM13,
YMM14,
YMM15,
// Sentinel for "no register".
INVALID_REG = 0xFFFFFFFF
};
} // namespace Gen

View File

@ -38,6 +38,8 @@ uiWindow* win;
uiCheckbox* cbDirectBoot;
uiCheckbox* cbJITEnabled;
uiEntry* enJITMaxBlockSize;
int OnCloseWindow(uiWindow* window, void* blarg)
{
@ -61,6 +63,14 @@ void OnOk(uiButton* btn, void* blarg)
opened = false;
}
// Checkbox toggle handler: the "maximum block size" entry is enabled while the given checkbox
// is checked and disabled otherwise. `blarg` is unused.
void OnJITStateChanged(uiCheckbox* cb, void* blarg)
{
    if (!uiCheckboxChecked(cb))
    {
        uiControlDisable(uiControl(enJITMaxBlockSize));
        return;
    }

    uiControlEnable(uiControl(enJITMaxBlockSize));
}
void Open()
{
if (opened)
@ -70,7 +80,7 @@ void Open()
}
opened = true;
win = uiNewWindow("Emu settings - melonDS", 300, 200, 0, 0, 0);
win = uiNewWindow("Emu settings - melonDS", 300, 170, 0, 0, 0);
uiWindowSetMargined(win, 1);
uiWindowOnClosing(win, OnCloseWindow, NULL);
@ -79,12 +89,41 @@ void Open()
{
uiBox* in_ctrl = uiNewVerticalBox();
uiBoxAppend(top, uiControl(in_ctrl), 1);
uiBoxAppend(top, uiControl(in_ctrl), 0);
cbDirectBoot = uiNewCheckbox("Boot game directly");
uiBoxAppend(in_ctrl, uiControl(cbDirectBoot), 0);
}
{
uiLabel* dummy = uiNewLabel("");
uiBoxAppend(top, uiControl(dummy), 0);
}
{
uiGroup* grp = uiNewGroup("JIT");
uiBoxAppend(top, uiControl(grp), 1);
uiBox* in_ctrl = uiNewVerticalBox();
uiGroupSetChild(grp, uiControl(in_ctrl));
cbJITEnabled = uiNewCheckbox("Enable JIT recompiler");
uiBoxAppend(in_ctrl, uiControl(cbJITEnabled), 0);
uiCheckboxOnToggled(cbJITEnabled, OnJITStateChanged, NULL);
{
uiBox* row = uiNewHorizontalBox();
uiBoxAppend(in_ctrl, uiControl(row), 0);
uiLabel* lbl = uiNewLabel("Maximum block size (1-32): ");
uiBoxAppend(row, uiControl(lbl), 0);
enJITMaxBlockSize = uiNewEntry();
uiBoxAppend(row, uiControl(enJITMaxBlockSize), 0);
}
}
{
uiBox* in_ctrl = uiNewHorizontalBox();
uiBoxSetPadded(in_ctrl, 1);
@ -104,6 +143,8 @@ void Open()
uiCheckboxSetChecked(cbDirectBoot, Config::DirectBoot);
OnJITStateChanged(cbJITEnabled, NULL);
uiControlShow(uiControl(win));
}