Consolidate some compare instructions in JIT, preparations for separate CR flag storage, misc other cleanup in cpu core.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1547 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
4b5cfed314
commit
866d4e6bc8
|
@ -672,7 +672,8 @@ bool Init()
|
||||||
else
|
else
|
||||||
InitHWMemFuncs();
|
InitHWMemFuncs();
|
||||||
|
|
||||||
LOG(MEMMAP, "Memory system initialized. RAM at %p (0x80000000 @ %p)", base, base + 0x80000000);
|
LOG(MEMMAP, "Memory system initialized. RAM at %p (mirrors at 0 @ %p, 0x80000000 @ %p , 0xC0000000 @ %p)",
|
||||||
|
m_pRAM, m_pPhysicalRAM, m_pVirtualCachedRAM, m_pVirtualUncachedRAM);
|
||||||
m_IsInitialized = true;
|
m_IsInitialized = true;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -117,7 +117,7 @@ void andis_rc(UGeckoInstruction _inst)
|
||||||
|
|
||||||
void cmpi(UGeckoInstruction _inst)
|
void cmpi(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
Helper_UpdateCRx(_inst.CRFD, m_GPR[_inst.RA]-_inst.SIMM_16);
|
Helper_UpdateCRx(_inst.CRFD, m_GPR[_inst.RA] - _inst.SIMM_16);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cmpli(UGeckoInstruction _inst)
|
void cmpli(UGeckoInstruction _inst)
|
||||||
|
@ -128,7 +128,7 @@ void cmpli(UGeckoInstruction _inst)
|
||||||
if (a < b) f = 0x8;
|
if (a < b) f = 0x8;
|
||||||
else if (a > b) f = 0x4;
|
else if (a > b) f = 0x4;
|
||||||
else f = 0x2; //equals
|
else f = 0x2; //equals
|
||||||
if (XER.SO) f = 0x1;
|
if (GetXER_SO()) f |= 0x1;
|
||||||
SetCRField(_inst.CRFD, f);
|
SetCRField(_inst.CRFD, f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -151,13 +151,12 @@ void subfic(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
/* u32 rra = ~m_GPR[_inst.RA];
|
/* u32 rra = ~m_GPR[_inst.RA];
|
||||||
s32 immediate = (s16)_inst.SIMM_16 + 1;
|
s32 immediate = (s16)_inst.SIMM_16 + 1;
|
||||||
|
|
||||||
|
|
||||||
// #define CALC_XER_CA(X,Y) (((X) + (Y) < X) ? SET_XER_CA : CLEAR_XER_CA)
|
// #define CALC_XER_CA(X,Y) (((X) + (Y) < X) ? SET_XER_CA : CLEAR_XER_CA)
|
||||||
if ((rra + immediate) < rra)
|
if ((rra + immediate) < rra)
|
||||||
XER.CA = 1;
|
SetCarry(1);
|
||||||
else
|
else
|
||||||
XER.CA = 0;
|
SetCarry(0);
|
||||||
|
|
||||||
m_GPR[_inst.RD] = rra - immediate;
|
m_GPR[_inst.RD] = rra - immediate;
|
||||||
*/
|
*/
|
||||||
|
@ -227,11 +226,10 @@ void cmp(UGeckoInstruction _inst)
|
||||||
s32 a = (s32)m_GPR[_inst.RA];
|
s32 a = (s32)m_GPR[_inst.RA];
|
||||||
s32 b = (s32)m_GPR[_inst.RB];
|
s32 b = (s32)m_GPR[_inst.RB];
|
||||||
int fTemp = 0x8; // a < b
|
int fTemp = 0x8; // a < b
|
||||||
|
// if (a < b) fTemp = 0x8; else
|
||||||
// if (a < b) fTemp = 0x8; else
|
if (a > b) fTemp = 0x4;
|
||||||
if (a > b) fTemp = 0x4;
|
|
||||||
else if (a == b) fTemp = 0x2;
|
else if (a == b) fTemp = 0x2;
|
||||||
if (XER.SO) PanicAlert("cmp getting overflow flag"); // fTemp |= 0x1
|
if (GetXER_SO()) PanicAlert("cmp getting overflow flag"); // fTemp |= 0x1
|
||||||
SetCRField(_inst.CRFD, fTemp);
|
SetCRField(_inst.CRFD, fTemp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -241,10 +239,10 @@ void cmpl(UGeckoInstruction _inst)
|
||||||
u32 b = m_GPR[_inst.RB];
|
u32 b = m_GPR[_inst.RB];
|
||||||
u32 fTemp = 0x8; // a < b
|
u32 fTemp = 0x8; // a < b
|
||||||
|
|
||||||
// if (a < b) fTemp = 0x8;else
|
// if (a < b) fTemp = 0x8;else
|
||||||
if (a > b) fTemp = 0x4;
|
if (a > b) fTemp = 0x4;
|
||||||
else if (a == b) fTemp = 0x2;
|
else if (a == b) fTemp = 0x2;
|
||||||
if (XER.SO) PanicAlert("cmpl getting overflow flag"); // fTemp |= 0x1;
|
if (GetXER_SO()) PanicAlert("cmpl getting overflow flag"); // fTemp |= 0x1;
|
||||||
SetCRField(_inst.CRFD, fTemp);
|
SetCRField(_inst.CRFD, fTemp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -28,6 +28,11 @@
|
||||||
namespace Interpreter
|
namespace Interpreter
|
||||||
{
|
{
|
||||||
|
|
||||||
|
// TODO: These should really be in the save state, although it's unlikely to matter much.
|
||||||
|
// They are for lwarx and its friend stwcxd.
|
||||||
|
static bool g_bReserve = false;
|
||||||
|
static u32 g_reserveAddr;
|
||||||
|
|
||||||
u32 Helper_Get_EA(const UGeckoInstruction _inst)
|
u32 Helper_Get_EA(const UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
return _inst.RA ? (m_GPR[_inst.RA] + _inst.SIMM_16) : _inst.SIMM_16;
|
return _inst.RA ? (m_GPR[_inst.RA] + _inst.SIMM_16) : _inst.SIMM_16;
|
||||||
|
@ -581,37 +586,32 @@ void stwbrx(UGeckoInstruction _inst)
|
||||||
|
|
||||||
|
|
||||||
// The following two instructions are for SMP communications. On a single
|
// The following two instructions are for SMP communications. On a single
|
||||||
// CPU, they cannot fail unless an interrupt happens in between, which usually
|
// CPU, they cannot fail unless an interrupt happens in between.
|
||||||
// won't happen with the JIT.
|
|
||||||
bool g_bReserve = false;
|
|
||||||
u32 g_reserveAddr;
|
|
||||||
|
|
||||||
void lwarx(UGeckoInstruction _inst)
|
void lwarx(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
u32 uAddress = Helper_Get_EA_X(_inst);
|
u32 uAddress = Helper_Get_EA_X(_inst);
|
||||||
|
|
||||||
m_GPR[_inst.RD] = Memory::Read_U32(uAddress);
|
m_GPR[_inst.RD] = Memory::Read_U32(uAddress);
|
||||||
g_bReserve = true;
|
|
||||||
g_reserveAddr = uAddress;
|
g_bReserve = true;
|
||||||
|
g_reserveAddr = uAddress;
|
||||||
}
|
}
|
||||||
|
|
||||||
void stwcxd(UGeckoInstruction _inst)
|
void stwcxd(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
// Stores Word Conditional indeXed
|
// Stores Word Conditional indeXed
|
||||||
|
u32 uAddress;
|
||||||
u32 uAddress;
|
if (g_bReserve) {
|
||||||
|
|
||||||
if(g_bReserve) {
|
|
||||||
uAddress = Helper_Get_EA_X(_inst);
|
uAddress = Helper_Get_EA_X(_inst);
|
||||||
if(uAddress == g_reserveAddr) {
|
if (uAddress == g_reserveAddr) {
|
||||||
Memory::Write_U32(m_GPR[_inst.RS], uAddress);
|
Memory::Write_U32(m_GPR[_inst.RS], uAddress);
|
||||||
g_bReserve = false;
|
g_bReserve = false;
|
||||||
SetCRField(0, 2 | XER.SO);
|
SetCRField(0, 2 | GetXER_SO());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SetCRField(0, XER.SO);
|
SetCRField(0, GetXER_SO());
|
||||||
}
|
}
|
||||||
|
|
||||||
void stwux(UGeckoInstruction _inst)
|
void stwux(UGeckoInstruction _inst)
|
||||||
|
|
|
@ -229,7 +229,6 @@ void ps_cmpu1(UGeckoInstruction _inst)
|
||||||
if (fa < fb) compareResult = 8;
|
if (fa < fb) compareResult = 8;
|
||||||
else if (fa > fb) compareResult = 4;
|
else if (fa > fb) compareResult = 4;
|
||||||
else compareResult = 2;
|
else compareResult = 2;
|
||||||
|
|
||||||
SetCRField(_inst.CRFD, compareResult);
|
SetCRField(_inst.CRFD, compareResult);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -226,8 +226,9 @@ void mtfsfx(UGeckoInstruction _inst)
|
||||||
|
|
||||||
void mcrxr(UGeckoInstruction _inst)
|
void mcrxr(UGeckoInstruction _inst)
|
||||||
{
|
{
|
||||||
SetCRField(_inst.CRFD, XER.Hex >> 28);
|
// USES_XER
|
||||||
XER.Hex &= ~0xF0000000; // clear 0-3
|
SetCRField(_inst.CRFD, PowerPC::ppcState.spr[SPR_XER] >> 28);
|
||||||
|
PowerPC::ppcState.spr[SPR_XER] &= ~0xF0000000; // clear 0-3
|
||||||
}
|
}
|
||||||
|
|
||||||
void mfcr(UGeckoInstruction _inst)
|
void mfcr(UGeckoInstruction _inst)
|
||||||
|
|
|
@ -385,6 +385,7 @@ namespace Jit64
|
||||||
js.instructionNumber = i;
|
js.instructionNumber = i;
|
||||||
if (i == (int)size - 1) {
|
if (i == (int)size - 1) {
|
||||||
js.isLastInstruction = true;
|
js.isLastInstruction = true;
|
||||||
|
js.next_inst = 0;
|
||||||
if (Profiler::g_ProfileBlocks) {
|
if (Profiler::g_ProfileBlocks) {
|
||||||
// CAUTION!!! push on stack regs you use, do your stuff, then pop
|
// CAUTION!!! push on stack regs you use, do your stuff, then pop
|
||||||
PROFILER_VPUSH;
|
PROFILER_VPUSH;
|
||||||
|
@ -394,6 +395,9 @@ namespace Jit64
|
||||||
PROFILER_ADD_DIFF_LARGE_INTEGER(&b.ticCounter, &b.ticStop, &b.ticStart);
|
PROFILER_ADD_DIFF_LARGE_INTEGER(&b.ticCounter, &b.ticStop, &b.ticStart);
|
||||||
PROFILER_VPOP;
|
PROFILER_VPOP;
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
// help peephole optimizations
|
||||||
|
js.next_inst = ops[i + 1].inst;
|
||||||
}
|
}
|
||||||
|
|
||||||
// const GekkoOpInfo *info = GetOpInfo();
|
// const GekkoOpInfo *info = GetOpInfo();
|
||||||
|
|
|
@ -49,6 +49,7 @@ namespace Jit64
|
||||||
{
|
{
|
||||||
u32 compilerPC;
|
u32 compilerPC;
|
||||||
u32 blockStart;
|
u32 blockStart;
|
||||||
|
UGeckoInstruction next_inst; // for easy peephole opt.
|
||||||
int blockSize;
|
int blockSize;
|
||||||
int instructionNumber;
|
int instructionNumber;
|
||||||
int downcountAmount;
|
int downcountAmount;
|
||||||
|
@ -142,10 +143,8 @@ namespace Jit64
|
||||||
void fcmpx(UGeckoInstruction inst);
|
void fcmpx(UGeckoInstruction inst);
|
||||||
void fmrx(UGeckoInstruction inst);
|
void fmrx(UGeckoInstruction inst);
|
||||||
|
|
||||||
void cmpli(UGeckoInstruction inst);
|
void cmpXi(UGeckoInstruction inst);
|
||||||
void cmpi(UGeckoInstruction inst);
|
void cmpX(UGeckoInstruction inst);
|
||||||
void cmpl(UGeckoInstruction inst);
|
|
||||||
void cmp(UGeckoInstruction inst);
|
|
||||||
|
|
||||||
void cntlzwx(UGeckoInstruction inst);
|
void cntlzwx(UGeckoInstruction inst);
|
||||||
|
|
||||||
|
|
|
@ -324,18 +324,19 @@ void GenFifoXmm64Write()
|
||||||
|
|
||||||
void GenerateCommon()
|
void GenerateCommon()
|
||||||
{
|
{
|
||||||
|
// USES_CR
|
||||||
computeRc = AlignCode16();
|
computeRc = AlignCode16();
|
||||||
AND(32, M(&CR), Imm32(0x0FFFFFFF));
|
AND(32, M(&PowerPC::ppcState.cr), Imm32(0x0FFFFFFF));
|
||||||
CMP(32, R(EAX), Imm8(0));
|
CMP(32, R(EAX), Imm8(0));
|
||||||
FixupBranch pLesser = J_CC(CC_L);
|
FixupBranch pLesser = J_CC(CC_L);
|
||||||
FixupBranch pGreater = J_CC(CC_G);
|
FixupBranch pGreater = J_CC(CC_G);
|
||||||
OR(32, M(&CR), Imm32(0x20000000)); // _x86Reg == 0
|
OR(32, M(&PowerPC::ppcState.cr), Imm32(0x20000000)); // _x86Reg == 0
|
||||||
RET();
|
RET();
|
||||||
SetJumpTarget(pGreater);
|
SetJumpTarget(pGreater);
|
||||||
OR(32, M(&CR), Imm32(0x40000000)); // _x86Reg > 0
|
OR(32, M(&PowerPC::ppcState.cr), Imm32(0x40000000)); // _x86Reg > 0
|
||||||
RET();
|
RET();
|
||||||
SetJumpTarget(pLesser);
|
SetJumpTarget(pLesser);
|
||||||
OR(32, M(&CR), Imm32(0x80000000)); // _x86Reg < 0
|
OR(32, M(&PowerPC::ppcState.cr), Imm32(0x80000000)); // _x86Reg < 0
|
||||||
RET();
|
RET();
|
||||||
|
|
||||||
fifoDirectWrite8 = AlignCode4();
|
fifoDirectWrite8 = AlignCode4();
|
||||||
|
|
|
@ -108,6 +108,7 @@ namespace Jit64
|
||||||
// variants of this instruction.
|
// variants of this instruction.
|
||||||
void bcx(UGeckoInstruction inst)
|
void bcx(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
|
// USES_CR
|
||||||
_assert_msg_(DYNA_REC, js.isLastInstruction, "bcx not last instruction of block");
|
_assert_msg_(DYNA_REC, js.isLastInstruction, "bcx not last instruction of block");
|
||||||
|
|
||||||
gpr.Flush(FLUSH_ALL);
|
gpr.Flush(FLUSH_ALL);
|
||||||
|
@ -124,7 +125,7 @@ namespace Jit64
|
||||||
|
|
||||||
if ((inst.BO & 16) == 0) // Test a CR bit
|
if ((inst.BO & 16) == 0) // Test a CR bit
|
||||||
{
|
{
|
||||||
TEST(32, M(&CR), Imm32(0x80000000 >> inst.BI));
|
TEST(32, M(&PowerPC::ppcState.cr), Imm32(0x80000000 >> inst.BI));
|
||||||
if (inst.BO & 8) // Conditional branch
|
if (inst.BO & 8) // Conditional branch
|
||||||
branch = CC_NZ;
|
branch = CC_NZ;
|
||||||
else
|
else
|
||||||
|
@ -181,14 +182,14 @@ namespace Jit64
|
||||||
{
|
{
|
||||||
skip = J_CC(branch);
|
skip = J_CC(branch);
|
||||||
}
|
}
|
||||||
u32 destination;
|
u32 destination;
|
||||||
if (inst.LK)
|
if (inst.LK)
|
||||||
MOV(32, M(&LR), Imm32(js.compilerPC + 4));
|
MOV(32, M(&LR), Imm32(js.compilerPC + 4));
|
||||||
if(inst.AA)
|
if(inst.AA)
|
||||||
destination = SignExt16(inst.BD << 2);
|
destination = SignExt16(inst.BD << 2);
|
||||||
else
|
else
|
||||||
destination = js.compilerPC + SignExt16(inst.BD << 2);
|
destination = js.compilerPC + SignExt16(inst.BD << 2);
|
||||||
WriteExit(destination, 0);
|
WriteExit(destination, 0);
|
||||||
if (inst.BO != 20)
|
if (inst.BO != 20)
|
||||||
{
|
{
|
||||||
SetJumpTarget(skip);
|
SetJumpTarget(skip);
|
||||||
|
|
|
@ -205,8 +205,8 @@ namespace Jit64
|
||||||
{
|
{
|
||||||
fpr.LoadToX64(a, true);
|
fpr.LoadToX64(a, true);
|
||||||
}
|
}
|
||||||
|
// USES_CR
|
||||||
AND(32, M(&CR), Imm32(~(0xF0000000 >> shift)));
|
AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xF0000000 >> shift)));
|
||||||
if (ordered)
|
if (ordered)
|
||||||
COMISD(fpr.R(a).GetSimpleReg(), fpr.R(b));
|
COMISD(fpr.R(a).GetSimpleReg(), fpr.R(b));
|
||||||
else
|
else
|
||||||
|
@ -226,7 +226,7 @@ namespace Jit64
|
||||||
SetJumpTarget(continue1);
|
SetJumpTarget(continue1);
|
||||||
SetJumpTarget(continue2);
|
SetJumpTarget(continue2);
|
||||||
SHR(32, R(EAX), Imm8(shift));
|
SHR(32, R(EAX), Imm8(shift));
|
||||||
OR(32, M(&CR), R(EAX));
|
OR(32, M(&PowerPC::ppcState.cr), R(EAX));
|
||||||
fpr.UnlockAll();
|
fpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -24,6 +24,7 @@
|
||||||
#include "JitCache.h"
|
#include "JitCache.h"
|
||||||
#include "JitRegCache.h"
|
#include "JitRegCache.h"
|
||||||
#include "JitAsm.h"
|
#include "JitAsm.h"
|
||||||
|
#include "Jit_Util.h"
|
||||||
|
|
||||||
// #define INSTRUCTION_START Default(inst); return;
|
// #define INSTRUCTION_START Default(inst); return;
|
||||||
#define INSTRUCTION_START
|
#define INSTRUCTION_START
|
||||||
|
@ -32,10 +33,11 @@ namespace Jit64
|
||||||
{
|
{
|
||||||
// Assumes that the flags were just set through an addition.
|
// Assumes that the flags were just set through an addition.
|
||||||
void GenerateCarry(X64Reg temp_reg) {
|
void GenerateCarry(X64Reg temp_reg) {
|
||||||
|
// USES_XER
|
||||||
SETcc(CC_C, R(temp_reg));
|
SETcc(CC_C, R(temp_reg));
|
||||||
AND(32, M(&XER), Imm32(~(1 << 29)));
|
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~(1 << 29)));
|
||||||
SHL(32, R(temp_reg), Imm8(29));
|
SHL(32, R(temp_reg), Imm8(29));
|
||||||
OR(32, M(&XER), R(temp_reg));
|
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(temp_reg));
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef u32 (*Operation)(u32 a, u32 b);
|
typedef u32 (*Operation)(u32 a, u32 b);
|
||||||
|
@ -133,26 +135,49 @@ namespace Jit64
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
|
||||||
|
if (js.next_inst.OPCD == 16) { // bcx
|
||||||
|
if (!js.next_inst.LK && (js.next_inst.BO & BO_DONT_DECREMENT_FLAG))
|
||||||
|
{
|
||||||
|
// it's clear there's plenty of opportunity.
|
||||||
|
//PanicAlert("merge");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
// unsigned
|
// unsigned
|
||||||
void cmpli(UGeckoInstruction inst)
|
void cmpXi(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
// Should check if the next instruction is a branch - if it is, merge the two. This can save
|
// USES_CR
|
||||||
// a whole bunch of instructions and cycles, especially if we aggressively bubble down compares
|
|
||||||
// towards branches.
|
|
||||||
#ifdef JIT_OFF_OPTIONS
|
#ifdef JIT_OFF_OPTIONS
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
|
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
|
||||||
{Default(inst); return;} // turn off from debugger
|
{Default(inst); return;} // turn off from debugger
|
||||||
#endif
|
#endif
|
||||||
|
// Should check if the next instruction is a branch - if it is, merge the two. This can save
|
||||||
|
// a whole bunch of instructions and cycles, especially if we aggressively bubble down compares
|
||||||
|
// towards branches.
|
||||||
INSTRUCTION_START;
|
INSTRUCTION_START;
|
||||||
int a = inst.RA;
|
int a = inst.RA;
|
||||||
u32 uimm = inst.UIMM;
|
|
||||||
int crf = inst.CRFD;
|
int crf = inst.CRFD;
|
||||||
int shift = crf * 4;
|
int shift = crf * 4;
|
||||||
|
Gen::CCFlags less_than, greater_than;
|
||||||
|
OpArg comparand;
|
||||||
|
if (inst.OPCD == 10) {
|
||||||
|
less_than = CC_B;
|
||||||
|
greater_than = CC_A;
|
||||||
|
comparand = Imm32(inst.UIMM);
|
||||||
|
} else {
|
||||||
|
less_than = CC_L;
|
||||||
|
greater_than = CC_G;
|
||||||
|
comparand = Imm32((s32)(s16)inst.UIMM);
|
||||||
|
}
|
||||||
|
|
||||||
gpr.KillImmediate(a); // todo, optimize instead, but unlikely to make a difference
|
gpr.KillImmediate(a); // todo, optimize instead, but unlikely to make a difference
|
||||||
AND(32, M(&CR), Imm32(~(0xF0000000 >> (crf*4))));
|
AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xF0000000 >> (crf*4))));
|
||||||
CMP(32, gpr.R(a), Imm32(uimm));
|
CMP(32, gpr.R(a), comparand);
|
||||||
FixupBranch pLesser = J_CC(CC_B);
|
FixupBranch pLesser = J_CC(less_than);
|
||||||
FixupBranch pGreater = J_CC(CC_A);
|
FixupBranch pGreater = J_CC(greater_than);
|
||||||
|
|
||||||
MOV(32, R(EAX), Imm32(0x20000000 >> shift)); // _x86Reg == 0
|
MOV(32, R(EAX), Imm32(0x20000000 >> shift)); // _x86Reg == 0
|
||||||
FixupBranch continue1 = J();
|
FixupBranch continue1 = J();
|
||||||
|
@ -165,44 +190,17 @@ namespace Jit64
|
||||||
MOV(32, R(EAX), Imm32(0x80000000 >> shift));// _x86Reg < 0
|
MOV(32, R(EAX), Imm32(0x80000000 >> shift));// _x86Reg < 0
|
||||||
SetJumpTarget(continue1);
|
SetJumpTarget(continue1);
|
||||||
SetJumpTarget(continue2);
|
SetJumpTarget(continue2);
|
||||||
OR(32, M(&CR), R(EAX));
|
OR(32, M(&PowerPC::ppcState.cr), R(EAX));
|
||||||
|
|
||||||
|
// TODO: Add extra code at the end for the "taken" case. Jump to it from the matching branches.
|
||||||
|
// Since it's the last block, some liberties can be taken.
|
||||||
|
// don't forget to flush registers AFTER the cmp BEFORE the jmp. Flushing doesn't affect flags.
|
||||||
}
|
}
|
||||||
|
|
||||||
// signed
|
// signed
|
||||||
void cmpi(UGeckoInstruction inst)
|
void cmpX(UGeckoInstruction inst)
|
||||||
{
|
|
||||||
#ifdef JIT_OFF_OPTIONS
|
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
|
|
||||||
{Default(inst); return;} // turn off from debugger
|
|
||||||
#endif
|
|
||||||
INSTRUCTION_START;
|
|
||||||
int a = inst.RA;
|
|
||||||
s32 simm = (s32)(s16)inst.UIMM;
|
|
||||||
int crf = inst.CRFD;
|
|
||||||
int shift = crf * 4;
|
|
||||||
gpr.KillImmediate(a); // todo, optimize instead, but unlikely to make a difference
|
|
||||||
AND(32, M(&CR), Imm32(~(0xF0000000 >> (crf*4))));
|
|
||||||
CMP(32, gpr.R(a), Imm32(simm));
|
|
||||||
FixupBranch pLesser = J_CC(CC_L);
|
|
||||||
FixupBranch pGreater = J_CC(CC_G);
|
|
||||||
// _x86Reg == 0
|
|
||||||
MOV(32, R(EAX), Imm32(0x20000000 >> shift));
|
|
||||||
FixupBranch continue1 = J();
|
|
||||||
// _x86Reg > 0
|
|
||||||
SetJumpTarget(pGreater);
|
|
||||||
MOV(32, R(EAX), Imm32(0x40000000 >> shift));
|
|
||||||
FixupBranch continue2 = J();
|
|
||||||
// _x86Reg < 0
|
|
||||||
SetJumpTarget(pLesser);
|
|
||||||
MOV(32, R(EAX), Imm32(0x80000000 >> shift));
|
|
||||||
SetJumpTarget(continue1);
|
|
||||||
SetJumpTarget(continue2);
|
|
||||||
OR(32, M(&CR), R(EAX));
|
|
||||||
}
|
|
||||||
|
|
||||||
// signed
|
|
||||||
void cmp(UGeckoInstruction inst)
|
|
||||||
{
|
{
|
||||||
|
// USES_CR
|
||||||
#ifdef JIT_OFF_OPTIONS
|
#ifdef JIT_OFF_OPTIONS
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
|
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
|
||||||
{Default(inst); return;} // turn off from debugger
|
{Default(inst); return;} // turn off from debugger
|
||||||
|
@ -212,12 +210,21 @@ namespace Jit64
|
||||||
int b = inst.RB;
|
int b = inst.RB;
|
||||||
int crf = inst.CRFD;
|
int crf = inst.CRFD;
|
||||||
int shift = crf * 4;
|
int shift = crf * 4;
|
||||||
|
Gen::CCFlags less_than, greater_than;
|
||||||
|
Gen::OpArg comparand = gpr.R(b);
|
||||||
|
if (inst.SUBOP10 == 32) {
|
||||||
|
less_than = CC_B;
|
||||||
|
greater_than = CC_A;
|
||||||
|
} else {
|
||||||
|
less_than = CC_L;
|
||||||
|
greater_than = CC_G;
|
||||||
|
}
|
||||||
gpr.Lock(a, b);
|
gpr.Lock(a, b);
|
||||||
gpr.LoadToX64(a, true, false);
|
gpr.LoadToX64(a, true, false);
|
||||||
AND(32, M(&CR), Imm32(~(0xF0000000 >> (crf*4))));
|
AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xF0000000 >> (crf*4))));
|
||||||
CMP(32, gpr.R(a), gpr.R(b));
|
CMP(32, gpr.R(a), comparand);
|
||||||
FixupBranch pLesser = J_CC(CC_L);
|
FixupBranch pLesser = J_CC(less_than);
|
||||||
FixupBranch pGreater = J_CC(CC_G);
|
FixupBranch pGreater = J_CC(greater_than);
|
||||||
// _x86Reg == 0
|
// _x86Reg == 0
|
||||||
MOV(32, R(EAX), Imm32(0x20000000 >> shift));
|
MOV(32, R(EAX), Imm32(0x20000000 >> shift));
|
||||||
FixupBranch continue1 = J();
|
FixupBranch continue1 = J();
|
||||||
|
@ -230,41 +237,7 @@ namespace Jit64
|
||||||
MOV(32, R(EAX), Imm32(0x80000000 >> shift));
|
MOV(32, R(EAX), Imm32(0x80000000 >> shift));
|
||||||
SetJumpTarget(continue1);
|
SetJumpTarget(continue1);
|
||||||
SetJumpTarget(continue2);
|
SetJumpTarget(continue2);
|
||||||
OR(32, M(&CR), R(EAX));
|
OR(32, M(&PowerPC::ppcState.cr), R(EAX));
|
||||||
gpr.UnlockAll();
|
|
||||||
}
|
|
||||||
|
|
||||||
// unsigned
|
|
||||||
void cmpl(UGeckoInstruction inst)
|
|
||||||
{
|
|
||||||
#ifdef JIT_OFF_OPTIONS
|
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
|
|
||||||
{Default(inst); return;} // turn off from debugger
|
|
||||||
#endif
|
|
||||||
INSTRUCTION_START;
|
|
||||||
int a = inst.RA;
|
|
||||||
int b = inst.RB;
|
|
||||||
int crf = inst.CRFD;
|
|
||||||
int shift = crf * 4;
|
|
||||||
gpr.Lock(a, b);
|
|
||||||
gpr.LoadToX64(a, true, false);
|
|
||||||
AND(32, M(&CR), Imm32(~(0xF0000000 >> (crf*4))));
|
|
||||||
CMP(32, gpr.R(a), gpr.R(b));
|
|
||||||
FixupBranch pLesser = J_CC(CC_B);
|
|
||||||
FixupBranch pGreater = J_CC(CC_A);
|
|
||||||
// _x86Reg == 0
|
|
||||||
MOV(32, R(EAX), Imm32(0x20000000 >> shift));
|
|
||||||
FixupBranch continue1 = J();
|
|
||||||
// _x86Reg > 0
|
|
||||||
SetJumpTarget(pGreater);
|
|
||||||
MOV(32, R(EAX), Imm32(0x40000000 >> shift));
|
|
||||||
FixupBranch continue2 = J();
|
|
||||||
// _x86Reg < 0
|
|
||||||
SetJumpTarget(pLesser);
|
|
||||||
MOV(32, R(EAX), Imm32(0x80000000 >> shift));
|
|
||||||
SetJumpTarget(continue1);
|
|
||||||
SetJumpTarget(continue2);
|
|
||||||
OR(32, M(&CR), R(EAX));
|
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -652,6 +625,7 @@ namespace Jit64
|
||||||
// This can be optimized
|
// This can be optimized
|
||||||
void addex(UGeckoInstruction inst)
|
void addex(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
|
// USES_XER
|
||||||
#ifdef JIT_OFF_OPTIONS
|
#ifdef JIT_OFF_OPTIONS
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
|
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
|
||||||
{Default(inst); return;} // turn off from debugger
|
{Default(inst); return;} // turn off from debugger
|
||||||
|
@ -664,7 +638,7 @@ namespace Jit64
|
||||||
gpr.LoadToX64(d, false);
|
gpr.LoadToX64(d, false);
|
||||||
else
|
else
|
||||||
gpr.LoadToX64(d, true);
|
gpr.LoadToX64(d, true);
|
||||||
MOV(32, R(EAX), M(&XER));
|
MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER]));
|
||||||
SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag
|
SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag
|
||||||
MOV(32, R(EAX), gpr.R(a));
|
MOV(32, R(EAX), gpr.R(a));
|
||||||
ADC(32, R(EAX), gpr.R(b));
|
ADC(32, R(EAX), gpr.R(b));
|
||||||
|
@ -895,6 +869,7 @@ namespace Jit64
|
||||||
|
|
||||||
void srawx(UGeckoInstruction inst)
|
void srawx(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
|
// USES_XER
|
||||||
#ifdef JIT_OFF_OPTIONS
|
#ifdef JIT_OFF_OPTIONS
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
|
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
|
||||||
{Default(inst); return;} // turn off from debugger
|
{Default(inst); return;} // turn off from debugger
|
||||||
|
@ -919,17 +894,17 @@ namespace Jit64
|
||||||
CMP(32, R(EAX), Imm32(-1));
|
CMP(32, R(EAX), Imm32(-1));
|
||||||
SETcc(CC_L, R(EAX));
|
SETcc(CC_L, R(EAX));
|
||||||
SAR(32, gpr.R(a), R(ECX));
|
SAR(32, gpr.R(a), R(ECX));
|
||||||
AND(32, M(&XER), Imm32(~(1 << 29)));
|
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~(1 << 29)));
|
||||||
SHL(32, R(EAX), Imm8(29));
|
SHL(32, R(EAX), Imm8(29));
|
||||||
OR(32, M(&XER), R(EAX));
|
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX));
|
||||||
FixupBranch end = J();
|
FixupBranch end = J();
|
||||||
SetJumpTarget(topBitSet);
|
SetJumpTarget(topBitSet);
|
||||||
MOV(32, R(EAX), gpr.R(s));
|
MOV(32, R(EAX), gpr.R(s));
|
||||||
SAR(32, R(EAX), Imm8(31));
|
SAR(32, R(EAX), Imm8(31));
|
||||||
MOV(32, gpr.R(a), R(EAX));
|
MOV(32, gpr.R(a), R(EAX));
|
||||||
AND(32, M(&XER), Imm32(~(1 << 29)));
|
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~(1 << 29)));
|
||||||
AND(32, R(EAX), Imm32(1<<29));
|
AND(32, R(EAX), Imm32(1<<29));
|
||||||
OR(32, M(&XER), R(EAX));
|
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX));
|
||||||
SetJumpTarget(end);
|
SetJumpTarget(end);
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
gpr.UnlockAllX();
|
gpr.UnlockAllX();
|
||||||
|
@ -961,11 +936,11 @@ namespace Jit64
|
||||||
FixupBranch nocarry1 = J_CC(CC_GE);
|
FixupBranch nocarry1 = J_CC(CC_GE);
|
||||||
TEST(32, R(EAX), Imm32((u32)0xFFFFFFFF >> (32 - amount))); // were any 1s shifted out?
|
TEST(32, R(EAX), Imm32((u32)0xFFFFFFFF >> (32 - amount))); // were any 1s shifted out?
|
||||||
FixupBranch nocarry2 = J_CC(CC_Z);
|
FixupBranch nocarry2 = J_CC(CC_Z);
|
||||||
OR(32, M(&XER), Imm32(XER_CA_MASK)); //XER.CA = 1
|
JitSetCA();
|
||||||
FixupBranch carry = J(false);
|
FixupBranch carry = J(false);
|
||||||
SetJumpTarget(nocarry1);
|
SetJumpTarget(nocarry1);
|
||||||
SetJumpTarget(nocarry2);
|
SetJumpTarget(nocarry2);
|
||||||
AND(32, M(&XER), Imm32(~XER_CA_MASK)); //XER.CA = 0
|
JitClearCA();
|
||||||
SetJumpTarget(carry);
|
SetJumpTarget(carry);
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
@ -973,7 +948,7 @@ namespace Jit64
|
||||||
{
|
{
|
||||||
Default(inst); return;
|
Default(inst); return;
|
||||||
gpr.Lock(a, s);
|
gpr.Lock(a, s);
|
||||||
AND(32, M(&XER), Imm32(~XER_CA_MASK)); //XER.CA = 0
|
JitClearCA();
|
||||||
gpr.LoadToX64(a, a == s, true);
|
gpr.LoadToX64(a, a == s, true);
|
||||||
if (a != s)
|
if (a != s)
|
||||||
MOV(32, gpr.R(a), gpr.R(s));
|
MOV(32, gpr.R(a), gpr.R(s));
|
||||||
|
|
|
@ -37,6 +37,16 @@
|
||||||
namespace Jit64
|
namespace Jit64
|
||||||
{
|
{
|
||||||
|
|
||||||
|
// Clears the carry (CA) bit of the guest XER register.
// Issues a 32-bit AND on the memory operand PowerPC::ppcState.spr[SPR_XER]
// with the inverted XER_CA_MASK, so every XER bit outside that mask is kept.
// Takes no arguments and returns nothing; AND, M and Imm32 are defined elsewhere.
void JitClearCA()
|
||||||
|
{
|
||||||
|
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sets the carry (CA) bit of the guest XER register.
// Issues a 32-bit OR on the memory operand PowerPC::ppcState.spr[SPR_XER]
// with XER_CA_MASK, so every XER bit outside that mask is left as it was.
// Takes no arguments and returns nothing; OR, M and Imm32 are defined elsewhere.
void JitSetCA()
|
||||||
|
{
|
||||||
|
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1
|
||||||
|
}
|
||||||
|
|
||||||
void UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend)
|
void UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend)
|
||||||
{
|
{
|
||||||
#ifdef _M_IX86
|
#ifdef _M_IX86
|
||||||
|
|
|
@ -33,4 +33,7 @@ void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);
|
||||||
void ForceSinglePrecisionS(X64Reg xmm);
|
void ForceSinglePrecisionS(X64Reg xmm);
|
||||||
void ForceSinglePrecisionP(X64Reg xmm);
|
void ForceSinglePrecisionP(X64Reg xmm);
|
||||||
|
|
||||||
|
void JitClearCA();
|
||||||
|
void JitSetCA();
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
|
@ -285,19 +285,10 @@ void FixUpInternalBranches(CodeOp *code, int begin, int end)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Rotates the inclusive range code[first..last] one slot towards the front:
// every element after `first` moves down by one index and the element that
// was at `first` ends up at `last`.
//   code  - array of ops, modified in place
//   first - index of the element to move to the end of the range
//   last  - index that receives the moved element
// No bounds checking is done here; if first >= last the loop does not run and
// code[last] is simply overwritten with a copy of code[first].
void ShuffleUp(CodeOp *code, int first, int last)
|
|
||||||
{
|
|
||||||
// Save the element that is about to be overwritten by the shift below.
CodeOp temp = code[first];
|
|
||||||
for (int i = first; i < last; i++)
|
|
||||||
code[i] = code[i + 1];
|
|
||||||
// Drop the saved element into the slot freed at the end of the range.
code[last] = temp;
|
|
||||||
}
|
|
||||||
|
|
||||||
// IMPORTANT - CURRENTLY ASSUMES THAT A IS A COMPARE
|
// IMPORTANT - CURRENTLY ASSUMES THAT A IS A COMPARE
|
||||||
bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
|
bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
|
||||||
{
|
{
|
||||||
// Disabled for now
|
return false; // Currently deactivated in SVN.
|
||||||
return false;
|
|
||||||
|
|
||||||
const GekkoOPInfo *a_info = GetOpInfo(a.inst);
|
const GekkoOPInfo *a_info = GetOpInfo(a.inst);
|
||||||
const GekkoOPInfo *b_info = GetOpInfo(b.inst);
|
const GekkoOPInfo *b_info = GetOpInfo(b.inst);
|
||||||
|
@ -308,7 +299,6 @@ bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
|
||||||
if ((b_flags & (FL_RC_BIT | FL_RC_BIT_F)) && (b.inst.hex & 1))
|
if ((b_flags & (FL_RC_BIT | FL_RC_BIT_F)) && (b.inst.hex & 1))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// 10 cmpi, 11 cmpli - we got a compare!
|
|
||||||
switch (b.inst.OPCD)
|
switch (b.inst.OPCD)
|
||||||
{
|
{
|
||||||
case 16:
|
case 16:
|
||||||
|
@ -323,20 +313,34 @@ bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
|
||||||
// For now, only integer ops acceptable.
|
// For now, only integer ops acceptable.
|
||||||
switch (b_info->type) {
|
switch (b_info->type) {
|
||||||
case OPTYPE_INTEGER:
|
case OPTYPE_INTEGER:
|
||||||
|
case OPTYPE_LOAD:
|
||||||
|
case OPTYPE_STORE:
|
||||||
|
case OPTYPE_LOADFP:
|
||||||
|
case OPTYPE_STOREFP:
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check that we have no register collisions.
|
// Check that we have no register collisions.
|
||||||
|
// That is, check that none of b's outputs matches any of a's inputs,
|
||||||
|
// and that none of a's outputs matches any of b's inputs.
|
||||||
|
// The latter does not apply if a is a cmp, of course, but doesn't hurt to check.
|
||||||
bool no_swap = false;
|
bool no_swap = false;
|
||||||
for (int j = 0; j < 3; j++)
|
for (int j = 0; j < 3; j++)
|
||||||
{
|
{
|
||||||
int regIn = a.regsIn[j];
|
int regInA = a.regsIn[j];
|
||||||
if (regIn < 0)
|
int regInB = b.regsIn[j];
|
||||||
continue;
|
if (regInA >= 0 &&
|
||||||
if (b.regsOut[0] == regIn ||
|
b.regsOut[0] == regInA ||
|
||||||
b.regsOut[1] == regIn)
|
b.regsOut[1] == regInA)
|
||||||
|
{
|
||||||
|
// reg collision! don't swap
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (regInB >= 0 &&
|
||||||
|
a.regsOut[0] == regInB ||
|
||||||
|
a.regsOut[1] == regInB)
|
||||||
{
|
{
|
||||||
// reg collision! don't swap
|
// reg collision! don't swap
|
||||||
return false;
|
return false;
|
||||||
|
@ -346,6 +350,7 @@ bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Does not yet perform inlining - although there are plans for that.
|
||||||
CodeOp *Flatten(u32 address, int &realsize, BlockStats &st, BlockRegStats &gpa, BlockRegStats &fpa)
|
CodeOp *Flatten(u32 address, int &realsize, BlockStats &st, BlockRegStats &gpa, BlockRegStats &fpa)
|
||||||
{
|
{
|
||||||
int numCycles = 0;
|
int numCycles = 0;
|
||||||
|
@ -623,9 +628,8 @@ CodeOp *Flatten(u32 address, int &realsize, BlockStats &st, BlockRegStats &gpa,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Scan for CR0 dependency
|
||||||
//Scan for CR0 dependency
|
// assume next block wants CR0 to be safe
|
||||||
//assume next block wants CR0 to be safe
|
|
||||||
bool wantsCR0 = true;
|
bool wantsCR0 = true;
|
||||||
bool wantsCR1 = true;
|
bool wantsCR1 = true;
|
||||||
bool wantsPS1 = true;
|
bool wantsPS1 = true;
|
||||||
|
|
|
@ -79,8 +79,6 @@ struct BlockRegStats
|
||||||
void Init();
|
void Init();
|
||||||
void Shutdown();
|
void Shutdown();
|
||||||
|
|
||||||
void ShuffleUp(CodeOp *code, int first, int last);
|
|
||||||
|
|
||||||
CodeOp *Flatten(u32 address, int &realsize, BlockStats &st, BlockRegStats &gpa, BlockRegStats &fpa);
|
CodeOp *Flatten(u32 address, int &realsize, BlockStats &st, BlockRegStats &gpa, BlockRegStats &fpa);
|
||||||
|
|
||||||
void LogFunctionCall(u32 addr);
|
void LogFunctionCall(u32 addr);
|
||||||
|
|
|
@ -141,8 +141,8 @@ GekkoOPTemplate primarytable[] =
|
||||||
|
|
||||||
{7, Interpreter::mulli, Jit64::mulli, {"mulli", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_RC_BIT, 2}},
|
{7, Interpreter::mulli, Jit64::mulli, {"mulli", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_RC_BIT, 2}},
|
||||||
{8, Interpreter::subfic, Jit64::subfic, {"subfic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}},
|
{8, Interpreter::subfic, Jit64::subfic, {"subfic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}},
|
||||||
{10, Interpreter::cmpli, Jit64::cmpli, {"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
|
{10, Interpreter::cmpli, Jit64::cmpXi, {"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
|
||||||
{11, Interpreter::cmpi, Jit64::cmpi, {"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
|
{11, Interpreter::cmpi, Jit64::cmpXi, {"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
|
||||||
{12, Interpreter::addic, Jit64::reg_imm, {"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}},
|
{12, Interpreter::addic, Jit64::reg_imm, {"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}},
|
||||||
{13, Interpreter::addic_rc, Jit64::reg_imm, {"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}},
|
{13, Interpreter::addic_rc, Jit64::reg_imm, {"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}},
|
||||||
{14, Interpreter::addi, Jit64::reg_imm, {"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}},
|
{14, Interpreter::addi, Jit64::reg_imm, {"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}},
|
||||||
|
@ -283,8 +283,8 @@ GekkoOPTemplate table31[] =
|
||||||
{412, Interpreter::orcx, Jit64::Default, {"orcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
|
{412, Interpreter::orcx, Jit64::Default, {"orcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
|
||||||
{476, Interpreter::nandx, Jit64::Default, {"nandx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
|
{476, Interpreter::nandx, Jit64::Default, {"nandx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
|
||||||
{284, Interpreter::eqvx, Jit64::Default, {"eqvx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
|
{284, Interpreter::eqvx, Jit64::Default, {"eqvx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
|
||||||
{0, Interpreter::cmp, Jit64::cmp, {"cmp", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}},
|
{0, Interpreter::cmp, Jit64::cmpX, {"cmp", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}},
|
||||||
{32, Interpreter::cmpl, Jit64::cmpl, {"cmpl", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}},
|
{32, Interpreter::cmpl, Jit64::cmpX, {"cmpl", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}},
|
||||||
{26, Interpreter::cntlzwx, Jit64::cntlzwx, {"cntlzwx",OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
|
{26, Interpreter::cntlzwx, Jit64::cntlzwx, {"cntlzwx",OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
|
||||||
{922, Interpreter::extshx, Jit64::extshx, {"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
|
{922, Interpreter::extshx, Jit64::extshx, {"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
|
||||||
{954, Interpreter::extsbx, Jit64::extsbx, {"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
|
{954, Interpreter::extsbx, Jit64::extsbx, {"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
|
||||||
|
|
|
@ -25,77 +25,75 @@ class PointerWrap;
|
||||||
|
|
||||||
namespace PowerPC
|
namespace PowerPC
|
||||||
{
|
{
|
||||||
enum CoreMode
|
|
||||||
{
|
|
||||||
MODE_INTERPRETER,
|
|
||||||
MODE_JIT,
|
|
||||||
};
|
|
||||||
|
|
||||||
// This contains the entire state of the emulated PowerPC "Gekko" CPU.
|
enum CoreMode
|
||||||
struct GC_ALIGNED64(PowerPCState)
|
{
|
||||||
{
|
MODE_INTERPRETER,
|
||||||
u32 mojs[128]; // Try to isolate the regs from other variables in the cache.
|
MODE_JIT,
|
||||||
u32 gpr[32]; // General purpose registers. r1 = stack pointer.
|
};
|
||||||
|
|
||||||
// The paired singles are strange : PS0 is stored in the full 64 bits of each FPR
|
// This contains the entire state of the emulated PowerPC "Gekko" CPU.
|
||||||
// but ps calculations are only done in 32-bit precision, and PS1 is only 32 bits.
|
struct GC_ALIGNED64(PowerPCState)
|
||||||
// Since we want to use SIMD, SSE2 is the only viable alternative - 2x double.
|
{
|
||||||
u64 ps[32][2];
|
u32 mojs[128]; // Try to isolate the regs from other variables in the cache.
|
||||||
|
u32 gpr[32]; // General purpose registers. r1 = stack pointer.
|
||||||
|
|
||||||
u32 pc; // program counter
|
// The paired singles are strange : PS0 is stored in the full 64 bits of each FPR
|
||||||
u32 npc;
|
// but ps calculations are only done in 32-bit precision, and PS1 is only 32 bits.
|
||||||
|
// Since we want to use SIMD, SSE2 is the only viable alternative - 2x double.
|
||||||
|
u64 ps[32][2];
|
||||||
|
|
||||||
u32 cr; // flags
|
u32 pc; // program counter
|
||||||
u32 msr; // machine specific register
|
u32 npc;
|
||||||
u32 fpscr; // floating point flags/status bits
|
|
||||||
|
|
||||||
// Exception management.
|
u32 cr; // flags
|
||||||
u32 Exceptions;
|
u32 msr; // machine specific register
|
||||||
|
u32 fpscr; // floating point flags/status bits
|
||||||
|
|
||||||
u32 sr[16]; // Segment registers. Unused.
|
// Exception management.
|
||||||
|
u32 Exceptions;
|
||||||
|
|
||||||
u32 DebugCount;
|
u32 sr[16]; // Segment registers. Unused.
|
||||||
|
|
||||||
// special purpose registers - controls quantizers, DMA, and lots of other misc extensions.
|
|
||||||
// also for power management, but we don't care about that.
|
|
||||||
u32 spr[1024];
|
|
||||||
};
|
|
||||||
|
|
||||||
enum CPUState
|
u32 DebugCount;
|
||||||
{
|
|
||||||
CPU_RUNNING = 0,
|
// special purpose registers - controls quantizers, DMA, and lots of other misc extensions.
|
||||||
CPU_RUNNINGDEBUG = 1,
|
// also for power management, but we don't care about that.
|
||||||
CPU_STEPPING = 2,
|
u32 spr[1024];
|
||||||
CPU_POWERDOWN = 3,
|
};
|
||||||
};
|
|
||||||
|
|
||||||
extern PowerPCState ppcState;
|
enum CPUState
|
||||||
extern volatile CPUState state; // Execution engines should poll this to know when to exit.
|
{
|
||||||
|
CPU_RUNNING = 0,
|
||||||
|
CPU_RUNNINGDEBUG = 1,
|
||||||
|
CPU_STEPPING = 2,
|
||||||
|
CPU_POWERDOWN = 3,
|
||||||
|
};
|
||||||
|
|
||||||
void Init();
|
extern PowerPCState ppcState;
|
||||||
void Shutdown();
|
extern volatile CPUState state; // Execution engines should poll this to know when to exit.
|
||||||
void DoState(PointerWrap &p);
|
|
||||||
|
|
||||||
void SetMode(CoreMode _coreType);
|
void Init();
|
||||||
|
void Shutdown();
|
||||||
|
void DoState(PointerWrap &p);
|
||||||
|
|
||||||
void SingleStep();
|
void SetMode(CoreMode _coreType);
|
||||||
void CheckExceptions();
|
|
||||||
void RunLoop();
|
|
||||||
void Start();
|
|
||||||
void Pause();
|
|
||||||
void Stop();
|
|
||||||
|
|
||||||
void OnIdle(u32 _uThreadAddr);
|
void SingleStep();
|
||||||
}
|
void CheckExceptions();
|
||||||
|
void RunLoop();
|
||||||
|
void Start();
|
||||||
|
void Pause();
|
||||||
|
void Stop();
|
||||||
|
|
||||||
// Easy register access macros.
|
void OnIdle(u32 _uThreadAddr);
|
||||||
|
|
||||||
|
// Easy register access macros.
|
||||||
#define HID2 ((UReg_HID2&)PowerPC::ppcState.spr[SPR_HID2])
|
#define HID2 ((UReg_HID2&)PowerPC::ppcState.spr[SPR_HID2])
|
||||||
#define DMAU (*(UReg_DMAU*)&PowerPC::ppcState.spr[SPR_DMAU])
|
#define DMAU (*(UReg_DMAU*)&PowerPC::ppcState.spr[SPR_DMAU])
|
||||||
#define DMAL (*(UReg_DMAL*)&PowerPC::ppcState.spr[SPR_DMAL])
|
#define DMAL (*(UReg_DMAL*)&PowerPC::ppcState.spr[SPR_DMAL])
|
||||||
#define XER ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER])
|
|
||||||
#define PC PowerPC::ppcState.pc
|
#define PC PowerPC::ppcState.pc
|
||||||
#define NPC PowerPC::ppcState.npc
|
#define NPC PowerPC::ppcState.npc
|
||||||
#define CR PowerPC::ppcState.cr
|
|
||||||
#define FPSCR ((UReg_FPSCR&)PowerPC::ppcState.fpscr)
|
#define FPSCR ((UReg_FPSCR&)PowerPC::ppcState.fpscr)
|
||||||
#define MSR PowerPC::ppcState.msr
|
#define MSR PowerPC::ppcState.msr
|
||||||
#define GPR(n) PowerPC::ppcState.gpr[n]
|
#define GPR(n) PowerPC::ppcState.gpr[n]
|
||||||
|
@ -121,11 +119,13 @@ namespace PowerPC
|
||||||
#define riPS0(i) (*(u64*)(&PowerPC::ppcState.ps[i][0]))
|
#define riPS0(i) (*(u64*)(&PowerPC::ppcState.ps[i][0]))
|
||||||
#define riPS1(i) (*(u64*)(&PowerPC::ppcState.ps[i][1]))
|
#define riPS1(i) (*(u64*)(&PowerPC::ppcState.ps[i][1]))
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
// Wrappers to make it easier in the future to completely replace the storage of CR and Carry bits
|
// Wrappers to make it easier in the future to completely replace the storage of CR and Carry bits
|
||||||
// to something more x86-friendly. These are not used 100% consistently yet - and if we do this, we
|
// to something more x86-friendly. These are not used 100% consistently yet - and if we do this, we
|
||||||
// need the corresponding stuff on the JIT side too.
|
// need the corresponding stuff on the JIT side too.
|
||||||
|
|
||||||
|
// These are intended to stay fast, probably become faster, and are not likely to slow down much if at all.
|
||||||
inline void SetCRField(int cr_field, int value) {
|
inline void SetCRField(int cr_field, int value) {
|
||||||
PowerPC::ppcState.cr = (PowerPC::ppcState.cr & (~(0xF0000000 >> (cr_field * 4)))) | (value << ((7 - cr_field) * 4));
|
PowerPC::ppcState.cr = (PowerPC::ppcState.cr & (~(0xF0000000 >> (cr_field * 4)))) | (value << ((7 - cr_field) * 4));
|
||||||
}
|
}
|
||||||
|
@ -135,9 +135,10 @@ inline u32 GetCRField(int cr_field) {
|
||||||
}
|
}
|
||||||
|
|
||||||
inline u32 GetCRBit(int bit) {
|
inline u32 GetCRBit(int bit) {
|
||||||
return (CR >> (31 - bit)) & 1;
|
return (PowerPC::ppcState.cr >> (31 - bit)) & 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetCR and GetCR may become fairly slow soon. Should be avoided if possible.
|
||||||
inline void SetCR(u32 new_cr) {
|
inline void SetCR(u32 new_cr) {
|
||||||
PowerPC::ppcState.cr = new_cr;
|
PowerPC::ppcState.cr = new_cr;
|
||||||
}
|
}
|
||||||
|
@ -146,12 +147,29 @@ inline u32 GetCR() {
|
||||||
return PowerPC::ppcState.cr;
|
return PowerPC::ppcState.cr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetCarry/GetCarry may speed up soon.
|
||||||
inline void SetCarry(int ca) {
|
inline void SetCarry(int ca) {
|
||||||
XER.CA = ca;
|
((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).CA = ca;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int GetCarry() {
|
inline int GetCarry() {
|
||||||
return XER.CA;
|
return ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).CA;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline UReg_XER GetXER() {
|
||||||
|
return ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void SetXER(UReg_XER new_xer) {
|
||||||
|
((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]) = new_xer;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline int GetXER_SO() {
|
||||||
|
return ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).SO;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void SetXER_SO(int value) {
|
||||||
|
((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).SO = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue