Consolidate some compare instructions in JIT, preparations for separate CR flag storage, misc other cleanup in cpu core.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1547 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2008-12-15 19:22:34 +00:00
parent 4b5cfed314
commit 866d4e6bc8
17 changed files with 240 additions and 228 deletions

View File

@ -672,7 +672,8 @@ bool Init()
else else
InitHWMemFuncs(); InitHWMemFuncs();
LOG(MEMMAP, "Memory system initialized. RAM at %p (0x80000000 @ %p)", base, base + 0x80000000); LOG(MEMMAP, "Memory system initialized. RAM at %p (mirrors at 0 @ %p, 0x80000000 @ %p , 0xC0000000 @ %p)",
m_pRAM, m_pPhysicalRAM, m_pVirtualCachedRAM, m_pVirtualUncachedRAM);
m_IsInitialized = true; m_IsInitialized = true;
return true; return true;
} }

View File

@ -117,7 +117,7 @@ void andis_rc(UGeckoInstruction _inst)
void cmpi(UGeckoInstruction _inst) void cmpi(UGeckoInstruction _inst)
{ {
Helper_UpdateCRx(_inst.CRFD, m_GPR[_inst.RA]-_inst.SIMM_16); Helper_UpdateCRx(_inst.CRFD, m_GPR[_inst.RA] - _inst.SIMM_16);
} }
void cmpli(UGeckoInstruction _inst) void cmpli(UGeckoInstruction _inst)
@ -128,7 +128,7 @@ void cmpli(UGeckoInstruction _inst)
if (a < b) f = 0x8; if (a < b) f = 0x8;
else if (a > b) f = 0x4; else if (a > b) f = 0x4;
else f = 0x2; //equals else f = 0x2; //equals
if (XER.SO) f = 0x1; if (GetXER_SO()) f |= 0x1;
SetCRField(_inst.CRFD, f); SetCRField(_inst.CRFD, f);
} }
@ -151,13 +151,12 @@ void subfic(UGeckoInstruction _inst)
{ {
/* u32 rra = ~m_GPR[_inst.RA]; /* u32 rra = ~m_GPR[_inst.RA];
s32 immediate = (s16)_inst.SIMM_16 + 1; s32 immediate = (s16)_inst.SIMM_16 + 1;
// #define CALC_XER_CA(X,Y) (((X) + (Y) < X) ? SET_XER_CA : CLEAR_XER_CA) // #define CALC_XER_CA(X,Y) (((X) + (Y) < X) ? SET_XER_CA : CLEAR_XER_CA)
if ((rra + immediate) < rra) if ((rra + immediate) < rra)
XER.CA = 1; SetCarry(1);
else else
XER.CA = 0; SetCarry(0);
m_GPR[_inst.RD] = rra - immediate; m_GPR[_inst.RD] = rra - immediate;
*/ */
@ -227,11 +226,10 @@ void cmp(UGeckoInstruction _inst)
s32 a = (s32)m_GPR[_inst.RA]; s32 a = (s32)m_GPR[_inst.RA];
s32 b = (s32)m_GPR[_inst.RB]; s32 b = (s32)m_GPR[_inst.RB];
int fTemp = 0x8; // a < b int fTemp = 0x8; // a < b
// if (a < b) fTemp = 0x8; else
// if (a < b) fTemp = 0x8; else if (a > b) fTemp = 0x4;
if (a > b) fTemp = 0x4;
else if (a == b) fTemp = 0x2; else if (a == b) fTemp = 0x2;
if (XER.SO) PanicAlert("cmp getting overflow flag"); // fTemp |= 0x1 if (GetXER_SO()) PanicAlert("cmp getting overflow flag"); // fTemp |= 0x1
SetCRField(_inst.CRFD, fTemp); SetCRField(_inst.CRFD, fTemp);
} }
@ -241,10 +239,10 @@ void cmpl(UGeckoInstruction _inst)
u32 b = m_GPR[_inst.RB]; u32 b = m_GPR[_inst.RB];
u32 fTemp = 0x8; // a < b u32 fTemp = 0x8; // a < b
// if (a < b) fTemp = 0x8;else // if (a < b) fTemp = 0x8;else
if (a > b) fTemp = 0x4; if (a > b) fTemp = 0x4;
else if (a == b) fTemp = 0x2; else if (a == b) fTemp = 0x2;
if (XER.SO) PanicAlert("cmpl getting overflow flag"); // fTemp |= 0x1; if (GetXER_SO()) PanicAlert("cmpl getting overflow flag"); // fTemp |= 0x1;
SetCRField(_inst.CRFD, fTemp); SetCRField(_inst.CRFD, fTemp);
} }

View File

@ -28,6 +28,11 @@
namespace Interpreter namespace Interpreter
{ {
// TODO: These should really be in the save state, although it's unlikely to matter much.
// They are for lwarx and its friend stwcxd.
static bool g_bReserve = false;
static u32 g_reserveAddr;
u32 Helper_Get_EA(const UGeckoInstruction _inst) u32 Helper_Get_EA(const UGeckoInstruction _inst)
{ {
return _inst.RA ? (m_GPR[_inst.RA] + _inst.SIMM_16) : _inst.SIMM_16; return _inst.RA ? (m_GPR[_inst.RA] + _inst.SIMM_16) : _inst.SIMM_16;
@ -581,37 +586,32 @@ void stwbrx(UGeckoInstruction _inst)
// The following two instructions are for SMP communications. On a single // The following two instructions are for SMP communications. On a single
// CPU, they cannot fail unless an interrupt happens in between, which usually // CPU, they cannot fail unless an interrupt happens in between.
// won't happen with the JIT.
bool g_bReserve = false;
u32 g_reserveAddr;
void lwarx(UGeckoInstruction _inst) void lwarx(UGeckoInstruction _inst)
{ {
u32 uAddress = Helper_Get_EA_X(_inst); u32 uAddress = Helper_Get_EA_X(_inst);
m_GPR[_inst.RD] = Memory::Read_U32(uAddress); m_GPR[_inst.RD] = Memory::Read_U32(uAddress);
g_bReserve = true;
g_reserveAddr = uAddress; g_bReserve = true;
g_reserveAddr = uAddress;
} }
void stwcxd(UGeckoInstruction _inst) void stwcxd(UGeckoInstruction _inst)
{ {
// Stores Word Conditional indeXed // Stores Word Conditional indeXed
u32 uAddress;
u32 uAddress; if (g_bReserve) {
if(g_bReserve) {
uAddress = Helper_Get_EA_X(_inst); uAddress = Helper_Get_EA_X(_inst);
if(uAddress == g_reserveAddr) { if (uAddress == g_reserveAddr) {
Memory::Write_U32(m_GPR[_inst.RS], uAddress); Memory::Write_U32(m_GPR[_inst.RS], uAddress);
g_bReserve = false; g_bReserve = false;
SetCRField(0, 2 | XER.SO); SetCRField(0, 2 | GetXER_SO());
return; return;
} }
} }
SetCRField(0, XER.SO); SetCRField(0, GetXER_SO());
} }
void stwux(UGeckoInstruction _inst) void stwux(UGeckoInstruction _inst)

View File

@ -229,7 +229,6 @@ void ps_cmpu1(UGeckoInstruction _inst)
if (fa < fb) compareResult = 8; if (fa < fb) compareResult = 8;
else if (fa > fb) compareResult = 4; else if (fa > fb) compareResult = 4;
else compareResult = 2; else compareResult = 2;
SetCRField(_inst.CRFD, compareResult); SetCRField(_inst.CRFD, compareResult);
} }

View File

@ -226,8 +226,9 @@ void mtfsfx(UGeckoInstruction _inst)
void mcrxr(UGeckoInstruction _inst) void mcrxr(UGeckoInstruction _inst)
{ {
SetCRField(_inst.CRFD, XER.Hex >> 28); // USES_XER
XER.Hex &= ~0xF0000000; // clear 0-3 SetCRField(_inst.CRFD, PowerPC::ppcState.spr[SPR_XER] >> 28);
PowerPC::ppcState.spr[SPR_XER] &= ~0xF0000000; // clear 0-3
} }
void mfcr(UGeckoInstruction _inst) void mfcr(UGeckoInstruction _inst)

View File

@ -385,6 +385,7 @@ namespace Jit64
js.instructionNumber = i; js.instructionNumber = i;
if (i == (int)size - 1) { if (i == (int)size - 1) {
js.isLastInstruction = true; js.isLastInstruction = true;
js.next_inst = 0;
if (Profiler::g_ProfileBlocks) { if (Profiler::g_ProfileBlocks) {
// CAUTION!!! push on stack regs you use, do your stuff, then pop // CAUTION!!! push on stack regs you use, do your stuff, then pop
PROFILER_VPUSH; PROFILER_VPUSH;
@ -394,6 +395,9 @@ namespace Jit64
PROFILER_ADD_DIFF_LARGE_INTEGER(&b.ticCounter, &b.ticStop, &b.ticStart); PROFILER_ADD_DIFF_LARGE_INTEGER(&b.ticCounter, &b.ticStop, &b.ticStart);
PROFILER_VPOP; PROFILER_VPOP;
} }
} else {
// help peephole optimizations
js.next_inst = ops[i + 1].inst;
} }
// const GekkoOpInfo *info = GetOpInfo(); // const GekkoOpInfo *info = GetOpInfo();

View File

@ -49,6 +49,7 @@ namespace Jit64
{ {
u32 compilerPC; u32 compilerPC;
u32 blockStart; u32 blockStart;
UGeckoInstruction next_inst; // for easy peephole opt.
int blockSize; int blockSize;
int instructionNumber; int instructionNumber;
int downcountAmount; int downcountAmount;
@ -142,10 +143,8 @@ namespace Jit64
void fcmpx(UGeckoInstruction inst); void fcmpx(UGeckoInstruction inst);
void fmrx(UGeckoInstruction inst); void fmrx(UGeckoInstruction inst);
void cmpli(UGeckoInstruction inst); void cmpXi(UGeckoInstruction inst);
void cmpi(UGeckoInstruction inst); void cmpX(UGeckoInstruction inst);
void cmpl(UGeckoInstruction inst);
void cmp(UGeckoInstruction inst);
void cntlzwx(UGeckoInstruction inst); void cntlzwx(UGeckoInstruction inst);

View File

@ -324,18 +324,19 @@ void GenFifoXmm64Write()
void GenerateCommon() void GenerateCommon()
{ {
// USES_CR
computeRc = AlignCode16(); computeRc = AlignCode16();
AND(32, M(&CR), Imm32(0x0FFFFFFF)); AND(32, M(&PowerPC::ppcState.cr), Imm32(0x0FFFFFFF));
CMP(32, R(EAX), Imm8(0)); CMP(32, R(EAX), Imm8(0));
FixupBranch pLesser = J_CC(CC_L); FixupBranch pLesser = J_CC(CC_L);
FixupBranch pGreater = J_CC(CC_G); FixupBranch pGreater = J_CC(CC_G);
OR(32, M(&CR), Imm32(0x20000000)); // _x86Reg == 0 OR(32, M(&PowerPC::ppcState.cr), Imm32(0x20000000)); // _x86Reg == 0
RET(); RET();
SetJumpTarget(pGreater); SetJumpTarget(pGreater);
OR(32, M(&CR), Imm32(0x40000000)); // _x86Reg > 0 OR(32, M(&PowerPC::ppcState.cr), Imm32(0x40000000)); // _x86Reg > 0
RET(); RET();
SetJumpTarget(pLesser); SetJumpTarget(pLesser);
OR(32, M(&CR), Imm32(0x80000000)); // _x86Reg < 0 OR(32, M(&PowerPC::ppcState.cr), Imm32(0x80000000)); // _x86Reg < 0
RET(); RET();
fifoDirectWrite8 = AlignCode4(); fifoDirectWrite8 = AlignCode4();

View File

@ -108,6 +108,7 @@ namespace Jit64
// variants of this instruction. // variants of this instruction.
void bcx(UGeckoInstruction inst) void bcx(UGeckoInstruction inst)
{ {
// USES_CR
_assert_msg_(DYNA_REC, js.isLastInstruction, "bcx not last instruction of block"); _assert_msg_(DYNA_REC, js.isLastInstruction, "bcx not last instruction of block");
gpr.Flush(FLUSH_ALL); gpr.Flush(FLUSH_ALL);
@ -124,7 +125,7 @@ namespace Jit64
if ((inst.BO & 16) == 0) // Test a CR bit if ((inst.BO & 16) == 0) // Test a CR bit
{ {
TEST(32, M(&CR), Imm32(0x80000000 >> inst.BI)); TEST(32, M(&PowerPC::ppcState.cr), Imm32(0x80000000 >> inst.BI));
if (inst.BO & 8) // Conditional branch if (inst.BO & 8) // Conditional branch
branch = CC_NZ; branch = CC_NZ;
else else
@ -181,14 +182,14 @@ namespace Jit64
{ {
skip = J_CC(branch); skip = J_CC(branch);
} }
u32 destination; u32 destination;
if (inst.LK) if (inst.LK)
MOV(32, M(&LR), Imm32(js.compilerPC + 4)); MOV(32, M(&LR), Imm32(js.compilerPC + 4));
if(inst.AA) if(inst.AA)
destination = SignExt16(inst.BD << 2); destination = SignExt16(inst.BD << 2);
else else
destination = js.compilerPC + SignExt16(inst.BD << 2); destination = js.compilerPC + SignExt16(inst.BD << 2);
WriteExit(destination, 0); WriteExit(destination, 0);
if (inst.BO != 20) if (inst.BO != 20)
{ {
SetJumpTarget(skip); SetJumpTarget(skip);

View File

@ -205,8 +205,8 @@ namespace Jit64
{ {
fpr.LoadToX64(a, true); fpr.LoadToX64(a, true);
} }
// USES_CR
AND(32, M(&CR), Imm32(~(0xF0000000 >> shift))); AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xF0000000 >> shift)));
if (ordered) if (ordered)
COMISD(fpr.R(a).GetSimpleReg(), fpr.R(b)); COMISD(fpr.R(a).GetSimpleReg(), fpr.R(b));
else else
@ -226,7 +226,7 @@ namespace Jit64
SetJumpTarget(continue1); SetJumpTarget(continue1);
SetJumpTarget(continue2); SetJumpTarget(continue2);
SHR(32, R(EAX), Imm8(shift)); SHR(32, R(EAX), Imm8(shift));
OR(32, M(&CR), R(EAX)); OR(32, M(&PowerPC::ppcState.cr), R(EAX));
fpr.UnlockAll(); fpr.UnlockAll();
} }

View File

@ -24,6 +24,7 @@
#include "JitCache.h" #include "JitCache.h"
#include "JitRegCache.h" #include "JitRegCache.h"
#include "JitAsm.h" #include "JitAsm.h"
#include "Jit_Util.h"
// #define INSTRUCTION_START Default(inst); return; // #define INSTRUCTION_START Default(inst); return;
#define INSTRUCTION_START #define INSTRUCTION_START
@ -32,10 +33,11 @@ namespace Jit64
{ {
// Assumes that the flags were just set through an addition. // Assumes that the flags were just set through an addition.
void GenerateCarry(X64Reg temp_reg) { void GenerateCarry(X64Reg temp_reg) {
// USES_XER
SETcc(CC_C, R(temp_reg)); SETcc(CC_C, R(temp_reg));
AND(32, M(&XER), Imm32(~(1 << 29))); AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~(1 << 29)));
SHL(32, R(temp_reg), Imm8(29)); SHL(32, R(temp_reg), Imm8(29));
OR(32, M(&XER), R(temp_reg)); OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(temp_reg));
} }
typedef u32 (*Operation)(u32 a, u32 b); typedef u32 (*Operation)(u32 a, u32 b);
@ -133,26 +135,49 @@ namespace Jit64
} }
} }
/*
if (js.next_inst.OPCD == 16) { // bcx
if (!js.next_inst.LK && (js.next_inst.BO & BO_DONT_DECREMENT_FLAG))
{
// it's clear there's plenty of opportunity.
//PanicAlert("merge");
}
}
*/
// unsigned // unsigned
void cmpli(UGeckoInstruction inst) void cmpXi(UGeckoInstruction inst)
{ {
// Should check if the next intruction is a branch - if it is, merge the two. This can save // USES_CR
// a whole bunch of instructions and cycles, especially if we aggressively bubble down compares
// towards branches.
#ifdef JIT_OFF_OPTIONS #ifdef JIT_OFF_OPTIONS
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
{Default(inst); return;} // turn off from debugger {Default(inst); return;} // turn off from debugger
#endif #endif
// Should check if the next instruction is a branch - if it is, merge the two. This can save
// a whole bunch of instructions and cycles, especially if we aggressively bubble down compares
// towards branches.
INSTRUCTION_START; INSTRUCTION_START;
int a = inst.RA; int a = inst.RA;
u32 uimm = inst.UIMM;
int crf = inst.CRFD; int crf = inst.CRFD;
int shift = crf * 4; int shift = crf * 4;
Gen::CCFlags less_than, greater_than;
OpArg comparand;
if (inst.OPCD == 10) {
less_than = CC_B;
greater_than = CC_A;
comparand = Imm32(inst.UIMM);
} else {
less_than = CC_L;
greater_than = CC_G;
comparand = Imm32((s32)(s16)inst.UIMM);
}
gpr.KillImmediate(a); // todo, optimize instead, but unlikely to make a difference gpr.KillImmediate(a); // todo, optimize instead, but unlikely to make a difference
AND(32, M(&CR), Imm32(~(0xF0000000 >> (crf*4)))); AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xF0000000 >> (crf*4))));
CMP(32, gpr.R(a), Imm32(uimm)); CMP(32, gpr.R(a), comparand);
FixupBranch pLesser = J_CC(CC_B); FixupBranch pLesser = J_CC(less_than);
FixupBranch pGreater = J_CC(CC_A); FixupBranch pGreater = J_CC(greater_than);
MOV(32, R(EAX), Imm32(0x20000000 >> shift)); // _x86Reg == 0 MOV(32, R(EAX), Imm32(0x20000000 >> shift)); // _x86Reg == 0
FixupBranch continue1 = J(); FixupBranch continue1 = J();
@ -165,44 +190,17 @@ namespace Jit64
MOV(32, R(EAX), Imm32(0x80000000 >> shift));// _x86Reg < 0 MOV(32, R(EAX), Imm32(0x80000000 >> shift));// _x86Reg < 0
SetJumpTarget(continue1); SetJumpTarget(continue1);
SetJumpTarget(continue2); SetJumpTarget(continue2);
OR(32, M(&CR), R(EAX)); OR(32, M(&PowerPC::ppcState.cr), R(EAX));
// TODO: Add extra code at the end for the "taken" case. Jump to it from the matching branches.
// Since it's the last block, some liberties can be taken.
// don't forget to flush registers AFTER the cmp BEFORE the jmp. Flushing doesn't affect flags.
} }
// signed // signed
void cmpi(UGeckoInstruction inst) void cmpX(UGeckoInstruction inst)
{
#ifdef JIT_OFF_OPTIONS
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
{Default(inst); return;} // turn off from debugger
#endif
INSTRUCTION_START;
int a = inst.RA;
s32 simm = (s32)(s16)inst.UIMM;
int crf = inst.CRFD;
int shift = crf * 4;
gpr.KillImmediate(a); // todo, optimize instead, but unlikely to make a difference
AND(32, M(&CR), Imm32(~(0xF0000000 >> (crf*4))));
CMP(32, gpr.R(a), Imm32(simm));
FixupBranch pLesser = J_CC(CC_L);
FixupBranch pGreater = J_CC(CC_G);
// _x86Reg == 0
MOV(32, R(EAX), Imm32(0x20000000 >> shift));
FixupBranch continue1 = J();
// _x86Reg > 0
SetJumpTarget(pGreater);
MOV(32, R(EAX), Imm32(0x40000000 >> shift));
FixupBranch continue2 = J();
// _x86Reg < 0
SetJumpTarget(pLesser);
MOV(32, R(EAX), Imm32(0x80000000 >> shift));
SetJumpTarget(continue1);
SetJumpTarget(continue2);
OR(32, M(&CR), R(EAX));
}
// signed
void cmp(UGeckoInstruction inst)
{ {
// USES_CR
#ifdef JIT_OFF_OPTIONS #ifdef JIT_OFF_OPTIONS
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
{Default(inst); return;} // turn off from debugger {Default(inst); return;} // turn off from debugger
@ -212,12 +210,21 @@ namespace Jit64
int b = inst.RB; int b = inst.RB;
int crf = inst.CRFD; int crf = inst.CRFD;
int shift = crf * 4; int shift = crf * 4;
Gen::CCFlags less_than, greater_than;
Gen::OpArg comparand = gpr.R(b);
if (inst.SUBOP10 == 32) {
less_than = CC_B;
greater_than = CC_A;
} else {
less_than = CC_L;
greater_than = CC_G;
}
gpr.Lock(a, b); gpr.Lock(a, b);
gpr.LoadToX64(a, true, false); gpr.LoadToX64(a, true, false);
AND(32, M(&CR), Imm32(~(0xF0000000 >> (crf*4)))); AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xF0000000 >> (crf*4))));
CMP(32, gpr.R(a), gpr.R(b)); CMP(32, gpr.R(a), comparand);
FixupBranch pLesser = J_CC(CC_L); FixupBranch pLesser = J_CC(less_than);
FixupBranch pGreater = J_CC(CC_G); FixupBranch pGreater = J_CC(greater_than);
// _x86Reg == 0 // _x86Reg == 0
MOV(32, R(EAX), Imm32(0x20000000 >> shift)); MOV(32, R(EAX), Imm32(0x20000000 >> shift));
FixupBranch continue1 = J(); FixupBranch continue1 = J();
@ -230,41 +237,7 @@ namespace Jit64
MOV(32, R(EAX), Imm32(0x80000000 >> shift)); MOV(32, R(EAX), Imm32(0x80000000 >> shift));
SetJumpTarget(continue1); SetJumpTarget(continue1);
SetJumpTarget(continue2); SetJumpTarget(continue2);
OR(32, M(&CR), R(EAX)); OR(32, M(&PowerPC::ppcState.cr), R(EAX));
gpr.UnlockAll();
}
// unsigned
void cmpl(UGeckoInstruction inst)
{
#ifdef JIT_OFF_OPTIONS
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
{Default(inst); return;} // turn off from debugger
#endif
INSTRUCTION_START;
int a = inst.RA;
int b = inst.RB;
int crf = inst.CRFD;
int shift = crf * 4;
gpr.Lock(a, b);
gpr.LoadToX64(a, true, false);
AND(32, M(&CR), Imm32(~(0xF0000000 >> (crf*4))));
CMP(32, gpr.R(a), gpr.R(b));
FixupBranch pLesser = J_CC(CC_B);
FixupBranch pGreater = J_CC(CC_A);
// _x86Reg == 0
MOV(32, R(EAX), Imm32(0x20000000 >> shift));
FixupBranch continue1 = J();
// _x86Reg > 0
SetJumpTarget(pGreater);
MOV(32, R(EAX), Imm32(0x40000000 >> shift));
FixupBranch continue2 = J();
// _x86Reg < 0
SetJumpTarget(pLesser);
MOV(32, R(EAX), Imm32(0x80000000 >> shift));
SetJumpTarget(continue1);
SetJumpTarget(continue2);
OR(32, M(&CR), R(EAX));
gpr.UnlockAll(); gpr.UnlockAll();
} }
@ -652,6 +625,7 @@ namespace Jit64
// This can be optimized // This can be optimized
void addex(UGeckoInstruction inst) void addex(UGeckoInstruction inst)
{ {
// USES_XER
#ifdef JIT_OFF_OPTIONS #ifdef JIT_OFF_OPTIONS
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
{Default(inst); return;} // turn off from debugger {Default(inst); return;} // turn off from debugger
@ -664,7 +638,7 @@ namespace Jit64
gpr.LoadToX64(d, false); gpr.LoadToX64(d, false);
else else
gpr.LoadToX64(d, true); gpr.LoadToX64(d, true);
MOV(32, R(EAX), M(&XER)); MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER]));
SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag
MOV(32, R(EAX), gpr.R(a)); MOV(32, R(EAX), gpr.R(a));
ADC(32, R(EAX), gpr.R(b)); ADC(32, R(EAX), gpr.R(b));
@ -895,6 +869,7 @@ namespace Jit64
void srawx(UGeckoInstruction inst) void srawx(UGeckoInstruction inst)
{ {
// USES_XER
#ifdef JIT_OFF_OPTIONS #ifdef JIT_OFF_OPTIONS
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
{Default(inst); return;} // turn off from debugger {Default(inst); return;} // turn off from debugger
@ -919,17 +894,17 @@ namespace Jit64
CMP(32, R(EAX), Imm32(-1)); CMP(32, R(EAX), Imm32(-1));
SETcc(CC_L, R(EAX)); SETcc(CC_L, R(EAX));
SAR(32, gpr.R(a), R(ECX)); SAR(32, gpr.R(a), R(ECX));
AND(32, M(&XER), Imm32(~(1 << 29))); AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~(1 << 29)));
SHL(32, R(EAX), Imm8(29)); SHL(32, R(EAX), Imm8(29));
OR(32, M(&XER), R(EAX)); OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX));
FixupBranch end = J(); FixupBranch end = J();
SetJumpTarget(topBitSet); SetJumpTarget(topBitSet);
MOV(32, R(EAX), gpr.R(s)); MOV(32, R(EAX), gpr.R(s));
SAR(32, R(EAX), Imm8(31)); SAR(32, R(EAX), Imm8(31));
MOV(32, gpr.R(a), R(EAX)); MOV(32, gpr.R(a), R(EAX));
AND(32, M(&XER), Imm32(~(1 << 29))); AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~(1 << 29)));
AND(32, R(EAX), Imm32(1<<29)); AND(32, R(EAX), Imm32(1<<29));
OR(32, M(&XER), R(EAX)); OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX));
SetJumpTarget(end); SetJumpTarget(end);
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
@ -961,11 +936,11 @@ namespace Jit64
FixupBranch nocarry1 = J_CC(CC_GE); FixupBranch nocarry1 = J_CC(CC_GE);
TEST(32, R(EAX), Imm32((u32)0xFFFFFFFF >> (32 - amount))); // were any 1s shifted out? TEST(32, R(EAX), Imm32((u32)0xFFFFFFFF >> (32 - amount))); // were any 1s shifted out?
FixupBranch nocarry2 = J_CC(CC_Z); FixupBranch nocarry2 = J_CC(CC_Z);
OR(32, M(&XER), Imm32(XER_CA_MASK)); //XER.CA = 1 JitSetCA();
FixupBranch carry = J(false); FixupBranch carry = J(false);
SetJumpTarget(nocarry1); SetJumpTarget(nocarry1);
SetJumpTarget(nocarry2); SetJumpTarget(nocarry2);
AND(32, M(&XER), Imm32(~XER_CA_MASK)); //XER.CA = 0 JitClearCA();
SetJumpTarget(carry); SetJumpTarget(carry);
gpr.UnlockAll(); gpr.UnlockAll();
} }
@ -973,7 +948,7 @@ namespace Jit64
{ {
Default(inst); return; Default(inst); return;
gpr.Lock(a, s); gpr.Lock(a, s);
AND(32, M(&XER), Imm32(~XER_CA_MASK)); //XER.CA = 0 JitClearCA();
gpr.LoadToX64(a, a == s, true); gpr.LoadToX64(a, a == s, true);
if (a != s) if (a != s)
MOV(32, gpr.R(a), gpr.R(s)); MOV(32, gpr.R(a), gpr.R(s));

View File

@ -37,6 +37,16 @@
namespace Jit64 namespace Jit64
{ {
// Emits an AND against the XER slot in PowerPC::ppcState.spr that masks out
// XER_CA_MASK, i.e. the generated code clears the carry (CA) bit.
// Takes no arguments and returns nothing; its only effect is the emitted instruction.
void JitClearCA()
{
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0
}
// Emits an OR against the XER slot in PowerPC::ppcState.spr with XER_CA_MASK,
// i.e. the generated code sets the carry (CA) bit.
// Takes no arguments and returns nothing; its only effect is the emitted instruction.
void JitSetCA()
{
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1
}
void UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend) void UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend)
{ {
#ifdef _M_IX86 #ifdef _M_IX86

View File

@ -33,4 +33,7 @@ void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);
void ForceSinglePrecisionS(X64Reg xmm); void ForceSinglePrecisionS(X64Reg xmm);
void ForceSinglePrecisionP(X64Reg xmm); void ForceSinglePrecisionP(X64Reg xmm);
void JitClearCA();
void JitSetCA();
} // namespace } // namespace

View File

@ -285,19 +285,10 @@ void FixUpInternalBranches(CodeOp *code, int begin, int end)
} }
} }
void ShuffleUp(CodeOp *code, int first, int last)
{
CodeOp temp = code[first];
for (int i = first; i < last; i++)
code[i] = code[i + 1];
code[last] = temp;
}
// IMPORTANT - CURRENTLY ASSUMES THAT A IS A COMPARE // IMPORTANT - CURRENTLY ASSUMES THAT A IS A COMPARE
bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b) bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
{ {
// Disabled for now return false; // Currently deactivated in SVN.
return false;
const GekkoOPInfo *a_info = GetOpInfo(a.inst); const GekkoOPInfo *a_info = GetOpInfo(a.inst);
const GekkoOPInfo *b_info = GetOpInfo(b.inst); const GekkoOPInfo *b_info = GetOpInfo(b.inst);
@ -308,7 +299,6 @@ bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
if ((b_flags & (FL_RC_BIT | FL_RC_BIT_F)) && (b.inst.hex & 1)) if ((b_flags & (FL_RC_BIT | FL_RC_BIT_F)) && (b.inst.hex & 1))
return false; return false;
// 10 cmpi, 11 cmpli - we got a compare!
switch (b.inst.OPCD) switch (b.inst.OPCD)
{ {
case 16: case 16:
@ -323,20 +313,34 @@ bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
// For now, only integer ops acceptable. // For now, only integer ops acceptable.
switch (b_info->type) { switch (b_info->type) {
case OPTYPE_INTEGER: case OPTYPE_INTEGER:
case OPTYPE_LOAD:
case OPTYPE_STORE:
case OPTYPE_LOADFP:
case OPTYPE_STOREFP:
break; break;
default: default:
return false; return false;
} }
// Check that we have no register collisions. // Check that we have no register collisions.
// That is, check that none of b's outputs matches any of a's inputs,
// and that none of a's outputs matches any of b's inputs.
// The latter does not apply if a is a cmp, of course, but doesn't hurt to check.
bool no_swap = false; bool no_swap = false;
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
{ {
int regIn = a.regsIn[j]; int regInA = a.regsIn[j];
if (regIn < 0) int regInB = b.regsIn[j];
continue; if (regInA >= 0 &&
if (b.regsOut[0] == regIn || (b.regsOut[0] == regInA ||
b.regsOut[1] == regIn) b.regsOut[1] == regInA)) // grouped: && binds tighter than ||, so the >= 0 guard must wrap both output checks
{
// reg collision! don't swap
return false;
}
// Only a valid (non-negative) input register of b can collide with a's outputs.
// The inner parentheses matter: && binds tighter than ||, so without them the
// regsOut[1] comparison would run even when regInB is negative.
if (regInB >= 0 &&
(a.regsOut[0] == regInB ||
a.regsOut[1] == regInB))
{ {
// reg collision! don't swap // reg collision! don't swap
return false; return false;
@ -346,6 +350,7 @@ bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
return true; return true;
} }
// Does not yet perform inlining - although there are plans for that.
CodeOp *Flatten(u32 address, int &realsize, BlockStats &st, BlockRegStats &gpa, BlockRegStats &fpa) CodeOp *Flatten(u32 address, int &realsize, BlockStats &st, BlockRegStats &gpa, BlockRegStats &fpa)
{ {
int numCycles = 0; int numCycles = 0;
@ -623,9 +628,8 @@ CodeOp *Flatten(u32 address, int &realsize, BlockStats &st, BlockRegStats &gpa,
} }
} }
// Scan for CR0 dependency
//Scan for CR0 dependency // assume next block wants CR0 to be safe
//assume next block wants CR0 to be safe
bool wantsCR0 = true; bool wantsCR0 = true;
bool wantsCR1 = true; bool wantsCR1 = true;
bool wantsPS1 = true; bool wantsPS1 = true;

View File

@ -79,8 +79,6 @@ struct BlockRegStats
void Init(); void Init();
void Shutdown(); void Shutdown();
void ShuffleUp(CodeOp *code, int first, int last);
CodeOp *Flatten(u32 address, int &realsize, BlockStats &st, BlockRegStats &gpa, BlockRegStats &fpa); CodeOp *Flatten(u32 address, int &realsize, BlockStats &st, BlockRegStats &gpa, BlockRegStats &fpa);
void LogFunctionCall(u32 addr); void LogFunctionCall(u32 addr);

View File

@ -141,8 +141,8 @@ GekkoOPTemplate primarytable[] =
{7, Interpreter::mulli, Jit64::mulli, {"mulli", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_RC_BIT, 2}}, {7, Interpreter::mulli, Jit64::mulli, {"mulli", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_RC_BIT, 2}},
{8, Interpreter::subfic, Jit64::subfic, {"subfic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}}, {8, Interpreter::subfic, Jit64::subfic, {"subfic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}},
{10, Interpreter::cmpli, Jit64::cmpli, {"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}}, {10, Interpreter::cmpli, Jit64::cmpXi, {"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
{11, Interpreter::cmpi, Jit64::cmpi, {"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}}, {11, Interpreter::cmpi, Jit64::cmpXi, {"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
{12, Interpreter::addic, Jit64::reg_imm, {"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}}, {12, Interpreter::addic, Jit64::reg_imm, {"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}},
{13, Interpreter::addic_rc, Jit64::reg_imm, {"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}}, {13, Interpreter::addic_rc, Jit64::reg_imm, {"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}},
{14, Interpreter::addi, Jit64::reg_imm, {"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}}, {14, Interpreter::addi, Jit64::reg_imm, {"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}},
@ -283,8 +283,8 @@ GekkoOPTemplate table31[] =
{412, Interpreter::orcx, Jit64::Default, {"orcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, {412, Interpreter::orcx, Jit64::Default, {"orcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{476, Interpreter::nandx, Jit64::Default, {"nandx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, {476, Interpreter::nandx, Jit64::Default, {"nandx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{284, Interpreter::eqvx, Jit64::Default, {"eqvx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, {284, Interpreter::eqvx, Jit64::Default, {"eqvx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{0, Interpreter::cmp, Jit64::cmp, {"cmp", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}}, {0, Interpreter::cmp, Jit64::cmpX, {"cmp", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}},
{32, Interpreter::cmpl, Jit64::cmpl, {"cmpl", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}}, {32, Interpreter::cmpl, Jit64::cmpX, {"cmpl", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}},
{26, Interpreter::cntlzwx, Jit64::cntlzwx, {"cntlzwx",OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, {26, Interpreter::cntlzwx, Jit64::cntlzwx, {"cntlzwx",OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
{922, Interpreter::extshx, Jit64::extshx, {"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, {922, Interpreter::extshx, Jit64::extshx, {"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
{954, Interpreter::extsbx, Jit64::extsbx, {"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, {954, Interpreter::extsbx, Jit64::extsbx, {"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},

View File

@ -25,77 +25,75 @@ class PointerWrap;
namespace PowerPC namespace PowerPC
{ {
enum CoreMode
{
MODE_INTERPRETER,
MODE_JIT,
};
// This contains the entire state of the emulated PowerPC "Gekko" CPU. enum CoreMode
struct GC_ALIGNED64(PowerPCState) {
{ MODE_INTERPRETER,
u32 mojs[128]; // Try to isolate the regs from other variables in the cache. MODE_JIT,
u32 gpr[32]; // General purpose registers. r1 = stack pointer. };
// The paired singles are strange : PS0 is stored in the full 64 bits of each FPR // This contains the entire state of the emulated PowerPC "Gekko" CPU.
// but ps calculations are only done in 32-bit precision, and PS1 is only 32 bits. struct GC_ALIGNED64(PowerPCState)
// Since we want to use SIMD, SSE2 is the only viable alternative - 2x double. {
u64 ps[32][2]; u32 mojs[128]; // Try to isolate the regs from other variables in the cache.
u32 gpr[32]; // General purpose registers. r1 = stack pointer.
u32 pc; // program counter // The paired singles are strange : PS0 is stored in the full 64 bits of each FPR
u32 npc; // but ps calculations are only done in 32-bit precision, and PS1 is only 32 bits.
// Since we want to use SIMD, SSE2 is the only viable alternative - 2x double.
u64 ps[32][2];
u32 cr; // flags u32 pc; // program counter
u32 msr; // machine specific register u32 npc;
u32 fpscr; // floating point flags/status bits
// Exception management. u32 cr; // flags
u32 Exceptions; u32 msr; // machine specific register
u32 fpscr; // floating point flags/status bits
u32 sr[16]; // Segment registers. Unused. // Exception management.
u32 Exceptions;
u32 DebugCount; u32 sr[16]; // Segment registers. Unused.
// special purpose registers - controls quantizers, DMA, and lots of other misc extensions.
// also for power management, but we don't care about that.
u32 spr[1024];
};
enum CPUState u32 DebugCount;
{
CPU_RUNNING = 0, // special purpose registers - controls quantizers, DMA, and lots of other misc extensions.
CPU_RUNNINGDEBUG = 1, // also for power management, but we don't care about that.
CPU_STEPPING = 2, u32 spr[1024];
CPU_POWERDOWN = 3, };
};
extern PowerPCState ppcState; enum CPUState
extern volatile CPUState state; // Execution engines should poll this to know when to exit. {
CPU_RUNNING = 0,
CPU_RUNNINGDEBUG = 1,
CPU_STEPPING = 2,
CPU_POWERDOWN = 3,
};
void Init(); extern PowerPCState ppcState;
void Shutdown(); extern volatile CPUState state; // Execution engines should poll this to know when to exit.
void DoState(PointerWrap &p);
void SetMode(CoreMode _coreType); void Init();
void Shutdown();
void DoState(PointerWrap &p);
void SingleStep(); void SetMode(CoreMode _coreType);
void CheckExceptions();
void RunLoop();
void Start();
void Pause();
void Stop();
void OnIdle(u32 _uThreadAddr); void SingleStep();
} void CheckExceptions();
void RunLoop();
void Start();
void Pause();
void Stop();
// Easy register access macros. void OnIdle(u32 _uThreadAddr);
// Easy register access macros.
#define HID2 ((UReg_HID2&)PowerPC::ppcState.spr[SPR_HID2]) #define HID2 ((UReg_HID2&)PowerPC::ppcState.spr[SPR_HID2])
#define DMAU (*(UReg_DMAU*)&PowerPC::ppcState.spr[SPR_DMAU]) #define DMAU (*(UReg_DMAU*)&PowerPC::ppcState.spr[SPR_DMAU])
#define DMAL (*(UReg_DMAL*)&PowerPC::ppcState.spr[SPR_DMAL]) #define DMAL (*(UReg_DMAL*)&PowerPC::ppcState.spr[SPR_DMAL])
#define XER ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER])
#define PC PowerPC::ppcState.pc #define PC PowerPC::ppcState.pc
#define NPC PowerPC::ppcState.npc #define NPC PowerPC::ppcState.npc
#define CR PowerPC::ppcState.cr
#define FPSCR ((UReg_FPSCR&)PowerPC::ppcState.fpscr) #define FPSCR ((UReg_FPSCR&)PowerPC::ppcState.fpscr)
#define MSR PowerPC::ppcState.msr #define MSR PowerPC::ppcState.msr
#define GPR(n) PowerPC::ppcState.gpr[n] #define GPR(n) PowerPC::ppcState.gpr[n]
@ -121,11 +119,13 @@ namespace PowerPC
#define riPS0(i) (*(u64*)(&PowerPC::ppcState.ps[i][0])) #define riPS0(i) (*(u64*)(&PowerPC::ppcState.ps[i][0]))
#define riPS1(i) (*(u64*)(&PowerPC::ppcState.ps[i][1])) #define riPS1(i) (*(u64*)(&PowerPC::ppcState.ps[i][1]))
} // namespace
// Wrappers to make it easier to in the future completely replace the storage of CR and Carry bits // Wrappers to make it easier to in the future completely replace the storage of CR and Carry bits
// to something more x86-friendly. These are not used 100% consistently yet - and if we do this, we // to something more x86-friendly. These are not used 100% consistently yet - and if we do this, we
// need the corresponding stuff on the JIT side too. // need the corresponding stuff on the JIT side too.
// These are intended to stay fast, probably become faster, and are not likely to slow down much if at all.
inline void SetCRField(int cr_field, int value) { inline void SetCRField(int cr_field, int value) {
PowerPC::ppcState.cr = (PowerPC::ppcState.cr & (~(0xF0000000 >> (cr_field * 4)))) | (value << ((7 - cr_field) * 4)); PowerPC::ppcState.cr = (PowerPC::ppcState.cr & (~(0xF0000000 >> (cr_field * 4)))) | (value << ((7 - cr_field) * 4));
} }
@ -135,9 +135,10 @@ inline u32 GetCRField(int cr_field) {
} }
inline u32 GetCRBit(int bit) { inline u32 GetCRBit(int bit) {
return (CR >> (31 - bit)) & 1; return (PowerPC::ppcState.cr >> (31 - bit)) & 1;
} }
// SetCR and GetCR may become fairly slow soon. Should be avoided if possible.
inline void SetCR(u32 new_cr) { inline void SetCR(u32 new_cr) {
PowerPC::ppcState.cr = new_cr; PowerPC::ppcState.cr = new_cr;
} }
@ -146,12 +147,29 @@ inline u32 GetCR() {
return PowerPC::ppcState.cr; return PowerPC::ppcState.cr;
} }
// SetCarry/GetCarry may speed up soon.
inline void SetCarry(int ca) { inline void SetCarry(int ca) {
XER.CA = ca; ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).CA = ca;
} }
inline int GetCarry() { inline int GetCarry() {
return XER.CA; return ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).CA;
}
// Returns a copy of the XER special-purpose register, read from
// PowerPC::ppcState.spr[SPR_XER] and viewed as a UReg_XER.
inline UReg_XER GetXER() {
UReg_XER &xer = (UReg_XER&)PowerPC::ppcState.spr[SPR_XER];
return xer;
}
// Overwrites the XER special-purpose register (PowerPC::ppcState.spr[SPR_XER])
// with new_xer.
inline void SetXER(UReg_XER new_xer) {
UReg_XER &xer = (UReg_XER&)PowerPC::ppcState.spr[SPR_XER];
xer = new_xer;
}
inline int GetXER_SO() {
return ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).SO;
}
inline void SetXER_SO(int value) {
((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).SO = value;
} }
#endif #endif