Somewhat faster CR flag storage. Doesn't really make that much of a difference - but opens a possibility to merge cmp instructions with their following conditional branches in an efficient way.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1549 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
5c831a934b
commit
a44c421d01
|
@ -326,19 +326,18 @@ void GenerateCommon()
|
|||
{
|
||||
// USES_CR
|
||||
computeRc = AlignCode16();
|
||||
AND(32, M(&PowerPC::ppcState.cr), Imm32(0x0FFFFFFF));
|
||||
CMP(32, R(EAX), Imm8(0));
|
||||
FixupBranch pLesser = J_CC(CC_L);
|
||||
FixupBranch pGreater = J_CC(CC_G);
|
||||
OR(32, M(&PowerPC::ppcState.cr), Imm32(0x20000000)); // _x86Reg == 0
|
||||
MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x2)); // _x86Reg == 0
|
||||
RET();
|
||||
SetJumpTarget(pGreater);
|
||||
OR(32, M(&PowerPC::ppcState.cr), Imm32(0x40000000)); // _x86Reg > 0
|
||||
MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x4)); // _x86Reg > 0
|
||||
RET();
|
||||
SetJumpTarget(pLesser);
|
||||
OR(32, M(&PowerPC::ppcState.cr), Imm32(0x80000000)); // _x86Reg < 0
|
||||
MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x8)); // _x86Reg < 0
|
||||
RET();
|
||||
|
||||
|
||||
fifoDirectWrite8 = AlignCode4();
|
||||
GenFifoWrite(8);
|
||||
fifoDirectWrite16 = AlignCode4();
|
||||
|
|
|
@ -125,7 +125,7 @@ namespace Jit64
|
|||
|
||||
if ((inst.BO & 16) == 0) // Test a CR bit
|
||||
{
|
||||
TEST(32, M(&PowerPC::ppcState.cr), Imm32(0x80000000 >> inst.BI));
|
||||
TEST(8, M(&PowerPC::ppcState.cr_fast[inst.BI >> 2]), Imm8(8 >> (inst.BI & 3)));
|
||||
if (inst.BO & 8) // Conditional branch
|
||||
branch = CC_NZ;
|
||||
else
|
||||
|
|
|
@ -202,11 +202,9 @@ namespace Jit64
|
|||
|
||||
fpr.Lock(a,b);
|
||||
if (a != b)
|
||||
{
|
||||
fpr.LoadToX64(a, true);
|
||||
}
|
||||
|
||||
// USES_CR
|
||||
AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xF0000000 >> shift)));
|
||||
if (ordered)
|
||||
COMISD(fpr.R(a).GetSimpleReg(), fpr.R(b));
|
||||
else
|
||||
|
@ -214,19 +212,17 @@ namespace Jit64
|
|||
FixupBranch pLesser = J_CC(CC_B);
|
||||
FixupBranch pGreater = J_CC(CC_A);
|
||||
// _x86Reg == 0
|
||||
MOV(32, R(EAX), Imm32(0x20000000));
|
||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2));
|
||||
FixupBranch continue1 = J();
|
||||
// _x86Reg > 0
|
||||
SetJumpTarget(pGreater);
|
||||
MOV(32, R(EAX), Imm32(0x40000000));
|
||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4));
|
||||
FixupBranch continue2 = J();
|
||||
// _x86Reg < 0
|
||||
SetJumpTarget(pLesser);
|
||||
MOV(32, R(EAX), Imm32(0x80000000));
|
||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8));
|
||||
SetJumpTarget(continue1);
|
||||
SetJumpTarget(continue2);
|
||||
SHR(32, R(EAX), Imm8(shift));
|
||||
OR(32, M(&PowerPC::ppcState.cr), R(EAX));
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
|
|
|
@ -174,23 +174,21 @@ namespace Jit64
|
|||
}
|
||||
|
||||
gpr.KillImmediate(a); // todo, optimize instead, but unlikely to make a difference
|
||||
AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xF0000000 >> (crf*4))));
|
||||
CMP(32, gpr.R(a), comparand);
|
||||
FixupBranch pLesser = J_CC(less_than);
|
||||
FixupBranch pGreater = J_CC(greater_than);
|
||||
|
||||
MOV(32, R(EAX), Imm32(0x20000000 >> shift)); // _x86Reg == 0
|
||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // _x86Reg == 0
|
||||
FixupBranch continue1 = J();
|
||||
|
||||
SetJumpTarget(pGreater);
|
||||
MOV(32, R(EAX), Imm32(0x40000000 >> shift)); // _x86Reg > 0
|
||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // _x86Reg > 0
|
||||
FixupBranch continue2 = J();
|
||||
|
||||
SetJumpTarget(pLesser);
|
||||
MOV(32, R(EAX), Imm32(0x80000000 >> shift));// _x86Reg < 0
|
||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // _x86Reg < 0
|
||||
SetJumpTarget(continue1);
|
||||
SetJumpTarget(continue2);
|
||||
OR(32, M(&PowerPC::ppcState.cr), R(EAX));
|
||||
|
||||
// TODO: Add extra code at the end for the "taken" case. Jump to it from the matching branches.
|
||||
// Since it's the last block, some liberties can be taken.
|
||||
|
@ -221,23 +219,21 @@ namespace Jit64
|
|||
}
|
||||
gpr.Lock(a, b);
|
||||
gpr.LoadToX64(a, true, false);
|
||||
AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xF0000000 >> (crf*4))));
|
||||
CMP(32, gpr.R(a), comparand);
|
||||
FixupBranch pLesser = J_CC(less_than);
|
||||
FixupBranch pGreater = J_CC(greater_than);
|
||||
// _x86Reg == 0
|
||||
MOV(32, R(EAX), Imm32(0x20000000 >> shift));
|
||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // _x86Reg == 0
|
||||
FixupBranch continue1 = J();
|
||||
// _x86Reg > 0
|
||||
|
||||
SetJumpTarget(pGreater);
|
||||
MOV(32, R(EAX), Imm32(0x40000000 >> shift));
|
||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // _x86Reg > 0
|
||||
FixupBranch continue2 = J();
|
||||
// _x86Reg < 0
|
||||
|
||||
SetJumpTarget(pLesser);
|
||||
MOV(32, R(EAX), Imm32(0x80000000 >> shift));
|
||||
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // _x86Reg < 0
|
||||
SetJumpTarget(continue1);
|
||||
SetJumpTarget(continue2);
|
||||
OR(32, M(&PowerPC::ppcState.cr), R(EAX));
|
||||
gpr.UnlockAll();
|
||||
}
|
||||
|
||||
|
|
|
@ -163,20 +163,39 @@ namespace Jit64
|
|||
// USES_CR
|
||||
int d = inst.RD;
|
||||
gpr.LoadToX64(d, false, true);
|
||||
MOV(32, gpr.R(d), M(&PowerPC::ppcState.cr));
|
||||
MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[0]));
|
||||
SHL(32, R(EAX), Imm8(4));
|
||||
for (int i = 1; i < 7; i++) {
|
||||
OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[i]));
|
||||
SHL(32, R(EAX), Imm8(4));
|
||||
}
|
||||
OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[7]));
|
||||
MOV(32, gpr.R(d), R(EAX));
|
||||
}
|
||||
|
||||
void mtcrf(UGeckoInstruction inst)
|
||||
{
|
||||
//Default(inst);
|
||||
//return;
|
||||
|
||||
// USES_CR
|
||||
u32 mask = 0;
|
||||
u32 crm = inst.CRM;
|
||||
gpr.FlushLockX(ECX);
|
||||
if (crm == 0xFF) {
|
||||
gpr.FlushLockX(ECX);
|
||||
MOV(32, R(EAX), gpr.R(inst.RS));
|
||||
MOV(32, M(&PowerPC::ppcState.cr), R(EAX));
|
||||
for (int i = 0; i < 8; i++) {
|
||||
MOV(32, R(ECX), R(EAX));
|
||||
SHR(32, R(ECX), Imm8(28 - (i * 4)));
|
||||
AND(32, R(ECX), Imm32(0xF));
|
||||
MOV(8, M(&PowerPC::ppcState.cr_fast[i]), R(ECX));
|
||||
}
|
||||
gpr.UnlockAllX();
|
||||
} else {
|
||||
//TODO: use lookup table? probably not worth it
|
||||
Default(inst);
|
||||
return;
|
||||
|
||||
// TODO: translate this to work in new CR model.
|
||||
for (int i = 0; i < 8; i++) {
|
||||
if (crm & (1 << i))
|
||||
mask |= 0xF << (i*4);
|
||||
|
@ -188,9 +207,6 @@ namespace Jit64
|
|||
OR(32, R(EAX), R(ECX));
|
||||
MOV(32, M(&PowerPC::ppcState.cr), R(EAX));
|
||||
}
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
|
|
@ -42,6 +42,22 @@ volatile CPUState state = CPU_STEPPING;
|
|||
|
||||
static CoreMode mode;
|
||||
|
||||
void CompactCR()
|
||||
{
|
||||
ppcState.cr = 0;
|
||||
for (int i = 0; i < 8; i++) {
|
||||
ppcState.cr |= ppcState.cr_fast[i] << (28 - i * 4);
|
||||
}
|
||||
}
|
||||
|
||||
void ExpandCR()
|
||||
{
|
||||
for (int i = 0; i < 8; i++) {
|
||||
ppcState.cr_fast[i] = (ppcState.cr >> (28 - i * 4)) & 0xF;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void DoState(PointerWrap &p)
|
||||
{
|
||||
p.Do(ppcState);
|
||||
|
|
|
@ -46,7 +46,9 @@ struct GC_ALIGNED64(PowerPCState)
|
|||
u32 pc; // program counter
|
||||
u32 npc;
|
||||
|
||||
u32 cr; // flags
|
||||
u32 cr; // flags
|
||||
u8 cr_fast[8]; // Possibly reorder to 0, 2, 4, 8, 1, 3, 5, 7 so that we can make Compact and Expand super fast?
|
||||
|
||||
u32 msr; // machine specific register
|
||||
u32 fpscr; // floating point flags/status bits
|
||||
|
||||
|
@ -86,6 +88,9 @@ void Start();
|
|||
void Pause();
|
||||
void Stop();
|
||||
|
||||
void CompactCR();
|
||||
void ExpandCR();
|
||||
|
||||
void OnIdle(u32 _uThreadAddr);
|
||||
|
||||
// Easy register access macros.
|
||||
|
@ -127,23 +132,25 @@ void OnIdle(u32 _uThreadAddr);
|
|||
|
||||
// These are intended to stay fast, probably become faster, and are not likely to slow down much if at all.
|
||||
// Stores the 4-bit value `value` into condition-register field `cr_field`
// (0..7), updating both the packed ppcState.cr image and the per-field
// ppcState.cr_fast[] entry so the two representations stay in agreement.
inline void SetCRField(int cr_field, int value) {
	// Only the low nibble is meaningful. Masking it in unsigned arithmetic keeps
	// stray upper bits from leaking into neighbouring fields and avoids
	// shifting a signed value into the sign bit.
	const unsigned int nibble = static_cast<unsigned int>(value) & 0xF;
	const int shift = (7 - cr_field) * 4; // field 0 lives in bits 31..28
	PowerPC::ppcState.cr = (PowerPC::ppcState.cr & ~(0xFu << shift)) | (nibble << shift);
	PowerPC::ppcState.cr_fast[cr_field] = static_cast<unsigned char>(nibble);
}
|
||||
|
||||
// Returns the value of condition-register field `cr_field` (0..7).
// The value is read from ppcState.cr_fast[], the per-field store that
// CompactCR() uses to rebuild the packed ppcState.cr image. The previous
// packed read shifted by 4 * cr_field, which does not match the
// (7 - cr_field) * 4 position used when a field is written, and it left a
// second, unreachable return statement behind.
inline u32 GetCRField(int cr_field) {
	return PowerPC::ppcState.cr_fast[cr_field];
}
|
||||
|
||||
// Returns condition-register bit `bit` (0..31) as 0 or 1. Bit 0 is the most
// significant bit of field 0.
// The bit is read from ppcState.cr_fast[], the per-field store from which
// CompactCR() rebuilds the packed image, rather than from ppcState.cr; this
// also removes the unreachable second return statement.
inline u32 GetCRBit(int bit) {
	const int field = bit >> 2;              // which 4-bit field holds the bit
	const int bit_in_field = 3 - (bit & 3);  // position inside that nibble
	return (PowerPC::ppcState.cr_fast[field] >> bit_in_field) & 1;
}
|
||||
|
||||
// SetCR and GetCR may become fairly slow soon. Should be avoided if possible.
|
||||
// Replaces the whole condition register with `new_cr`.
// The packed image is stored first; ExpandCR() then reads it back to refresh
// every ppcState.cr_fast[] entry, so the order of the two statements matters.
inline void SetCR(u32 new_cr)
{
	PowerPC::ppcState.cr = new_cr;
	PowerPC::ExpandCR();
}
|
||||
|
||||
// Returns the whole condition register as one packed 32-bit value.
// CompactCR() is called first so that ppcState.cr is rebuilt from the
// per-field ppcState.cr_fast[] entries before it is read.
inline u32 GetCR()
{
	PowerPC::CompactCR();
	const u32 packed = PowerPC::ppcState.cr;
	return packed;
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue