JIT: move mfcr code to JitAsmCommon

It's like 80+ instructions, so inlining it on every use of mfcr is probably
not the best for the icache.
This commit is contained in:
Fiora 2015-01-03 10:52:55 -08:00
parent 821db9798c
commit b058bbd223
4 changed files with 46 additions and 31 deletions

View File

@ -226,6 +226,8 @@ void Jit64AsmRoutineManager::GenerateCommon()
GenFrsqrte();
fres = AlignCode4();
GenFres();
mfcr = AlignCode4();
GenMfcr();
GenQuantizedLoads();
GenQuantizedStores();

View File

@ -406,39 +406,12 @@ void Jit64::mfcr(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
// USES_CR
int d = inst.RD;
gpr.FlushLockX(RSCRATCH_EXTRA);
CALL((void *)asm_routines.mfcr);
gpr.Lock(d);
gpr.BindToRegister(d, false, true);
XOR(32, gpr.R(d), gpr.R(d));
X64Reg cr_val = RSCRATCH2;
// we only need to zero the high bits of RSCRATCH once
XOR(32, R(RSCRATCH), R(RSCRATCH));
for (int i = 0; i < 8; i++)
{
static const u8 m_flagTable[8] = {0x0,0x1,0x8,0x9,0x0,0x1,0x8,0x9};
if (i != 0)
SHL(32, gpr.R(d), Imm8(4));
MOV(64, R(cr_val), PPCSTATE(cr_val[i]));
// EQ: Bits 31-0 == 0; set flag bit 1
TEST(32, R(cr_val), R(cr_val));
SETcc(CC_Z, R(RSCRATCH));
LEA(32, gpr.RX(d), MComplex(gpr.RX(d), RSCRATCH, SCALE_2, 0));
// GT: Value > 0; set flag bit 2
TEST(64, R(cr_val), R(cr_val));
SETcc(CC_G, R(RSCRATCH));
LEA(32, gpr.RX(d), MComplex(gpr.RX(d), RSCRATCH, SCALE_4, 0));
// SO: Bit 61 set; set flag bit 0
// LT: Bit 62 set; set flag bit 3
SHR(64, R(cr_val), Imm8(61));
MOVZX(32, 8, RSCRATCH, MDisp(cr_val, (u32)(u64)m_flagTable));
OR(32, gpr.R(d), R(RSCRATCH));
}
MOV(32, gpr.R(d), R(RSCRATCH));
gpr.UnlockAll();
gpr.UnlockAllX();
}

View File

@ -151,6 +151,44 @@ void CommonAsmRoutines::GenFres()
RET();
}
void CommonAsmRoutines::GenMfcr()
{
// Input: none
// Output: RSCRATCH
// This function clobbers all three RSCRATCH.
X64Reg dst = RSCRATCH;
X64Reg tmp = RSCRATCH2;
X64Reg cr_val = RSCRATCH_EXTRA;
XOR(32, R(dst), R(dst));
// we only need to zero the high bits of tmp once
XOR(32, R(tmp), R(tmp));
for (int i = 0; i < 8; i++)
{
static const u32 m_flagTable[8] = { 0x0, 0x1, 0x8, 0x9, 0x0, 0x1, 0x8, 0x9 };
if (i != 0)
SHL(32, R(dst), Imm8(4));
MOV(64, R(cr_val), PPCSTATE(cr_val[i]));
// EQ: Bits 31-0 == 0; set flag bit 1
TEST(32, R(cr_val), R(cr_val));
// FIXME: is there a better way to do this without the partial register merging?
SETcc(CC_Z, R(tmp));
LEA(32, dst, MComplex(dst, tmp, SCALE_2, 0));
// GT: Value > 0; set flag bit 2
TEST(64, R(cr_val), R(cr_val));
SETcc(CC_G, R(tmp));
LEA(32, dst, MComplex(dst, tmp, SCALE_4, 0));
// SO: Bit 61 set; set flag bit 0
// LT: Bit 62 set; set flag bit 3
SHR(64, R(cr_val), Imm8(61));
OR(32, R(dst), MScaled(cr_val, SCALE_4, (u32)(u64)m_flagTable));
}
RET();
}
// Safe + Fast Quantizers, originally from JITIL by magumagu
static const u8 GC_ALIGNED16(pbswapShuffle1x4[16]) = { 3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };

View File

@ -25,6 +25,7 @@ public:
const u8 *frsqrte;
const u8 *fres;
const u8 *mfcr;
// In: array index: GQR to use.
// In: ECX: Address to read from.
@ -58,4 +59,5 @@ public:
void GenFifoWrite(int size);
void GenFrsqrte();
void GenFres();
void GenMfcr();
};