JIT: move mfcr code to JitAsmCommon
The generated mfcr sequence is 80+ instructions, so inlining it at every use of mfcr is probably not the best for the icache.
This commit is contained in:
parent
821db9798c
commit
b058bbd223
|
@ -226,6 +226,8 @@ void Jit64AsmRoutineManager::GenerateCommon()
|
||||||
GenFrsqrte();
|
GenFrsqrte();
|
||||||
fres = AlignCode4();
|
fres = AlignCode4();
|
||||||
GenFres();
|
GenFres();
|
||||||
|
mfcr = AlignCode4();
|
||||||
|
GenMfcr();
|
||||||
|
|
||||||
GenQuantizedLoads();
|
GenQuantizedLoads();
|
||||||
GenQuantizedStores();
|
GenQuantizedStores();
|
||||||
|
|
|
@ -406,39 +406,12 @@ void Jit64::mfcr(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(bJITSystemRegistersOff);
|
JITDISABLE(bJITSystemRegistersOff);
|
||||||
// USES_CR
|
|
||||||
int d = inst.RD;
|
int d = inst.RD;
|
||||||
|
gpr.FlushLockX(RSCRATCH_EXTRA);
|
||||||
|
CALL((void *)asm_routines.mfcr);
|
||||||
|
gpr.Lock(d);
|
||||||
gpr.BindToRegister(d, false, true);
|
gpr.BindToRegister(d, false, true);
|
||||||
XOR(32, gpr.R(d), gpr.R(d));
|
MOV(32, gpr.R(d), R(RSCRATCH));
|
||||||
|
|
||||||
X64Reg cr_val = RSCRATCH2;
|
|
||||||
// we only need to zero the high bits of RSCRATCH once
|
|
||||||
XOR(32, R(RSCRATCH), R(RSCRATCH));
|
|
||||||
for (int i = 0; i < 8; i++)
|
|
||||||
{
|
|
||||||
static const u8 m_flagTable[8] = {0x0,0x1,0x8,0x9,0x0,0x1,0x8,0x9};
|
|
||||||
if (i != 0)
|
|
||||||
SHL(32, gpr.R(d), Imm8(4));
|
|
||||||
|
|
||||||
MOV(64, R(cr_val), PPCSTATE(cr_val[i]));
|
|
||||||
|
|
||||||
// EQ: Bits 31-0 == 0; set flag bit 1
|
|
||||||
TEST(32, R(cr_val), R(cr_val));
|
|
||||||
SETcc(CC_Z, R(RSCRATCH));
|
|
||||||
LEA(32, gpr.RX(d), MComplex(gpr.RX(d), RSCRATCH, SCALE_2, 0));
|
|
||||||
|
|
||||||
// GT: Value > 0; set flag bit 2
|
|
||||||
TEST(64, R(cr_val), R(cr_val));
|
|
||||||
SETcc(CC_G, R(RSCRATCH));
|
|
||||||
LEA(32, gpr.RX(d), MComplex(gpr.RX(d), RSCRATCH, SCALE_4, 0));
|
|
||||||
|
|
||||||
// SO: Bit 61 set; set flag bit 0
|
|
||||||
// LT: Bit 62 set; set flag bit 3
|
|
||||||
SHR(64, R(cr_val), Imm8(61));
|
|
||||||
MOVZX(32, 8, RSCRATCH, MDisp(cr_val, (u32)(u64)m_flagTable));
|
|
||||||
OR(32, gpr.R(d), R(RSCRATCH));
|
|
||||||
}
|
|
||||||
|
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
gpr.UnlockAllX();
|
gpr.UnlockAllX();
|
||||||
}
|
}
|
||||||
|
|
|
@ -151,6 +151,44 @@ void CommonAsmRoutines::GenFres()
|
||||||
RET();
|
RET();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the out-of-line mfcr helper routine. The emitted code packs the eight
// PPCSTATE cr_val[] entries into one 32-bit value, four flag bits per entry,
// with cr_val[0] ending up in the most significant nibble, and then returns.
void CommonAsmRoutines::GenMfcr()
|
||||||

{
|
||||||

// Input: none
|
||||||

// Output: RSCRATCH
|
||||||

// This function clobbers all three RSCRATCH.
// (i.e. RSCRATCH, RSCRATCH2 and RSCRATCH_EXTRA, which are the three registers used below)
|
||||||

X64Reg dst = RSCRATCH;
|
||||||

X64Reg tmp = RSCRATCH2;
|
||||||

X64Reg cr_val = RSCRATCH_EXTRA;
|
||||||

// Start from an empty result; the fields are shifted/added in below.
XOR(32, R(dst), R(dst));
|
||||||

// we only need to zero the high bits of tmp once
// (SETcc below only ever writes the low byte of tmp, so the upper bits stay zero)
|
||||||

XOR(32, R(tmp), R(tmp));
|
||||||

// One iteration per cr_val[] entry; the instructions are emitted 8 times (unrolled).
for (int i = 0; i < 8; i++)
|
||||||

{
|
||||||

// Lookup table indexed by bits 63-61 of the entry (see the SHR by 61 below):
// index bit 0 (bit 61, SO) contributes 0x1, index bit 1 (bit 62, LT) contributes 0x8,
// index bit 2 (bit 63) does not change the looked-up value.
// Entries are 32 bits wide so the 32-bit OR can read them directly with SCALE_4.
static const u32 m_flagTable[8] = { 0x0, 0x1, 0x8, 0x9, 0x0, 0x1, 0x8, 0x9 };
|
||||||

// Make room for the next 4-bit field (nothing to shift before the first one).
if (i != 0)
|
||||||

SHL(32, R(dst), Imm8(4));
|
||||||

|
||||||

// Load the full 64-bit internal representation of entry i.
MOV(64, R(cr_val), PPCSTATE(cr_val[i]));
|
||||||

|
||||||

// EQ: Bits 31-0 == 0; set flag bit 1
|
||||||

TEST(32, R(cr_val), R(cr_val));
|
||||||

// FIXME: is there a better way to do this without the partial register merging?
|
||||||

SETcc(CC_Z, R(tmp));
|
||||||

// dst += tmp * 2: adds 0x2 when EQ. The low nibble of dst is zero here, so this acts as an OR.
LEA(32, dst, MComplex(dst, tmp, SCALE_2, 0));
|
||||||

|
||||||

// GT: Value > 0; set flag bit 2
// (signed test of the whole 64-bit value)
|
||||||

TEST(64, R(cr_val), R(cr_val));
|
||||||

SETcc(CC_G, R(tmp));
|
||||||

// dst += tmp * 4: adds 0x4 when GT; bit 2 of dst is still clear, so no carry can occur.
LEA(32, dst, MComplex(dst, tmp, SCALE_4, 0));
|
||||||

|
||||||

// SO: Bit 61 set; set flag bit 0
|
||||||

// LT: Bit 62 set; set flag bit 3
|
||||||

// Reduce the entry to its top three bits, which become the table index.
SHR(64, R(cr_val), Imm8(61));
|
||||||

// NOTE(review): (u32)(u64)m_flagTable truncates the table address to 32 bits;
// this presumably relies on the table living in the low 4 GiB - confirm.
OR(32, R(dst), MScaled(cr_val, SCALE_4, (u32)(u64)m_flagTable));
|
||||||

}
|
||||||

// Return to the caller with the packed value in dst (RSCRATCH).
RET();
|
||||||

}
|
||||||
|
|
||||||
// Safe + Fast Quantizers, originally from JITIL by magumagu
|
// Safe + Fast Quantizers, originally from JITIL by magumagu
|
||||||
|
|
||||||
static const u8 GC_ALIGNED16(pbswapShuffle1x4[16]) = { 3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
|
static const u8 GC_ALIGNED16(pbswapShuffle1x4[16]) = { 3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
|
||||||
|
|
|
@ -25,6 +25,7 @@ public:
|
||||||
|
|
||||||
const u8 *frsqrte;
|
const u8 *frsqrte;
|
||||||
const u8 *fres;
|
const u8 *fres;
|
||||||
|
const u8 *mfcr;
|
||||||
|
|
||||||
// In: array index: GQR to use.
|
// In: array index: GQR to use.
|
||||||
// In: ECX: Address to read from.
|
// In: ECX: Address to read from.
|
||||||
|
@ -58,4 +59,5 @@ public:
|
||||||
void GenFifoWrite(int size);
|
void GenFifoWrite(int size);
|
||||||
void GenFrsqrte();
|
void GenFrsqrte();
|
||||||
void GenFres();
|
void GenFres();
|
||||||
|
void GenMfcr();
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue