JIT: move mfcr code to JitAsmCommon

It's like 80+ instructions, so inlining it on every use of mfcr is probably not the best for the icache.
2015-01-03 10:52:55 -08:00 · 2015-01-03 10:52:55 -08:00 · b058bbd223
parent 821db9798c
commit b058bbd223
4 changed files with 46 additions and 31 deletions
--- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
@ -226,6 +226,8 @@ void Jit64AsmRoutineManager::GenerateCommon()
 	GenFrsqrte();
 	fres = AlignCode4();
 	GenFres();
+	mfcr = AlignCode4();
+	GenMfcr();

 	GenQuantizedLoads();
 	GenQuantizedStores();
--- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp
@ -406,39 +406,12 @@ void Jit64::mfcr(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(bJITSystemRegistersOff);
-	// USES_CR
 	int d = inst.RD;
+	gpr.FlushLockX(RSCRATCH_EXTRA); // the shared mfcr routine uses RSCRATCH_EXTRA as a temporary
+	CALL((void *)asm_routines.mfcr); // out-of-line routine; its result comes back in RSCRATCH
+	gpr.Lock(d);
 	gpr.BindToRegister(d, false, true);
-	XOR(32, gpr.R(d), gpr.R(d));
-
-	X64Reg cr_val = RSCRATCH2;
-	// we only need to zero the high bits of RSCRATCH once
-	XOR(32, R(RSCRATCH), R(RSCRATCH));
-	for (int i = 0; i < 8; i++)
-	{
-		static const u8 m_flagTable[8] = {0x0,0x1,0x8,0x9,0x0,0x1,0x8,0x9};
-		if (i != 0)
-			SHL(32, gpr.R(d), Imm8(4));
-
-		MOV(64, R(cr_val), PPCSTATE(cr_val[i]));
-
-		// EQ: Bits 31-0 == 0; set flag bit 1
-		TEST(32, R(cr_val), R(cr_val));
-		SETcc(CC_Z, R(RSCRATCH));
-		LEA(32, gpr.RX(d), MComplex(gpr.RX(d), RSCRATCH, SCALE_2, 0));
-
-		// GT: Value > 0; set flag bit 2
-		TEST(64, R(cr_val), R(cr_val));
-		SETcc(CC_G, R(RSCRATCH));
-		LEA(32, gpr.RX(d), MComplex(gpr.RX(d), RSCRATCH, SCALE_4, 0));
-
-		// SO: Bit 61 set; set flag bit 0
-		// LT: Bit 62 set; set flag bit 3
-		SHR(64, R(cr_val), Imm8(61));
-		MOVZX(32, 8, RSCRATCH, MDisp(cr_val, (u32)(u64)m_flagTable));
-		OR(32, gpr.R(d), R(RSCRATCH));
-	}
-
+	MOV(32, gpr.R(d), R(RSCRATCH)); // copy the routine's 32-bit result into the register bound for d
 	gpr.UnlockAll();
 	gpr.UnlockAllX();
 }
--- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp
+++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp
@ -151,6 +151,44 @@ void CommonAsmRoutines::GenFres()
 	RET();
 }

+void CommonAsmRoutines::GenMfcr()
+{
+	// Emits a shared routine that packs the eight PPCSTATE cr_val entries into one 32-bit value, 4 flag bits per entry.
+	// Input: none. Output: RSCRATCH (dst), with entry 0 in the most significant nibble.
+	// This routine clobbers all three scratch registers: RSCRATCH, RSCRATCH2 and RSCRATCH_EXTRA.
+	X64Reg dst = RSCRATCH;
+	X64Reg tmp = RSCRATCH2;
+	X64Reg cr_val = RSCRATCH_EXTRA;
+	XOR(32, R(dst), R(dst)); // start from an all-zero result
+	// we only need to zero the high bits of tmp once: the SETcc below is expected to rewrite only its low byte
+	XOR(32, R(tmp), R(tmp));
+	for (int i = 0; i < 8; i++) // unrolled at emit time: one copy of the sequence per entry
+	{
+		static const u32 m_flagTable[8] = { 0x0, 0x1, 0x8, 0x9, 0x0, 0x1, 0x8, 0x9 }; // indexed by bits 63..61 of cr_val; bit 63 does not affect the entry
+		if (i != 0)
+			SHL(32, R(dst), Imm8(4)); // make room for the next 4-bit field

+		MOV(64, R(cr_val), PPCSTATE(cr_val[i])); // load the 64-bit cr_val entry for field i

+		// EQ: Bits 31-0 == 0; set flag bit 1
+		TEST(32, R(cr_val), R(cr_val));
+		// FIXME: is there a better way to do this without the partial register merging?
+		SETcc(CC_Z, R(tmp));
+		LEA(32, dst, MComplex(dst, tmp, SCALE_2, 0)); // dst += tmp * 2

+		// GT: Value > 0; set flag bit 2
+		TEST(64, R(cr_val), R(cr_val));
+		SETcc(CC_G, R(tmp));
+		LEA(32, dst, MComplex(dst, tmp, SCALE_4, 0)); // dst += tmp * 4

+		// SO: Bit 61 set; set flag bit 0
+		// LT: Bit 62 set; set flag bit 3
+		SHR(64, R(cr_val), Imm8(61)); // cr_val now holds a table index in 0..7
+		OR(32, R(dst), MScaled(cr_val, SCALE_4, (u32)(u64)m_flagTable)); // entries are u32, hence SCALE_4; NOTE(review): the cast assumes the table address fits in 32 bits - confirm
+	}
+	RET();
+}
+
 // Safe + Fast Quantizers, originally from JITIL by magumagu

 static const u8 GC_ALIGNED16(pbswapShuffle1x4[16]) = { 3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
--- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h
+++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h
@ -25,6 +25,7 @@ public:

 	const u8 *frsqrte;
 	const u8 *fres;
+	const u8 *mfcr;

 	// In: array index: GQR to use.
 	// In: ECX: Address to read from.
@ -58,4 +59,5 @@ public:
 	void GenFifoWrite(int size);
 	void GenFrsqrte();
 	void GenFres();
+	void GenMfcr();
 };