Merge pull request #5259 from MerryMage/quantload

Jit64: Make psq_lXX PIE-compliant
This commit is contained in:
Markus Wick 2017-04-15 11:20:09 +02:00 committed by GitHub
commit 8d4be36963
8 changed files with 44 additions and 20 deletions

View File

@ -237,6 +237,7 @@ void Jit64AsmRoutineManager::GenerateCommon()
GenMfcr();
GenQuantizedLoads();
GenQuantizedSingleLoads();
GenQuantizedStores();
GenQuantizedSingleStores();

View File

@ -148,16 +148,18 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
}
else
{
MOV(32, R(RSCRATCH2), Imm32(0x3F07));
// Get the high part of the GQR register
OpArg gqr = PPCSTATE(spr[SPR_GQR0 + i]);
gqr.AddMemOffset(2);
MOV(32, R(RSCRATCH2), Imm32(0x3F07));
AND(32, R(RSCRATCH2), gqr);
MOVZX(32, 8, RSCRATCH, R(RSCRATCH2));
CALLptr(MScaled(RSCRATCH, SCALE_8, PtrOffset(&asm_routines.pairedLoadQuantized[w * 8])));
LEA(64, RSCRATCH, M(w ? asm_routines.singleLoadQuantized : asm_routines.pairedLoadQuantized));
// 8-bit operations do not zero upper 32-bits of 64-bit registers.
// Here we know that RSCRATCH's least significant byte is zero.
OR(8, R(RSCRATCH), R(RSCRATCH2));
SHL(8, R(RSCRATCH), Imm8(3));
CALLptr(MatR(RSCRATCH));
}
CVTPS2PD(fpr.RX(s), R(XMM0));

View File

@ -277,13 +277,22 @@ const u8* CommonAsmRoutines::GenQuantizedStoreRuntime(bool single, EQuantizeType
// Fills the pairedLoadQuantized table: slot `type` (0-7) receives the const u8* returned by
// GenQuantizedLoadRuntime(false, type), i.e. the variant generated with `single` == false.
// NOTE(review): this listing looks like a rendered diff with the +/- markers stripped. The
// AlignCode16() / 16-slot setup directly below is presumably the pre-change form that the
// AlignCodeTo(256) / 8-slot setup after it replaces — confirm against the real file.
void CommonAsmRoutines::GenQuantizedLoads()
{
pairedLoadQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));
ReserveCodeSpace(16 * sizeof(u8*));
// Aligned to 256 bytes as least significant byte needs to be zero (See: Jit64::psq_lXX).
pairedLoadQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCodeTo(256)));
ReserveCodeSpace(8 * sizeof(u8*));
// One slot per quantize type value 0-7; the loop variable is cast to EQuantizeType for the call.
for (int type = 0; type < 8; type++)
pairedLoadQuantized[type] = GenQuantizedLoadRuntime(false, static_cast<EQuantizeType>(type));
}
// Sets up singleLoadQuantized as a table of its own on a 256-byte boundary with room for
// 8 pointers, filled from GenQuantizedLoadRuntime(true, type), i.e. `single` == true.
// NOTE(review): this listing looks like a rendered diff with the +/- markers stripped. The
// two assignments after the `for` are presumably the old and new form of the same loop body:
// the pairedLoadQuantized[type + 8] write being removed, the singleLoadQuantized[type] write
// being added. Read literally, only the first would be the loop body — confirm against the
// real file.
void CommonAsmRoutines::GenQuantizedSingleLoads()
{
// Aligned to 256 bytes as least significant byte needs to be zero (See: Jit64::psq_lXX).
singleLoadQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCodeTo(256)));
ReserveCodeSpace(8 * sizeof(u8*));
// One slot per quantize type value 0-7; the loop variable is cast to EQuantizeType for the call.
for (int type = 0; type < 8; type++)
pairedLoadQuantized[type + 8] = GenQuantizedLoadRuntime(true, static_cast<EQuantizeType>(type));
singleLoadQuantized[type] = GenQuantizedLoadRuntime(true, static_cast<EQuantizeType>(type));
}
const u8* CommonAsmRoutines::GenQuantizedLoadRuntime(bool single, EQuantizeType type)

View File

@ -33,6 +33,7 @@ protected:
const u8* GenQuantizedLoadRuntime(bool single, EQuantizeType type);
const u8* GenQuantizedStoreRuntime(bool single, EQuantizeType type);
void GenQuantizedLoads();
void GenQuantizedSingleLoads();
void GenQuantizedStores();
void GenQuantizedSingleStores();
};

View File

@ -1616,10 +1616,12 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
Jit->MOV(32, R(RSCRATCH2), Imm32(0x3F07));
Jit->AND(32, R(RSCRATCH2), M(((char*)&GQR(quantreg)) + 2));
Jit->MOVZX(32, 8, RSCRATCH, R(RSCRATCH2));
Jit->OR(32, R(RSCRATCH), Imm8(w << 3));
const u8** table =
w ? Jit->asm_routines.singleLoadQuantized : Jit->asm_routines.pairedLoadQuantized;
Jit->MOV(32, R(RSCRATCH_EXTRA), regLocForInst(RI, getOp1(I)));
Jit->CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(Jit->asm_routines.pairedLoadQuantized)));
Jit->CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)table));
Jit->MOVAPD(reg, R(XMM0));
RI.fregs[reg] = I;
regNormalRegClear(RI, I);

View File

@ -81,7 +81,7 @@ void JitArm64::psq_l(UGeckoInstruction inst)
UBFM(type_reg, scale_reg, 16, 18); // Type
UBFM(scale_reg, scale_reg, 24, 29); // Scale
MOVP2R(X30, &pairedLoadQuantized[inst.W * 8]);
MOVP2R(X30, inst.W ? singleLoadQuantized : pairedLoadQuantized);
LDR(X30, X30, ArithOption(EncodeRegTo64(type_reg), true));
BLR(X30);

View File

@ -336,7 +336,7 @@ void JitArm64::GenerateCommonAsm()
JitRegister::Register(start, GetCodePtr(), "JIT_QuantizedLoad");
pairedLoadQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));
ReserveCodeSpace(16 * sizeof(u8*));
ReserveCodeSpace(8 * sizeof(u8*));
pairedLoadQuantized[0] = loadPairedFloatTwo;
pairedLoadQuantized[1] = loadPairedIllegal;
@ -347,14 +347,17 @@ void JitArm64::GenerateCommonAsm()
pairedLoadQuantized[6] = loadPairedS8Two;
pairedLoadQuantized[7] = loadPairedS16Two;
pairedLoadQuantized[8] = loadPairedFloatOne;
pairedLoadQuantized[9] = loadPairedIllegal;
pairedLoadQuantized[10] = loadPairedIllegal;
pairedLoadQuantized[11] = loadPairedIllegal;
pairedLoadQuantized[12] = loadPairedU8One;
pairedLoadQuantized[13] = loadPairedU16One;
pairedLoadQuantized[14] = loadPairedS8One;
pairedLoadQuantized[15] = loadPairedS16One;
singleLoadQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));
ReserveCodeSpace(8 * sizeof(u8*));
singleLoadQuantized[0] = loadPairedFloatOne;
singleLoadQuantized[1] = loadPairedIllegal;
singleLoadQuantized[2] = loadPairedIllegal;
singleLoadQuantized[3] = loadPairedIllegal;
singleLoadQuantized[4] = loadPairedU8One;
singleLoadQuantized[5] = loadPairedU16One;
singleLoadQuantized[6] = loadPairedS8One;
singleLoadQuantized[7] = loadPairedS16One;
// Stores
start = GetCodePtr();

View File

@ -39,6 +39,12 @@ public:
// Trashes: all three RSCRATCH
const u8** pairedLoadQuantized;
// In: array index: GQR to use.
// In: ECX: Address to read from.
// Out: XMM0: Bottom 32-bit slot holds the read value.
// Trashes: all three RSCRATCH
const u8** singleLoadQuantized;
// In: array index: GQR to use.
// In: ECX: Address to write to.
// In: XMM0: Bottom two 32-bit slots hold the pair of floats to be written.