JIT: genericize immediate address handling, support in float stores too
This commit is contained in:
parent
4cf8697957
commit
fc63c7ecae
|
@ -186,8 +186,6 @@ void Jit64AsmRoutineManager::GenerateCommon()
|
|||
GenFifoWrite(16);
|
||||
fifoDirectWrite32 = AlignCode4();
|
||||
GenFifoWrite(32);
|
||||
fifoDirectWriteFloat = AlignCode4();
|
||||
GenFifoFloatWrite();
|
||||
frsqrte = AlignCode4();
|
||||
GenFrsqrte();
|
||||
fres = AlignCode4();
|
||||
|
|
|
@ -334,12 +334,13 @@ void Jit64::stX(UGeckoInstruction inst)
|
|||
|
||||
int s = inst.RS;
|
||||
int a = inst.RA;
|
||||
|
||||
bool update = inst.OPCD & 1;
|
||||
|
||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||
if (a || !update)
|
||||
{
|
||||
bool update = (inst.OPCD & 1) && offset;
|
||||
FALLBACK_IF(update);
|
||||
|
||||
if (!a && update)
|
||||
PanicAlert("Invalid stX");
|
||||
|
||||
int accessSize;
|
||||
switch (inst.OPCD & ~1)
|
||||
{
|
||||
|
@ -357,75 +358,30 @@ void Jit64::stX(UGeckoInstruction inst)
|
|||
return;
|
||||
}
|
||||
|
||||
if ((a == 0) || gpr.R(a).IsImm())
|
||||
// If we already know the address of the write
|
||||
if (!a || gpr.R(a).IsImm())
|
||||
{
|
||||
// If we already know the address through constant folding, we can do some
|
||||
// fun tricks...
|
||||
u32 addr = ((a == 0) ? 0 : (u32)gpr.R(a).offset);
|
||||
addr += offset;
|
||||
if ((addr & 0xFFFFF000) == 0xCC008000 && jo.optimizeGatherPipe)
|
||||
{
|
||||
// Helps external systems know which instruction triggered the write
|
||||
MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC));
|
||||
|
||||
MOV(32, R(RSCRATCH2), gpr.R(s));
|
||||
u32 addr = (a ? (u32)gpr.R(a).offset : 0) + offset;
|
||||
bool exception = WriteToConstAddress(accessSize, gpr.R(s), addr, CallerSavedRegistersInUse());
|
||||
if (update)
|
||||
gpr.SetImmediate32(a, addr);
|
||||
|
||||
// No need to protect these, they don't touch any state
|
||||
// question - should we inline them instead? Pro: Lose a CALL Con: Code bloat
|
||||
switch (accessSize)
|
||||
{
|
||||
case 8:
|
||||
CALL((void *)asm_routines.fifoDirectWrite8);
|
||||
break;
|
||||
case 16:
|
||||
CALL((void *)asm_routines.fifoDirectWrite16);
|
||||
break;
|
||||
case 32:
|
||||
CALL((void *)asm_routines.fifoDirectWrite32);
|
||||
break;
|
||||
}
|
||||
js.fifoBytesThisBlock += accessSize >> 3;
|
||||
gpr.UnlockAllX();
|
||||
return;
|
||||
}
|
||||
else if (Memory::IsRAMAddress(addr))
|
||||
if (!js.memcheck || !exception)
|
||||
{
|
||||
MOV(32, R(RSCRATCH), gpr.R(s));
|
||||
WriteToConstRamAddress(accessSize, RSCRATCH, addr, true);
|
||||
if (update)
|
||||
gpr.SetImmediate32(a, addr);
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Helps external systems know which instruction triggered the write
|
||||
MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC));
|
||||
|
||||
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
||||
switch (accessSize)
|
||||
gpr.KillImmediate(a, true, true);
|
||||
MEMCHECK_START(false)
|
||||
ADD(32, gpr.R(a), Imm32((u32)offset));
|
||||
MEMCHECK_END
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
case 32:
|
||||
ABI_CallFunctionAC(true ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), gpr.R(s), addr);
|
||||
break;
|
||||
case 16:
|
||||
ABI_CallFunctionAC(true ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), gpr.R(s), addr);
|
||||
break;
|
||||
case 8:
|
||||
ABI_CallFunctionAC((void *)&Memory::Write_U8, gpr.R(s), addr);
|
||||
break;
|
||||
}
|
||||
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
|
||||
if (update)
|
||||
gpr.SetImmediate32(a, addr);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
gpr.Lock(a, s);
|
||||
gpr.BindToRegister(a, true, false);
|
||||
gpr.BindToRegister(a, true, update);
|
||||
if (gpr.R(s).IsImm())
|
||||
{
|
||||
SafeWriteRegToReg(gpr.R(s), gpr.RX(a), accessSize, offset, CallerSavedRegistersInUse(), SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR);
|
||||
|
@ -446,22 +402,15 @@ void Jit64::stX(UGeckoInstruction inst)
|
|||
SafeWriteRegToReg(reg_value, gpr.RX(a), accessSize, offset, CallerSavedRegistersInUse(), SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR);
|
||||
}
|
||||
|
||||
if (update && offset)
|
||||
if (update)
|
||||
{
|
||||
MEMCHECK_START(false)
|
||||
gpr.KillImmediate(a, true, true);
|
||||
|
||||
ADD(32, gpr.R(a), Imm32((u32)offset));
|
||||
|
||||
MEMCHECK_END
|
||||
}
|
||||
}
|
||||
gpr.UnlockAll();
|
||||
}
|
||||
else
|
||||
{
|
||||
PanicAlert("Invalid stX");
|
||||
}
|
||||
}
|
||||
|
||||
void Jit64::stXx(UGeckoInstruction inst)
|
||||
{
|
||||
|
|
|
@ -101,11 +101,50 @@ void Jit64::stfXXX(UGeckoInstruction inst)
|
|||
int s = inst.RS;
|
||||
int a = inst.RA;
|
||||
int b = inst.RB;
|
||||
s32 imm = (s16)inst.SIMM_16;
|
||||
int accessSize = single ? 32 : 64;
|
||||
|
||||
FALLBACK_IF((!indexed && !a) || (update && js.memcheck && a == b));
|
||||
FALLBACK_IF(update && js.memcheck && a == b);
|
||||
|
||||
if (single)
|
||||
{
|
||||
fpr.BindToRegister(s, true, false);
|
||||
ConvertDoubleToSingle(XMM0, fpr.RX(s));
|
||||
MOVD_xmm(R(RSCRATCH), XMM0);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (fpr.R(s).IsSimpleReg())
|
||||
MOVQ_xmm(R(RSCRATCH), fpr.RX(s));
|
||||
else
|
||||
MOV(64, R(RSCRATCH), fpr.R(s));
|
||||
}
|
||||
|
||||
if (!indexed && (!a || gpr.R(a).IsImm()))
|
||||
{
|
||||
u32 addr = (a ? (u32)gpr.R(a).offset : 0) + imm;
|
||||
bool exception = WriteToConstAddress(accessSize, R(RSCRATCH), addr, CallerSavedRegistersInUse());
|
||||
|
||||
if (update)
|
||||
{
|
||||
if (!js.memcheck || !exception)
|
||||
{
|
||||
gpr.SetImmediate32(a, addr);
|
||||
}
|
||||
else
|
||||
{
|
||||
gpr.KillImmediate(a, true, true);
|
||||
MEMCHECK_START(false)
|
||||
ADD(32, gpr.R(a), Imm32((u32)imm));
|
||||
MEMCHECK_END
|
||||
}
|
||||
}
|
||||
fpr.UnlockAll();
|
||||
gpr.UnlockAll();
|
||||
return;
|
||||
}
|
||||
|
||||
s32 offset = 0;
|
||||
s32 imm = (s16)inst.SIMM_16;
|
||||
if (indexed)
|
||||
{
|
||||
if (update)
|
||||
|
@ -140,21 +179,8 @@ void Jit64::stfXXX(UGeckoInstruction inst)
|
|||
MOV(32, R(RSCRATCH2), gpr.R(a));
|
||||
}
|
||||
|
||||
if (single)
|
||||
{
|
||||
fpr.BindToRegister(s, true, false);
|
||||
ConvertDoubleToSingle(XMM0, fpr.RX(s));
|
||||
SafeWriteF32ToReg(XMM0, RSCRATCH2, offset, CallerSavedRegistersInUse());
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (fpr.R(s).IsSimpleReg())
|
||||
MOVQ_xmm(R(RSCRATCH), fpr.RX(s));
|
||||
else
|
||||
MOV(64, R(RSCRATCH), fpr.R(s));
|
||||
SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 64, offset, CallerSavedRegistersInUse());
|
||||
}
|
||||
SafeWriteRegToReg(RSCRATCH, RSCRATCH2, accessSize, offset, CallerSavedRegistersInUse());
|
||||
|
||||
if (js.memcheck && update)
|
||||
{
|
||||
// revert the address change if an exception occurred
|
||||
|
@ -162,6 +188,8 @@ void Jit64::stfXXX(UGeckoInstruction inst)
|
|||
SUB(32, gpr.R(a), indexed ? gpr.R(b) : Imm32(imm));
|
||||
MEMCHECK_END
|
||||
}
|
||||
|
||||
fpr.UnlockAll();
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
|
|
@ -22,31 +22,13 @@ static int temp32;
|
|||
|
||||
void CommonAsmRoutines::GenFifoWrite(int size)
|
||||
{
|
||||
// Assume value in RSCRATCH2
|
||||
PUSH(ESI);
|
||||
MOV(32, R(RSCRATCH), Imm32((u32)(u64)GPFifo::m_gatherPipe));
|
||||
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
|
||||
|
||||
SwapAndStore(size, MComplex(RSCRATCH, ESI, 1, 0), RSCRATCH2);
|
||||
|
||||
ADD(32, R(ESI), Imm8(size >> 3));
|
||||
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
|
||||
POP(ESI);
|
||||
RET();
|
||||
}
|
||||
|
||||
void CommonAsmRoutines::GenFifoFloatWrite()
|
||||
{
|
||||
// Assume value in XMM0
|
||||
PUSH(ESI);
|
||||
MOVSS(M(&temp32), XMM0);
|
||||
MOV(32, R(RSCRATCH2), M(&temp32));
|
||||
MOV(32, R(RSCRATCH), Imm32((u32)(u64)GPFifo::m_gatherPipe));
|
||||
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
|
||||
SwapAndStore(32, MComplex(RSCRATCH, RSI, 1, 0), RSCRATCH2);
|
||||
ADD(32, R(ESI), Imm8(4));
|
||||
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
|
||||
POP(ESI);
|
||||
// Assume value in RSCRATCH
|
||||
u32 gather_pipe = (u32)(u64)GPFifo::m_gatherPipe;
|
||||
_assert_msg_(DYNA_REC, gather_pipe <= 0x7FFFFFFF, "Gather pipe not in low 2GB of memory!");
|
||||
MOV(32, R(RSCRATCH2), M(&GPFifo::m_gatherPipeCount));
|
||||
SwapAndStore(size, MDisp(RSCRATCH2, gather_pipe), RSCRATCH);
|
||||
ADD(32, R(RSCRATCH2), Imm8(size >> 3));
|
||||
MOV(32, M(&GPFifo::m_gatherPipeCount), R(RSCRATCH2));
|
||||
RET();
|
||||
}
|
||||
|
||||
|
@ -386,7 +368,8 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
|
|||
|
||||
// Easy!
|
||||
const u8* storeSingleFloat = AlignCode4();
|
||||
SafeWriteF32ToReg(XMM0, RSCRATCH_EXTRA, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
MOVD_xmm(R(RSCRATCH), XMM0);
|
||||
SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
RET();
|
||||
/*
|
||||
if (cpu_info.bSSSE3)
|
||||
|
|
|
@ -13,7 +13,6 @@ public:
|
|||
const u8 *fifoDirectWrite8;
|
||||
const u8 *fifoDirectWrite16;
|
||||
const u8 *fifoDirectWrite32;
|
||||
const u8 *fifoDirectWriteFloat;
|
||||
|
||||
const u8 *enterCode;
|
||||
|
||||
|
|
|
@ -422,6 +422,16 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
|
|||
}
|
||||
}
|
||||
|
||||
// Builds the byte-swapped immediate operand for a store of `accessSize` bits.
//   accessSize == 32 -> Imm32 of swap32 applied to the low 32 bits of reg_value.offset
//   accessSize == 16 -> Imm16 of swap16 applied to the low 16 bits of reg_value.offset
//   anything else    -> Imm8 of the low 8 bits, unswapped (a single byte has no byte order)
// Reads reg_value.offset without checking that reg_value is an immediate; the
// visible callers test IsImm() before calling.
// NOTE(review): any size other than 16/32 (e.g. 64) falls into the 8-bit
// branch and is truncated to one byte - confirm no caller passes such a size.
static OpArg SwapImmediate(int accessSize, OpArg reg_value)
|
||||
{
|
||||
if (accessSize == 32)
|
||||
return Imm32(Common::swap32((u32)reg_value.offset));
|
||||
else if (accessSize == 16)
|
||||
return Imm16(Common::swap16((u16)reg_value.offset));
|
||||
else
|
||||
return Imm8((u8)reg_value.offset);
|
||||
}
|
||||
|
||||
u8 *EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap)
|
||||
{
|
||||
u8* result = GetWritableCodePtr();
|
||||
|
@ -429,14 +439,7 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acce
|
|||
if (reg_value.IsImm())
|
||||
{
|
||||
if (swap)
|
||||
{
|
||||
if (accessSize == 32)
|
||||
reg_value = Imm32(Common::swap32((u32)reg_value.offset));
|
||||
else if (accessSize == 16)
|
||||
reg_value = Imm16(Common::swap16((u16)reg_value.offset));
|
||||
else
|
||||
reg_value = Imm8((u8)reg_value.offset);
|
||||
}
|
||||
reg_value = SwapImmediate(accessSize, reg_value);
|
||||
MOV(accessSize, dest, reg_value);
|
||||
}
|
||||
else if (swap)
|
||||
|
@ -461,6 +464,68 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acce
|
|||
return result;
|
||||
}
|
||||
|
||||
// Emits a CALL to the shared fifoDirectWrite8/16/32 routine selected by
// `accessSize` (in bits), then adds the store width in bytes (accessSize >> 3)
// to jit->js.fifoBytesThisBlock.
// This function does not load the value itself; the caller visible in this
// file (WriteToConstAddress) moves it into RSCRATCH before calling.
// NOTE(review): the switch has no default, so a size other than 8/16/32 emits
// no CALL but is still added to fifoBytesThisBlock - confirm callers only pass
// 8, 16 or 32.
void EmuCodeBlock::UnsafeWriteGatherPipe(int accessSize)
|
||||
{
|
||||
// No need to protect these, they don't touch any state
|
||||
// question - should we inline them instead? Pro: Lose a CALL Con: Code bloat
|
||||
switch (accessSize)
|
||||
{
|
||||
case 8:
|
||||
CALL((void *)jit->GetAsmRoutines()->fifoDirectWrite8);
|
||||
break;
|
||||
case 16:
|
||||
CALL((void *)jit->GetAsmRoutines()->fifoDirectWrite16);
|
||||
break;
|
||||
case 32:
|
||||
CALL((void *)jit->GetAsmRoutines()->fifoDirectWrite32);
|
||||
break;
|
||||
}
|
||||
// Counted in bytes: accessSize is in bits, hence the shift by 3.
jit->js.fifoBytesThisBlock += accessSize >> 3;
|
||||
}
|
||||
|
||||
// Emits a store of `arg` to an address known at JIT-compile time, choosing one
// of three code paths based on `address`:
//   1. (address & 0xFFFFF000) == 0xCC008000, jo.optimizeGatherPipe set and
//      accessSize <= 32: call the gather-pipe write routine.
//   2. Memory::IsRAMAddress(address): store via WriteToConstRamAddress.
//   3. Otherwise: call the matching Memory::Write_U8/U16/U32/U64 function.
//
// accessSize      store width in bits; the generic path handles 8, 16, 32, 64.
// arg             the value operand to store.
// address         the constant target address.
// registersInUse  registers pushed before and popped after the call on the
//                 generic path; not used by the other two paths.
//
// Returns false on the gather-pipe and RAM paths, true on the generic path.
// The visible callers use this result to decide whether an update-form store
// may fold the new address into an immediate when js.memcheck is enabled.
bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address, BitSet32 registersInUse)
|
||||
{
|
||||
// If we already know the address through constant folding, we can do some
|
||||
// fun tricks...
|
||||
if ((address & 0xFFFFF000) == 0xCC008000 && jit->jo.optimizeGatherPipe && accessSize <= 32)
|
||||
{
|
||||
// Move the value into RSCRATCH unless it is already there; the move is
// always 32 bits wide regardless of accessSize.
if (!arg.IsSimpleReg() || arg.GetSimpleReg() != RSCRATCH)
|
||||
MOV(32, R(RSCRATCH), arg);
|
||||
|
||||
UnsafeWriteGatherPipe(accessSize);
|
||||
return false;
|
||||
}
|
||||
else if (Memory::IsRAMAddress(address))
|
||||
{
|
||||
// The swap argument is left at WriteToConstRamAddress's declared default.
WriteToConstRamAddress(accessSize, arg, address);
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Helps external systems know which instruction triggered the write
|
||||
MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC));
|
||||
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
||||
// No default case: an unlisted accessSize emits the push/pop pair with no
// call in between, and the function still returns true.
switch (accessSize)
|
||||
{
|
||||
case 64:
|
||||
// NOTE(review): confirm ABI_CallFunctionAC transfers all 64 bits of `arg`;
// if it moves the operand with a 32-bit MOV, the upper half is lost here.
ABI_CallFunctionAC((void *)&Memory::Write_U64, arg, address);
|
||||
break;
|
||||
case 32:
|
||||
ABI_CallFunctionAC((void *)&Memory::Write_U32, arg, address);
|
||||
break;
|
||||
case 16:
|
||||
ABI_CallFunctionAC((void *)&Memory::Write_U16, arg, address);
|
||||
break;
|
||||
case 8:
|
||||
ABI_CallFunctionAC((void *)&Memory::Write_U8, arg, address);
|
||||
break;
|
||||
}
|
||||
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset, BitSet32 registersInUse, int flags)
|
||||
{
|
||||
// set the correct immediate format
|
||||
|
@ -565,20 +630,30 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
|
|||
SetJumpTarget(exit);
|
||||
}
|
||||
|
||||
// Destroys the same as SafeWrite plus RSCRATCH. TODO: see if we can avoid temporaries here
|
||||
void EmuCodeBlock::SafeWriteF32ToReg(X64Reg xmm_value, X64Reg reg_addr, s32 offset, BitSet32 registersInUse, int flags)
|
||||
void EmuCodeBlock::WriteToConstRamAddress(int accessSize, OpArg arg, u32 address, bool swap)
|
||||
{
|
||||
// TODO: PSHUFB might be faster if fastmem supported MOVSS.
|
||||
MOVD_xmm(R(RSCRATCH), xmm_value);
|
||||
SafeWriteRegToReg(RSCRATCH, reg_addr, 32, offset, registersInUse, flags);
|
||||
X64Reg reg;
|
||||
if (arg.IsImm())
|
||||
{
|
||||
arg = SwapImmediate(accessSize, arg);
|
||||
MOV(accessSize, MDisp(RMEM, address & 0x3FFFFFFF), arg);
|
||||
return;
|
||||
}
|
||||
|
||||
void EmuCodeBlock::WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap)
|
||||
if (!arg.IsSimpleReg() || (!cpu_info.bMOVBE && swap && arg.GetSimpleReg() != RSCRATCH))
|
||||
{
|
||||
if (swap)
|
||||
SwapAndStore(accessSize, MDisp(RMEM, address & 0x3FFFFFFF), arg);
|
||||
MOV(accessSize, R(RSCRATCH), arg);
|
||||
reg = RSCRATCH;
|
||||
}
|
||||
else
|
||||
MOV(accessSize, MDisp(RMEM, address & 0x3FFFFFFF), R(arg));
|
||||
{
|
||||
reg = arg.GetSimpleReg();
|
||||
}
|
||||
|
||||
if (swap)
|
||||
SwapAndStore(accessSize, MDisp(RMEM, address & 0x3FFFFFFF), reg);
|
||||
else
|
||||
MOV(accessSize, MDisp(RMEM, address & 0x3FFFFFFF), R(reg));
|
||||
}
|
||||
|
||||
void EmuCodeBlock::ForceSinglePrecisionS(X64Reg xmm)
|
||||
|
|
|
@ -87,6 +87,7 @@ public:
|
|||
return UnsafeWriteRegToReg(R(reg_value), reg_addr, accessSize, offset, swap);
|
||||
}
|
||||
u8 *UnsafeLoadToReg(Gen::X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset, bool signExtend);
|
||||
void UnsafeWriteGatherPipe(int accessSize);
|
||||
|
||||
// Generate a load/write from the MMIO handler for a given address. Only
|
||||
// call for known addresses in MMIO range (MMIO::IsMMIOAddress).
|
||||
|
@ -116,9 +117,9 @@ public:
|
|||
return swap && !cpu_info.bMOVBE && accessSize > 8;
|
||||
}
|
||||
|
||||
void SafeWriteF32ToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, s32 offset, BitSet32 registersInUse, int flags = 0);
|
||||
|
||||
void WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap = false);
|
||||
void WriteToConstRamAddress(int accessSize, Gen::OpArg arg, u32 address, bool swap = true);
|
||||
// returns true if an exception could have been caused
|
||||
bool WriteToConstAddress(int accessSize, Gen::OpArg arg, u32 address, BitSet32 registersInUse);
|
||||
void JitGetAndClearCAOV(bool oe);
|
||||
void JitSetCA();
|
||||
void JitSetCAIf(Gen::CCFlags conditionCode);
|
||||
|
|
Loading…
Reference in New Issue