Merge pull request #4533 from lioncash/jits
Jit64/JitIL: Get rid of trivial global jit variable usages
This commit is contained in:
commit
8192af9b3d
|
@ -189,15 +189,15 @@ void Jit64::fp_arith(UGeckoInstruction inst)
|
||||||
// If both the inputs are known to have identical top and bottom halves, we can skip the MOVDDUP
|
// If both the inputs are known to have identical top and bottom halves, we can skip the MOVDDUP
|
||||||
// at the end by
|
// at the end by
|
||||||
// using packed arithmetic instead.
|
// using packed arithmetic instead.
|
||||||
bool packed = inst.OPCD == 4 || (inst.OPCD == 59 && jit->js.op->fprIsDuplicated[a] &&
|
bool packed = inst.OPCD == 4 ||
|
||||||
jit->js.op->fprIsDuplicated[arg2]);
|
(inst.OPCD == 59 && js.op->fprIsDuplicated[a] && js.op->fprIsDuplicated[arg2]);
|
||||||
// Packed divides are slower than scalar divides on basically all x86, so this optimization isn't
|
// Packed divides are slower than scalar divides on basically all x86, so this optimization isn't
|
||||||
// worth it in that case.
|
// worth it in that case.
|
||||||
// Atoms (and a few really old CPUs) are also slower on packed operations than scalar ones.
|
// Atoms (and a few really old CPUs) are also slower on packed operations than scalar ones.
|
||||||
if (inst.OPCD == 59 && (inst.SUBOP5 == 18 || cpu_info.bAtom))
|
if (inst.OPCD == 59 && (inst.SUBOP5 == 18 || cpu_info.bAtom))
|
||||||
packed = false;
|
packed = false;
|
||||||
|
|
||||||
bool round_input = single && !jit->js.op->fprIsSingle[inst.FC];
|
bool round_input = single && !js.op->fprIsSingle[inst.FC];
|
||||||
bool preserve_inputs = SConfig::GetInstance().bAccurateNaNs;
|
bool preserve_inputs = SConfig::GetInstance().bAccurateNaNs;
|
||||||
|
|
||||||
X64Reg dest = INVALID_REG;
|
X64Reg dest = INVALID_REG;
|
||||||
|
@ -241,10 +241,9 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
|
||||||
int c = inst.FC;
|
int c = inst.FC;
|
||||||
int d = inst.FD;
|
int d = inst.FD;
|
||||||
bool single = inst.OPCD == 4 || inst.OPCD == 59;
|
bool single = inst.OPCD == 4 || inst.OPCD == 59;
|
||||||
bool round_input = single && !jit->js.op->fprIsSingle[c];
|
bool round_input = single && !js.op->fprIsSingle[c];
|
||||||
bool packed =
|
bool packed = inst.OPCD == 4 || (!cpu_info.bAtom && single && js.op->fprIsDuplicated[a] &&
|
||||||
inst.OPCD == 4 || (!cpu_info.bAtom && single && jit->js.op->fprIsDuplicated[a] &&
|
js.op->fprIsDuplicated[b] && js.op->fprIsDuplicated[c]);
|
||||||
jit->js.op->fprIsDuplicated[b] && jit->js.op->fprIsDuplicated[c]);
|
|
||||||
|
|
||||||
fpr.Lock(a, b, c, d);
|
fpr.Lock(a, b, c, d);
|
||||||
|
|
||||||
|
@ -635,7 +634,7 @@ void Jit64::frspx(UGeckoInstruction inst)
|
||||||
FALLBACK_IF(inst.Rc);
|
FALLBACK_IF(inst.Rc);
|
||||||
int b = inst.FB;
|
int b = inst.FB;
|
||||||
int d = inst.FD;
|
int d = inst.FD;
|
||||||
bool packed = jit->js.op->fprIsDuplicated[b] && !cpu_info.bAtom;
|
bool packed = js.op->fprIsDuplicated[b] && !cpu_info.bAtom;
|
||||||
|
|
||||||
fpr.Lock(b, d);
|
fpr.Lock(b, d);
|
||||||
OpArg src = fpr.R(b);
|
OpArg src = fpr.R(b);
|
||||||
|
|
|
@ -279,7 +279,7 @@ void Jit64::dcbx(UGeckoInstruction inst)
|
||||||
// Check whether a JIT cache line needs to be invalidated.
|
// Check whether a JIT cache line needs to be invalidated.
|
||||||
LEA(32, value, MScaled(addr, SCALE_8, 0)); // addr << 3 (masks the first 3 bits)
|
LEA(32, value, MScaled(addr, SCALE_8, 0)); // addr << 3 (masks the first 3 bits)
|
||||||
SHR(32, R(value), Imm8(3 + 5 + 5)); // >> 5 for cache line size, >> 5 for width of bitset
|
SHR(32, R(value), Imm8(3 + 5 + 5)); // >> 5 for cache line size, >> 5 for width of bitset
|
||||||
MOV(64, R(tmp), ImmPtr(jit->GetBlockCache()->GetBlockBitSet()));
|
MOV(64, R(tmp), ImmPtr(GetBlockCache()->GetBlockBitSet()));
|
||||||
MOV(32, R(value), MComplex(tmp, value, SCALE_4, 0));
|
MOV(32, R(value), MComplex(tmp, value, SCALE_4, 0));
|
||||||
SHR(32, R(addr), Imm8(5));
|
SHR(32, R(addr), Imm8(5));
|
||||||
BT(32, R(value), R(addr));
|
BT(32, R(value), R(addr));
|
||||||
|
@ -355,7 +355,7 @@ void Jit64::dcbz(UGeckoInstruction inst)
|
||||||
SwitchToFarCode();
|
SwitchToFarCode();
|
||||||
SetJumpTarget(slow);
|
SetJumpTarget(slow);
|
||||||
}
|
}
|
||||||
MOV(32, M(&PC), Imm32(jit->js.compilerPC));
|
MOV(32, M(&PC), Imm32(js.compilerPC));
|
||||||
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
||||||
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
||||||
ABI_CallFunctionR(PowerPC::ClearCacheLine, RSCRATCH);
|
ABI_CallFunctionR(PowerPC::ClearCacheLine, RSCRATCH);
|
||||||
|
|
|
@ -105,7 +105,7 @@ void Jit64::stfXXX(UGeckoInstruction inst)
|
||||||
|
|
||||||
if (single)
|
if (single)
|
||||||
{
|
{
|
||||||
if (jit->js.op->fprIsStoreSafe[s])
|
if (js.op->fprIsStoreSafe[s])
|
||||||
{
|
{
|
||||||
CVTSD2SS(XMM0, fpr.R(s));
|
CVTSD2SS(XMM0, fpr.R(s));
|
||||||
}
|
}
|
||||||
|
|
|
@ -84,7 +84,7 @@ void Jit64::ps_muls(UGeckoInstruction inst)
|
||||||
int d = inst.FD;
|
int d = inst.FD;
|
||||||
int a = inst.FA;
|
int a = inst.FA;
|
||||||
int c = inst.FC;
|
int c = inst.FC;
|
||||||
bool round_input = !jit->js.op->fprIsSingle[c];
|
bool round_input = !js.op->fprIsSingle[c];
|
||||||
fpr.Lock(a, c, d);
|
fpr.Lock(a, c, d);
|
||||||
switch (inst.SUBOP5)
|
switch (inst.SUBOP5)
|
||||||
{
|
{
|
||||||
|
|
|
@ -54,7 +54,7 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
|
||||||
TrampolineInfo& info = it->second;
|
TrampolineInfo& info = it->second;
|
||||||
|
|
||||||
u8* exceptionHandler = nullptr;
|
u8* exceptionHandler = nullptr;
|
||||||
if (jit->jo.memcheck)
|
if (jo.memcheck)
|
||||||
{
|
{
|
||||||
auto it2 = m_exception_handler_at_loc.find(codePtr);
|
auto it2 = m_exception_handler_at_loc.find(codePtr);
|
||||||
if (it2 != m_exception_handler_at_loc.end())
|
if (it2 != m_exception_handler_at_loc.end())
|
||||||
|
@ -68,13 +68,13 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
|
||||||
// into the original code if necessary to ensure there is enough space
|
// into the original code if necessary to ensure there is enough space
|
||||||
// to insert the backpatch jump.)
|
// to insert the backpatch jump.)
|
||||||
|
|
||||||
jit->js.generatingTrampoline = true;
|
js.generatingTrampoline = true;
|
||||||
jit->js.trampolineExceptionHandler = exceptionHandler;
|
js.trampolineExceptionHandler = exceptionHandler;
|
||||||
|
|
||||||
// Generate the trampoline.
|
// Generate the trampoline.
|
||||||
const u8* trampoline = trampolines.GenerateTrampoline(info);
|
const u8* trampoline = trampolines.GenerateTrampoline(info);
|
||||||
jit->js.generatingTrampoline = false;
|
js.generatingTrampoline = false;
|
||||||
jit->js.trampolineExceptionHandler = nullptr;
|
js.trampolineExceptionHandler = nullptr;
|
||||||
|
|
||||||
u8* start = info.start;
|
u8* start = info.start;
|
||||||
|
|
||||||
|
|
|
@ -354,8 +354,8 @@ void JitIL::Cleanup()
|
||||||
{
|
{
|
||||||
// SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time.
|
// SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time.
|
||||||
if (MMCR0.Hex || MMCR1.Hex)
|
if (MMCR0.Hex || MMCR1.Hex)
|
||||||
ABI_CallFunctionCCC(PowerPC::UpdatePerformanceMonitor, js.downcountAmount,
|
ABI_CallFunctionCCC(PowerPC::UpdatePerformanceMonitor, js.downcountAmount, js.numLoadStoreInst,
|
||||||
jit->js.numLoadStoreInst, jit->js.numFloatingPointInst);
|
js.numFloatingPointInst);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitIL::WriteExit(u32 destination)
|
void JitIL::WriteExit(u32 destination)
|
||||||
|
@ -518,8 +518,8 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc
|
||||||
js.blockStart = em_address;
|
js.blockStart = em_address;
|
||||||
js.fifoBytesSinceCheck = 0;
|
js.fifoBytesSinceCheck = 0;
|
||||||
js.curBlock = b;
|
js.curBlock = b;
|
||||||
jit->js.numLoadStoreInst = 0;
|
js.numLoadStoreInst = 0;
|
||||||
jit->js.numFloatingPointInst = 0;
|
js.numFloatingPointInst = 0;
|
||||||
|
|
||||||
PPCAnalyst::CodeOp* ops = code_buf->codebuffer;
|
PPCAnalyst::CodeOp* ops = code_buf->codebuffer;
|
||||||
|
|
||||||
|
@ -610,7 +610,7 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc
|
||||||
if (type == HLE::HLE_HOOK_REPLACE)
|
if (type == HLE::HLE_HOOK_REPLACE)
|
||||||
{
|
{
|
||||||
MOV(32, R(EAX), PPCSTATE(npc));
|
MOV(32, R(EAX), PPCSTATE(npc));
|
||||||
jit->js.downcountAmount += jit->js.st.numCycles;
|
js.downcountAmount += js.st.numCycles;
|
||||||
WriteExitDestInOpArg(R(EAX));
|
WriteExitDestInOpArg(R(EAX));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -625,7 +625,7 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc
|
||||||
ibuild.EmitFPExceptionCheck(ibuild.EmitIntConst(ops[i].address));
|
ibuild.EmitFPExceptionCheck(ibuild.EmitIntConst(ops[i].address));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (jit->js.fifoWriteAddresses.find(js.compilerPC) != jit->js.fifoWriteAddresses.end())
|
if (js.fifoWriteAddresses.find(js.compilerPC) != js.fifoWriteAddresses.end())
|
||||||
{
|
{
|
||||||
ibuild.EmitExtExceptionCheck(ibuild.EmitIntConst(ops[i].address));
|
ibuild.EmitExtExceptionCheck(ibuild.EmitIntConst(ops[i].address));
|
||||||
}
|
}
|
||||||
|
@ -648,10 +648,10 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc
|
||||||
}
|
}
|
||||||
|
|
||||||
if (opinfo->flags & FL_LOADSTORE)
|
if (opinfo->flags & FL_LOADSTORE)
|
||||||
++jit->js.numLoadStoreInst;
|
++js.numLoadStoreInst;
|
||||||
|
|
||||||
if (opinfo->flags & FL_USE_FPU)
|
if (opinfo->flags & FL_USE_FPU)
|
||||||
++jit->js.numFloatingPointInst;
|
++js.numFloatingPointInst;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue