Merge pull request #1668 from Tilka/memop_cleanup
Jit64: clean up casts in memory operands
This commit is contained in:
commit
3a149c3aab
|
@ -194,7 +194,6 @@ private:
|
|||
u16 indexReg;
|
||||
};
|
||||
|
||||
inline OpArg M(const void *ptr) {return OpArg((u64)ptr, (int)SCALE_RIP);}
|
||||
template <typename T>
|
||||
inline OpArg M(const T *ptr) {return OpArg((u64)(const void *)ptr, (int)SCALE_RIP);}
|
||||
inline OpArg R(X64Reg value) {return OpArg(0, SCALE_NONE, value);}
|
||||
|
|
|
@ -694,7 +694,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
|||
SetJumpTarget(extException);
|
||||
TEST(32, PPCSTATE(msr), Imm32(0x0008000));
|
||||
FixupBranch noExtIntEnable = J_CC(CC_Z, true);
|
||||
TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_FINISH));
|
||||
TEST(32, M(&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_FINISH));
|
||||
FixupBranch noCPInt = J_CC(CC_Z, true);
|
||||
|
||||
gpr.Flush(FLUSH_MAINTAIN_STATE);
|
||||
|
@ -722,7 +722,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
|||
ABI_PushRegistersAndAdjustStack({}, 0);
|
||||
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints));
|
||||
ABI_PopRegistersAndAdjustStack({}, 0);
|
||||
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
|
||||
TEST(32, M(PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
|
||||
FixupBranch noBreakpoint = J_CC(CC_Z);
|
||||
|
||||
WriteExit(ops[i].address);
|
||||
|
|
|
@ -70,12 +70,12 @@ void Jit64AsmRoutineManager::Generate()
|
|||
|
||||
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging)
|
||||
{
|
||||
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(PowerPC::CPU_STEPPING));
|
||||
TEST(32, M(PowerPC::GetStatePtr()), Imm32(PowerPC::CPU_STEPPING));
|
||||
FixupBranch notStepping = J_CC(CC_Z);
|
||||
ABI_PushRegistersAndAdjustStack({}, 0);
|
||||
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints));
|
||||
ABI_PopRegistersAndAdjustStack({}, 0);
|
||||
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
|
||||
TEST(32, M(PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
|
||||
dbg_exit = J_CC(CC_NZ, true);
|
||||
SetJumpTarget(notStepping);
|
||||
}
|
||||
|
@ -151,7 +151,7 @@ void Jit64AsmRoutineManager::Generate()
|
|||
ABI_PopRegistersAndAdjustStack({}, 0);
|
||||
SetJumpTarget(noExtException);
|
||||
|
||||
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
|
||||
TEST(32, M(PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
|
||||
J_CC(CC_Z, outerLoop);
|
||||
|
||||
//Landing pad for drec space
|
||||
|
|
|
@ -209,7 +209,7 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
|
|||
ADDSD(XMM0, fpr.R(b));
|
||||
}
|
||||
if (inst.SUBOP5 == 31) //nmadd
|
||||
PXOR(XMM0, M((void*)&(packed ? psSignBits2 : psSignBits)));
|
||||
PXOR(XMM0, M(packed ? psSignBits2 : psSignBits));
|
||||
}
|
||||
|
||||
fpr.BindToRegister(d, !single);
|
||||
|
@ -252,13 +252,13 @@ void Jit64::fsign(UGeckoInstruction inst)
|
|||
case 40: // fnegx
|
||||
// We can cheat and not worry about clobbering the top half by using masks
|
||||
// that don't modify the top half.
|
||||
PXOR(fpr.RX(d), M((void*)&psSignBits));
|
||||
PXOR(fpr.RX(d), M(psSignBits));
|
||||
break;
|
||||
case 264: // fabsx
|
||||
PAND(fpr.RX(d), M((void*)&psAbsMask));
|
||||
PAND(fpr.RX(d), M(psAbsMask));
|
||||
break;
|
||||
case 136: // fnabs
|
||||
POR(fpr.RX(d), M((void*)&psSignBits));
|
||||
POR(fpr.RX(d), M(psSignBits));
|
||||
break;
|
||||
default:
|
||||
PanicAlert("fsign bleh");
|
||||
|
@ -463,7 +463,7 @@ void Jit64::fctiwx(UGeckoInstruction inst)
|
|||
// The upper 32 bits of the result are set to 0xfff80000,
|
||||
// except for -0.0 where they are set to 0xfff80001 (TODO).
|
||||
|
||||
MOVAPD(XMM0, M(&half_qnan_and_s32_max));
|
||||
MOVAPD(XMM0, M(half_qnan_and_s32_max));
|
||||
MINSD(XMM0, fpr.R(b));
|
||||
switch (inst.SUBOP10)
|
||||
{
|
||||
|
|
|
@ -77,13 +77,13 @@ void Jit64::ps_sign(UGeckoInstruction inst)
|
|||
switch (inst.SUBOP10)
|
||||
{
|
||||
case 40: //neg
|
||||
avx_op(&XEmitter::VPXOR, &XEmitter::PXOR, fpr.RX(d), fpr.R(b), M((void*)&psSignBits));
|
||||
avx_op(&XEmitter::VPXOR, &XEmitter::PXOR, fpr.RX(d), fpr.R(b), M(psSignBits));
|
||||
break;
|
||||
case 136: //nabs
|
||||
avx_op(&XEmitter::VPOR, &XEmitter::POR, fpr.RX(d), fpr.R(b), M((void*)&psSignBits));
|
||||
avx_op(&XEmitter::VPOR, &XEmitter::POR, fpr.RX(d), fpr.R(b), M(psSignBits));
|
||||
break;
|
||||
case 264: //abs
|
||||
avx_op(&XEmitter::VPAND, &XEmitter::PAND, fpr.RX(d), fpr.R(b), M((void*)&psAbsMask));
|
||||
avx_op(&XEmitter::VPAND, &XEmitter::PAND, fpr.RX(d), fpr.R(b), M(psAbsMask));
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -372,12 +372,12 @@ void Jit64::ps_maddXX(UGeckoInstruction inst)
|
|||
case 30: //nmsub
|
||||
MULPD(XMM0, fpr.R(a));
|
||||
SUBPD(XMM0, fpr.R(b));
|
||||
PXOR(XMM0, M((void*)&psSignBits));
|
||||
PXOR(XMM0, M(psSignBits));
|
||||
break;
|
||||
case 31: //nmadd
|
||||
MULPD(XMM0, fpr.R(a));
|
||||
ADDPD(XMM0, fpr.R(b));
|
||||
PXOR(XMM0, M((void*)&psSignBits));
|
||||
PXOR(XMM0, M(psSignBits));
|
||||
break;
|
||||
default:
|
||||
_assert_msg_(DYNA_REC, 0, "ps_maddXX WTF!!!");
|
||||
|
|
|
@ -334,7 +334,7 @@ void Jit64::mtmsr(UGeckoInstruction inst)
|
|||
FixupBranch noExceptionsPending = J_CC(CC_Z);
|
||||
|
||||
// Check if a CP interrupt is waiting and keep the GPU emulation in sync (issue 4336)
|
||||
TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP));
|
||||
TEST(32, M(&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP));
|
||||
FixupBranch cpInt = J_CC(CC_NZ);
|
||||
|
||||
MOV(32, PPCSTATE(pc), Imm32(js.compilerPC + 4));
|
||||
|
|
|
@ -1721,7 +1721,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
|
||||
X64Reg reg = fregURegWithMov(RI, I);
|
||||
static const u32 GC_ALIGNED16(ssSignBits[4]) = {0x80000000};
|
||||
Jit->PXOR(reg, M((void*)&ssSignBits));
|
||||
Jit->PXOR(reg, M(ssSignBits));
|
||||
RI.fregs[reg] = I;
|
||||
fregNormalRegClear(RI, I);
|
||||
break;
|
||||
|
@ -1733,7 +1733,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
|
||||
X64Reg reg = fregURegWithMov(RI, I);
|
||||
static const u64 GC_ALIGNED16(sdSignBits[2]) = {0x8000000000000000ULL};
|
||||
Jit->PXOR(reg, M((void*)&sdSignBits));
|
||||
Jit->PXOR(reg, M(sdSignBits));
|
||||
RI.fregs[reg] = I;
|
||||
fregNormalRegClear(RI, I);
|
||||
break;
|
||||
|
@ -1745,7 +1745,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
|
||||
X64Reg reg = fregURegWithMov(RI, I);
|
||||
static const u32 GC_ALIGNED16(psSignBits[4]) = {0x80000000, 0x80000000};
|
||||
Jit->PXOR(reg, M((void*)&psSignBits));
|
||||
Jit->PXOR(reg, M(psSignBits));
|
||||
RI.fregs[reg] = I;
|
||||
fregNormalRegClear(RI, I);
|
||||
break;
|
||||
|
@ -2266,7 +2266,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
FixupBranch noExtException = Jit->J_CC(CC_Z);
|
||||
Jit->TEST(32, PPCSTATE(msr), Imm32(0x0008000));
|
||||
FixupBranch noExtIntEnable = Jit->J_CC(CC_Z);
|
||||
Jit->TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_FINISH));
|
||||
Jit->TEST(32, M(&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_FINISH));
|
||||
FixupBranch noCPInt = Jit->J_CC(CC_Z);
|
||||
|
||||
Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc));
|
||||
|
@ -2284,7 +2284,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
|
||||
Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc));
|
||||
Jit->ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints));
|
||||
Jit->TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
|
||||
Jit->TEST(32, M(PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
|
||||
FixupBranch noBreakpoint = Jit->J_CC(CC_Z);
|
||||
Jit->WriteExit(InstLoc);
|
||||
Jit->SetJumpTarget(noBreakpoint);
|
||||
|
|
|
@ -267,7 +267,7 @@ void CommonAsmRoutines::GenQuantizedStores()
|
|||
MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
#ifdef QUANTIZE_OVERFLOW_SAFE
|
||||
MINPS(XMM0, M((void *)&m_65535));
|
||||
MINPS(XMM0, M(m_65535));
|
||||
#endif
|
||||
CVTTPS2DQ(XMM0, R(XMM0));
|
||||
PACKSSDW(XMM0, R(XMM0));
|
||||
|
@ -282,7 +282,7 @@ void CommonAsmRoutines::GenQuantizedStores()
|
|||
MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
#ifdef QUANTIZE_OVERFLOW_SAFE
|
||||
MINPS(XMM0, M((void *)&m_65535));
|
||||
MINPS(XMM0, M(m_65535));
|
||||
#endif
|
||||
CVTTPS2DQ(XMM0, R(XMM0));
|
||||
PACKSSDW(XMM0, R(XMM0));
|
||||
|
@ -301,7 +301,7 @@ void CommonAsmRoutines::GenQuantizedStores()
|
|||
if (cpu_info.bSSE4_1)
|
||||
{
|
||||
#ifdef QUANTIZE_OVERFLOW_SAFE
|
||||
MINPS(XMM0, M((void *)&m_65535));
|
||||
MINPS(XMM0, M(m_65535));
|
||||
#endif
|
||||
CVTTPS2DQ(XMM0, R(XMM0));
|
||||
PACKUSDW(XMM0, R(XMM0));
|
||||
|
@ -313,15 +313,15 @@ void CommonAsmRoutines::GenQuantizedStores()
|
|||
{
|
||||
XORPS(XMM1, R(XMM1));
|
||||
MAXPS(XMM0, R(XMM1));
|
||||
MINPS(XMM0, M((void *)&m_65535));
|
||||
MINPS(XMM0, M(m_65535));
|
||||
|
||||
CVTTPS2DQ(XMM0, R(XMM0));
|
||||
MOVQ_xmm(M(psTemp), XMM0);
|
||||
// place ps[0] into the higher word, ps[1] into the lower
|
||||
// so no need in ROL after BSWAP
|
||||
MOVZX(32, 16, RSCRATCH, M((char*)psTemp + 0));
|
||||
MOVZX(32, 16, RSCRATCH, M(&psTemp[0]));
|
||||
SHL(32, R(RSCRATCH), Imm8(16));
|
||||
MOV(16, R(RSCRATCH), M((char*)psTemp + 4));
|
||||
MOV(16, R(RSCRATCH), M(&psTemp[1]));
|
||||
BSWAP(32, RSCRATCH);
|
||||
}
|
||||
|
||||
|
@ -334,7 +334,7 @@ void CommonAsmRoutines::GenQuantizedStores()
|
|||
MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
#ifdef QUANTIZE_OVERFLOW_SAFE
|
||||
MINPS(XMM0, M((void *)&m_65535));
|
||||
MINPS(XMM0, M(m_65535));
|
||||
#endif
|
||||
CVTTPS2DQ(XMM0, R(XMM0));
|
||||
PACKSSDW(XMM0, R(XMM0));
|
||||
|
@ -372,7 +372,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
|
|||
/*
|
||||
if (cpu_info.bSSSE3)
|
||||
{
|
||||
PSHUFB(XMM0, M((void *)pbswapShuffle2x4));
|
||||
PSHUFB(XMM0, M(pbswapShuffle2x4));
|
||||
// TODO: SafeWriteFloat
|
||||
MOVSS(M(&psTemp[0]), XMM0);
|
||||
MOV(32, R(RSCRATCH), M(&psTemp[0]));
|
||||
|
@ -390,7 +390,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
|
|||
MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
|
||||
XORPS(XMM1, R(XMM1));
|
||||
MAXSS(XMM0, R(XMM1));
|
||||
MINSS(XMM0, M((void *)&m_255));
|
||||
MINSS(XMM0, M(&m_255));
|
||||
CVTTSS2SI(RSCRATCH, R(XMM0));
|
||||
SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
RET();
|
||||
|
@ -398,8 +398,8 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
|
|||
const u8* storeSingleS8 = AlignCode4();
|
||||
SHR(32, R(RSCRATCH2), Imm8(5));
|
||||
MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
|
||||
MAXSS(XMM0, M((void *)&m_m128));
|
||||
MINSS(XMM0, M((void *)&m_127));
|
||||
MAXSS(XMM0, M(&m_m128));
|
||||
MINSS(XMM0, M(&m_127));
|
||||
CVTTSS2SI(RSCRATCH, R(XMM0));
|
||||
SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
RET();
|
||||
|
@ -409,7 +409,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
|
|||
MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
|
||||
XORPS(XMM1, R(XMM1));
|
||||
MAXSS(XMM0, R(XMM1));
|
||||
MINSS(XMM0, M((void *)&m_65535));
|
||||
MINSS(XMM0, M(m_65535));
|
||||
CVTTSS2SI(RSCRATCH, R(XMM0));
|
||||
SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
RET();
|
||||
|
@ -417,8 +417,8 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
|
|||
const u8* storeSingleS16 = AlignCode4();
|
||||
SHR(32, R(RSCRATCH2), Imm8(5));
|
||||
MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
|
||||
MAXSS(XMM0, M((void *)&m_m32768));
|
||||
MINSS(XMM0, M((void *)&m_32767));
|
||||
MAXSS(XMM0, M(&m_m32768));
|
||||
MINSS(XMM0, M(&m_32767));
|
||||
CVTTSS2SI(RSCRATCH, R(XMM0));
|
||||
SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
RET();
|
||||
|
@ -451,7 +451,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
|
|||
else if (cpu_info.bSSSE3)
|
||||
{
|
||||
MOVQ_xmm(XMM0, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0));
|
||||
PSHUFB(XMM0, M((void *)pbswapShuffle2x4));
|
||||
PSHUFB(XMM0, M(pbswapShuffle2x4));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -466,19 +466,19 @@ void CommonAsmRoutines::GenQuantizedLoads()
|
|||
{
|
||||
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 32, 0, QUANTIZED_REGS_TO_SAVE, false, SAFE_LOADSTORE_NO_PROLOG);
|
||||
MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
|
||||
UNPCKLPS(XMM0, M((void*)m_one));
|
||||
UNPCKLPS(XMM0, M(m_one));
|
||||
}
|
||||
else if (cpu_info.bSSSE3)
|
||||
{
|
||||
MOVD_xmm(XMM0, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0));
|
||||
PSHUFB(XMM0, M((void *)pbswapShuffle1x4));
|
||||
UNPCKLPS(XMM0, M((void*)m_one));
|
||||
PSHUFB(XMM0, M(pbswapShuffle1x4));
|
||||
UNPCKLPS(XMM0, M(m_one));
|
||||
}
|
||||
else
|
||||
{
|
||||
LoadAndSwap(32, RSCRATCH_EXTRA, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0));
|
||||
MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
|
||||
UNPCKLPS(XMM0, M((void*)m_one));
|
||||
UNPCKLPS(XMM0, M(m_one));
|
||||
}
|
||||
RET();
|
||||
|
||||
|
@ -518,7 +518,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
|
|||
CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
|
||||
SHR(32, R(RSCRATCH2), Imm8(5));
|
||||
MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
|
||||
UNPCKLPS(XMM0, M((void*)m_one));
|
||||
UNPCKLPS(XMM0, M(m_one));
|
||||
RET();
|
||||
|
||||
const u8* loadPairedS8Two = AlignCode4();
|
||||
|
@ -557,7 +557,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
|
|||
CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
|
||||
SHR(32, R(RSCRATCH2), Imm8(5));
|
||||
MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
|
||||
UNPCKLPS(XMM0, M((void*)m_one));
|
||||
UNPCKLPS(XMM0, M(m_one));
|
||||
RET();
|
||||
|
||||
const u8* loadPairedU16Two = AlignCode4();
|
||||
|
@ -591,7 +591,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
|
|||
CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
|
||||
SHR(32, R(RSCRATCH2), Imm8(5));
|
||||
MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
|
||||
UNPCKLPS(XMM0, M((void*)m_one));
|
||||
UNPCKLPS(XMM0, M(m_one));
|
||||
RET();
|
||||
|
||||
const u8* loadPairedS16Two = AlignCode4();
|
||||
|
@ -624,7 +624,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
|
|||
CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
|
||||
SHR(32, R(RSCRATCH2), Imm8(5));
|
||||
MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
|
||||
UNPCKLPS(XMM0, M((void*)m_one));
|
||||
UNPCKLPS(XMM0, M(m_one));
|
||||
RET();
|
||||
|
||||
pairedLoadQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));
|
||||
|
|
|
@ -790,16 +790,16 @@ void EmuCodeBlock::Force25BitPrecision(X64Reg output, OpArg input, X64Reg tmp)
|
|||
// mantissa = (mantissa & ~0xFFFFFFF) + ((mantissa & (1ULL << 27)) << 1);
|
||||
if (input.IsSimpleReg() && cpu_info.bAVX)
|
||||
{
|
||||
VPAND(tmp, input.GetSimpleReg(), M((void*)&psRoundBit));
|
||||
VPAND(output, input.GetSimpleReg(), M((void*)&psMantissaTruncate));
|
||||
VPAND(tmp, input.GetSimpleReg(), M(psRoundBit));
|
||||
VPAND(output, input.GetSimpleReg(), M(psMantissaTruncate));
|
||||
PADDQ(output, R(tmp));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!input.IsSimpleReg() || input.GetSimpleReg() != output)
|
||||
MOVAPD(output, input);
|
||||
avx_op(&XEmitter::VPAND, &XEmitter::PAND, tmp, R(output), M((void*)&psRoundBit), true, true);
|
||||
PAND(output, M((void*)&psMantissaTruncate));
|
||||
avx_op(&XEmitter::VPAND, &XEmitter::PAND, tmp, R(output), M(psRoundBit), true, true);
|
||||
PAND(output, M(psMantissaTruncate));
|
||||
PADDQ(output, R(tmp));
|
||||
}
|
||||
}
|
||||
|
@ -842,7 +842,7 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
|
|||
MOVSD(XMM1, R(src));
|
||||
|
||||
// Grab Exponent
|
||||
PAND(XMM1, M((void *)&double_exponent));
|
||||
PAND(XMM1, M(&double_exponent));
|
||||
PSRLQ(XMM1, 52);
|
||||
MOVD_xmm(R(RSCRATCH), XMM1);
|
||||
|
||||
|
@ -862,15 +862,15 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
|
|||
|
||||
// xmm1 = fraction | 0x0010000000000000
|
||||
MOVSD(XMM1, R(src));
|
||||
PAND(XMM1, M((void *)&double_fraction));
|
||||
POR(XMM1, M((void *)&double_explicit_top_bit));
|
||||
PAND(XMM1, M(&double_fraction));
|
||||
POR(XMM1, M(&double_explicit_top_bit));
|
||||
|
||||
// fraction >> shift
|
||||
PSRLQ(XMM1, R(XMM0));
|
||||
|
||||
// OR the sign bit in.
|
||||
MOVSD(XMM0, R(src));
|
||||
PAND(XMM0, M((void *)&double_sign_bit));
|
||||
PAND(XMM0, M(&double_sign_bit));
|
||||
PSRLQ(XMM0, 32);
|
||||
POR(XMM1, R(XMM0));
|
||||
|
||||
|
@ -883,12 +883,12 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
|
|||
|
||||
// We want bits 0, 1
|
||||
MOVSD(XMM1, R(src));
|
||||
PAND(XMM1, M((void *)&double_top_two_bits));
|
||||
PAND(XMM1, M(&double_top_two_bits));
|
||||
PSRLQ(XMM1, 32);
|
||||
|
||||
// And 5 through to 34
|
||||
MOVSD(XMM0, R(src));
|
||||
PAND(XMM0, M((void *)&double_bottom_bits));
|
||||
PAND(XMM0, M(&double_bottom_bits));
|
||||
PSRLQ(XMM0, 29);
|
||||
|
||||
// OR them togther
|
||||
|
@ -988,7 +988,7 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
|
|||
{
|
||||
MOVQ_xmm(R(RSCRATCH), xmm);
|
||||
SHR(64, R(RSCRATCH), Imm8(63)); // Get the sign bit; almost all the branches need it.
|
||||
PTEST(xmm, M((void*)psDoubleExp));
|
||||
PTEST(xmm, M(psDoubleExp));
|
||||
FixupBranch maxExponent = J_CC(CC_C);
|
||||
FixupBranch zeroExponent = J_CC(CC_Z);
|
||||
|
||||
|
@ -997,7 +997,7 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
|
|||
continue1 = J();
|
||||
|
||||
SetJumpTarget(maxExponent);
|
||||
PTEST(xmm, M((void*)psDoubleFrac));
|
||||
PTEST(xmm, M(psDoubleFrac));
|
||||
FixupBranch notNAN = J_CC(CC_Z);
|
||||
|
||||
// Max exponent + mantissa: PPC_FPCLASS_QNAN
|
||||
|
@ -1025,10 +1025,10 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
|
|||
else
|
||||
{
|
||||
MOVQ_xmm(R(RSCRATCH), xmm);
|
||||
TEST(64, R(RSCRATCH), M((void*)psDoubleExp));
|
||||
TEST(64, R(RSCRATCH), M(psDoubleExp));
|
||||
FixupBranch zeroExponent = J_CC(CC_Z);
|
||||
AND(64, R(RSCRATCH), M((void*)psDoubleNoSign));
|
||||
CMP(64, R(RSCRATCH), M((void*)psDoubleExp));
|
||||
AND(64, R(RSCRATCH), M(psDoubleNoSign));
|
||||
CMP(64, R(RSCRATCH), M(psDoubleExp));
|
||||
FixupBranch nan = J_CC(CC_G); // This works because if the sign bit is set, RSCRATCH is negative
|
||||
FixupBranch infinity = J_CC(CC_E);
|
||||
MOVQ_xmm(R(RSCRATCH), xmm);
|
||||
|
|
|
@ -29,7 +29,7 @@ namespace MMIO { class Mapping; }
|
|||
#define PPCSTATE(x) MDisp(RPPCSTATE, \
|
||||
(int) ((char *) &PowerPC::ppcState.x - (char *) &PowerPC::ppcState) - 0x80)
|
||||
// In case you want to disable the ppcstate register:
|
||||
// #define PPCSTATE(x) M((void*) &PowerPC::ppcState.x)
|
||||
// #define PPCSTATE(x) M(&PowerPC::ppcState.x)
|
||||
#define PPCSTATE_LR PPCSTATE(spr[SPR_LR])
|
||||
#define PPCSTATE_CTR PPCSTATE(spr[SPR_CTR])
|
||||
#define PPCSTATE_SRR0 PPCSTATE(spr[SPR_SRR0])
|
||||
|
|
Loading…
Reference in New Issue