Merge pull request #1668 from Tilka/memop_cleanup

Jit64: clean up casts in memory operands
This commit is contained in:
Lioncash 2014-12-07 15:10:01 -05:00
commit 3a149c3aab
10 changed files with 60 additions and 61 deletions

View File

@ -194,7 +194,6 @@ private:
u16 indexReg; u16 indexReg;
}; };
inline OpArg M(const void *ptr) {return OpArg((u64)ptr, (int)SCALE_RIP);}
template <typename T> template <typename T>
inline OpArg M(const T *ptr) {return OpArg((u64)(const void *)ptr, (int)SCALE_RIP);} inline OpArg M(const T *ptr) {return OpArg((u64)(const void *)ptr, (int)SCALE_RIP);}
inline OpArg R(X64Reg value) {return OpArg(0, SCALE_NONE, value);} inline OpArg R(X64Reg value) {return OpArg(0, SCALE_NONE, value);}

View File

@ -694,7 +694,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
SetJumpTarget(extException); SetJumpTarget(extException);
TEST(32, PPCSTATE(msr), Imm32(0x0008000)); TEST(32, PPCSTATE(msr), Imm32(0x0008000));
FixupBranch noExtIntEnable = J_CC(CC_Z, true); FixupBranch noExtIntEnable = J_CC(CC_Z, true);
TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_FINISH)); TEST(32, M(&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_FINISH));
FixupBranch noCPInt = J_CC(CC_Z, true); FixupBranch noCPInt = J_CC(CC_Z, true);
gpr.Flush(FLUSH_MAINTAIN_STATE); gpr.Flush(FLUSH_MAINTAIN_STATE);
@ -722,7 +722,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
ABI_PushRegistersAndAdjustStack({}, 0); ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints)); ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints));
ABI_PopRegistersAndAdjustStack({}, 0); ABI_PopRegistersAndAdjustStack({}, 0);
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); TEST(32, M(PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
FixupBranch noBreakpoint = J_CC(CC_Z); FixupBranch noBreakpoint = J_CC(CC_Z);
WriteExit(ops[i].address); WriteExit(ops[i].address);

View File

@ -70,12 +70,12 @@ void Jit64AsmRoutineManager::Generate()
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging) if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging)
{ {
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(PowerPC::CPU_STEPPING)); TEST(32, M(PowerPC::GetStatePtr()), Imm32(PowerPC::CPU_STEPPING));
FixupBranch notStepping = J_CC(CC_Z); FixupBranch notStepping = J_CC(CC_Z);
ABI_PushRegistersAndAdjustStack({}, 0); ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints)); ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints));
ABI_PopRegistersAndAdjustStack({}, 0); ABI_PopRegistersAndAdjustStack({}, 0);
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); TEST(32, M(PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
dbg_exit = J_CC(CC_NZ, true); dbg_exit = J_CC(CC_NZ, true);
SetJumpTarget(notStepping); SetJumpTarget(notStepping);
} }
@ -151,7 +151,7 @@ void Jit64AsmRoutineManager::Generate()
ABI_PopRegistersAndAdjustStack({}, 0); ABI_PopRegistersAndAdjustStack({}, 0);
SetJumpTarget(noExtException); SetJumpTarget(noExtException);
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); TEST(32, M(PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
J_CC(CC_Z, outerLoop); J_CC(CC_Z, outerLoop);
//Landing pad for drec space //Landing pad for drec space

View File

@ -209,7 +209,7 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
ADDSD(XMM0, fpr.R(b)); ADDSD(XMM0, fpr.R(b));
} }
if (inst.SUBOP5 == 31) //nmadd if (inst.SUBOP5 == 31) //nmadd
PXOR(XMM0, M((void*)&(packed ? psSignBits2 : psSignBits))); PXOR(XMM0, M(packed ? psSignBits2 : psSignBits));
} }
fpr.BindToRegister(d, !single); fpr.BindToRegister(d, !single);
@ -252,13 +252,13 @@ void Jit64::fsign(UGeckoInstruction inst)
case 40: // fnegx case 40: // fnegx
// We can cheat and not worry about clobbering the top half by using masks // We can cheat and not worry about clobbering the top half by using masks
// that don't modify the top half. // that don't modify the top half.
PXOR(fpr.RX(d), M((void*)&psSignBits)); PXOR(fpr.RX(d), M(psSignBits));
break; break;
case 264: // fabsx case 264: // fabsx
PAND(fpr.RX(d), M((void*)&psAbsMask)); PAND(fpr.RX(d), M(psAbsMask));
break; break;
case 136: // fnabs case 136: // fnabs
POR(fpr.RX(d), M((void*)&psSignBits)); POR(fpr.RX(d), M(psSignBits));
break; break;
default: default:
PanicAlert("fsign bleh"); PanicAlert("fsign bleh");
@ -463,7 +463,7 @@ void Jit64::fctiwx(UGeckoInstruction inst)
// The upper 32 bits of the result are set to 0xfff80000, // The upper 32 bits of the result are set to 0xfff80000,
// except for -0.0 where they are set to 0xfff80001 (TODO). // except for -0.0 where they are set to 0xfff80001 (TODO).
MOVAPD(XMM0, M(&half_qnan_and_s32_max)); MOVAPD(XMM0, M(half_qnan_and_s32_max));
MINSD(XMM0, fpr.R(b)); MINSD(XMM0, fpr.R(b));
switch (inst.SUBOP10) switch (inst.SUBOP10)
{ {

View File

@ -77,13 +77,13 @@ void Jit64::ps_sign(UGeckoInstruction inst)
switch (inst.SUBOP10) switch (inst.SUBOP10)
{ {
case 40: //neg case 40: //neg
avx_op(&XEmitter::VPXOR, &XEmitter::PXOR, fpr.RX(d), fpr.R(b), M((void*)&psSignBits)); avx_op(&XEmitter::VPXOR, &XEmitter::PXOR, fpr.RX(d), fpr.R(b), M(psSignBits));
break; break;
case 136: //nabs case 136: //nabs
avx_op(&XEmitter::VPOR, &XEmitter::POR, fpr.RX(d), fpr.R(b), M((void*)&psSignBits)); avx_op(&XEmitter::VPOR, &XEmitter::POR, fpr.RX(d), fpr.R(b), M(psSignBits));
break; break;
case 264: //abs case 264: //abs
avx_op(&XEmitter::VPAND, &XEmitter::PAND, fpr.RX(d), fpr.R(b), M((void*)&psAbsMask)); avx_op(&XEmitter::VPAND, &XEmitter::PAND, fpr.RX(d), fpr.R(b), M(psAbsMask));
break; break;
} }
@ -372,12 +372,12 @@ void Jit64::ps_maddXX(UGeckoInstruction inst)
case 30: //nmsub case 30: //nmsub
MULPD(XMM0, fpr.R(a)); MULPD(XMM0, fpr.R(a));
SUBPD(XMM0, fpr.R(b)); SUBPD(XMM0, fpr.R(b));
PXOR(XMM0, M((void*)&psSignBits)); PXOR(XMM0, M(psSignBits));
break; break;
case 31: //nmadd case 31: //nmadd
MULPD(XMM0, fpr.R(a)); MULPD(XMM0, fpr.R(a));
ADDPD(XMM0, fpr.R(b)); ADDPD(XMM0, fpr.R(b));
PXOR(XMM0, M((void*)&psSignBits)); PXOR(XMM0, M(psSignBits));
break; break;
default: default:
_assert_msg_(DYNA_REC, 0, "ps_maddXX WTF!!!"); _assert_msg_(DYNA_REC, 0, "ps_maddXX WTF!!!");

View File

@ -334,7 +334,7 @@ void Jit64::mtmsr(UGeckoInstruction inst)
FixupBranch noExceptionsPending = J_CC(CC_Z); FixupBranch noExceptionsPending = J_CC(CC_Z);
// Check if a CP interrupt is waiting and keep the GPU emulation in sync (issue 4336) // Check if a CP interrupt is waiting and keep the GPU emulation in sync (issue 4336)
TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP)); TEST(32, M(&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP));
FixupBranch cpInt = J_CC(CC_NZ); FixupBranch cpInt = J_CC(CC_NZ);
MOV(32, PPCSTATE(pc), Imm32(js.compilerPC + 4)); MOV(32, PPCSTATE(pc), Imm32(js.compilerPC + 4));

View File

@ -1721,7 +1721,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
X64Reg reg = fregURegWithMov(RI, I); X64Reg reg = fregURegWithMov(RI, I);
static const u32 GC_ALIGNED16(ssSignBits[4]) = {0x80000000}; static const u32 GC_ALIGNED16(ssSignBits[4]) = {0x80000000};
Jit->PXOR(reg, M((void*)&ssSignBits)); Jit->PXOR(reg, M(ssSignBits));
RI.fregs[reg] = I; RI.fregs[reg] = I;
fregNormalRegClear(RI, I); fregNormalRegClear(RI, I);
break; break;
@ -1733,7 +1733,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
X64Reg reg = fregURegWithMov(RI, I); X64Reg reg = fregURegWithMov(RI, I);
static const u64 GC_ALIGNED16(sdSignBits[2]) = {0x8000000000000000ULL}; static const u64 GC_ALIGNED16(sdSignBits[2]) = {0x8000000000000000ULL};
Jit->PXOR(reg, M((void*)&sdSignBits)); Jit->PXOR(reg, M(sdSignBits));
RI.fregs[reg] = I; RI.fregs[reg] = I;
fregNormalRegClear(RI, I); fregNormalRegClear(RI, I);
break; break;
@ -1745,7 +1745,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
X64Reg reg = fregURegWithMov(RI, I); X64Reg reg = fregURegWithMov(RI, I);
static const u32 GC_ALIGNED16(psSignBits[4]) = {0x80000000, 0x80000000}; static const u32 GC_ALIGNED16(psSignBits[4]) = {0x80000000, 0x80000000};
Jit->PXOR(reg, M((void*)&psSignBits)); Jit->PXOR(reg, M(psSignBits));
RI.fregs[reg] = I; RI.fregs[reg] = I;
fregNormalRegClear(RI, I); fregNormalRegClear(RI, I);
break; break;
@ -2266,7 +2266,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
FixupBranch noExtException = Jit->J_CC(CC_Z); FixupBranch noExtException = Jit->J_CC(CC_Z);
Jit->TEST(32, PPCSTATE(msr), Imm32(0x0008000)); Jit->TEST(32, PPCSTATE(msr), Imm32(0x0008000));
FixupBranch noExtIntEnable = Jit->J_CC(CC_Z); FixupBranch noExtIntEnable = Jit->J_CC(CC_Z);
Jit->TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_FINISH)); Jit->TEST(32, M(&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_FINISH));
FixupBranch noCPInt = Jit->J_CC(CC_Z); FixupBranch noCPInt = Jit->J_CC(CC_Z);
Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc)); Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc));
@ -2284,7 +2284,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc)); Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc));
Jit->ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints)); Jit->ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints));
Jit->TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); Jit->TEST(32, M(PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
FixupBranch noBreakpoint = Jit->J_CC(CC_Z); FixupBranch noBreakpoint = Jit->J_CC(CC_Z);
Jit->WriteExit(InstLoc); Jit->WriteExit(InstLoc);
Jit->SetJumpTarget(noBreakpoint); Jit->SetJumpTarget(noBreakpoint);

View File

@ -267,7 +267,7 @@ void CommonAsmRoutines::GenQuantizedStores()
MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS)); MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
MULPS(XMM0, R(XMM1)); MULPS(XMM0, R(XMM1));
#ifdef QUANTIZE_OVERFLOW_SAFE #ifdef QUANTIZE_OVERFLOW_SAFE
MINPS(XMM0, M((void *)&m_65535)); MINPS(XMM0, M(m_65535));
#endif #endif
CVTTPS2DQ(XMM0, R(XMM0)); CVTTPS2DQ(XMM0, R(XMM0));
PACKSSDW(XMM0, R(XMM0)); PACKSSDW(XMM0, R(XMM0));
@ -282,7 +282,7 @@ void CommonAsmRoutines::GenQuantizedStores()
MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS)); MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
MULPS(XMM0, R(XMM1)); MULPS(XMM0, R(XMM1));
#ifdef QUANTIZE_OVERFLOW_SAFE #ifdef QUANTIZE_OVERFLOW_SAFE
MINPS(XMM0, M((void *)&m_65535)); MINPS(XMM0, M(m_65535));
#endif #endif
CVTTPS2DQ(XMM0, R(XMM0)); CVTTPS2DQ(XMM0, R(XMM0));
PACKSSDW(XMM0, R(XMM0)); PACKSSDW(XMM0, R(XMM0));
@ -301,7 +301,7 @@ void CommonAsmRoutines::GenQuantizedStores()
if (cpu_info.bSSE4_1) if (cpu_info.bSSE4_1)
{ {
#ifdef QUANTIZE_OVERFLOW_SAFE #ifdef QUANTIZE_OVERFLOW_SAFE
MINPS(XMM0, M((void *)&m_65535)); MINPS(XMM0, M(m_65535));
#endif #endif
CVTTPS2DQ(XMM0, R(XMM0)); CVTTPS2DQ(XMM0, R(XMM0));
PACKUSDW(XMM0, R(XMM0)); PACKUSDW(XMM0, R(XMM0));
@ -313,15 +313,15 @@ void CommonAsmRoutines::GenQuantizedStores()
{ {
XORPS(XMM1, R(XMM1)); XORPS(XMM1, R(XMM1));
MAXPS(XMM0, R(XMM1)); MAXPS(XMM0, R(XMM1));
MINPS(XMM0, M((void *)&m_65535)); MINPS(XMM0, M(m_65535));
CVTTPS2DQ(XMM0, R(XMM0)); CVTTPS2DQ(XMM0, R(XMM0));
MOVQ_xmm(M(psTemp), XMM0); MOVQ_xmm(M(psTemp), XMM0);
// place ps[0] into the higher word, ps[1] into the lower // place ps[0] into the higher word, ps[1] into the lower
// so no ROL is needed after BSWAP // so no ROL is needed after BSWAP
MOVZX(32, 16, RSCRATCH, M((char*)psTemp + 0)); MOVZX(32, 16, RSCRATCH, M(&psTemp[0]));
SHL(32, R(RSCRATCH), Imm8(16)); SHL(32, R(RSCRATCH), Imm8(16));
MOV(16, R(RSCRATCH), M((char*)psTemp + 4)); MOV(16, R(RSCRATCH), M(&psTemp[1]));
BSWAP(32, RSCRATCH); BSWAP(32, RSCRATCH);
} }
@ -334,7 +334,7 @@ void CommonAsmRoutines::GenQuantizedStores()
MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS)); MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
MULPS(XMM0, R(XMM1)); MULPS(XMM0, R(XMM1));
#ifdef QUANTIZE_OVERFLOW_SAFE #ifdef QUANTIZE_OVERFLOW_SAFE
MINPS(XMM0, M((void *)&m_65535)); MINPS(XMM0, M(m_65535));
#endif #endif
CVTTPS2DQ(XMM0, R(XMM0)); CVTTPS2DQ(XMM0, R(XMM0));
PACKSSDW(XMM0, R(XMM0)); PACKSSDW(XMM0, R(XMM0));
@ -372,7 +372,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
/* /*
if (cpu_info.bSSSE3) if (cpu_info.bSSSE3)
{ {
PSHUFB(XMM0, M((void *)pbswapShuffle2x4)); PSHUFB(XMM0, M(pbswapShuffle2x4));
// TODO: SafeWriteFloat // TODO: SafeWriteFloat
MOVSS(M(&psTemp[0]), XMM0); MOVSS(M(&psTemp[0]), XMM0);
MOV(32, R(RSCRATCH), M(&psTemp[0])); MOV(32, R(RSCRATCH), M(&psTemp[0]));
@ -390,7 +390,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS)); MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
XORPS(XMM1, R(XMM1)); XORPS(XMM1, R(XMM1));
MAXSS(XMM0, R(XMM1)); MAXSS(XMM0, R(XMM1));
MINSS(XMM0, M((void *)&m_255)); MINSS(XMM0, M(&m_255));
CVTTSS2SI(RSCRATCH, R(XMM0)); CVTTSS2SI(RSCRATCH, R(XMM0));
SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
RET(); RET();
@ -398,8 +398,8 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
const u8* storeSingleS8 = AlignCode4(); const u8* storeSingleS8 = AlignCode4();
SHR(32, R(RSCRATCH2), Imm8(5)); SHR(32, R(RSCRATCH2), Imm8(5));
MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS)); MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
MAXSS(XMM0, M((void *)&m_m128)); MAXSS(XMM0, M(&m_m128));
MINSS(XMM0, M((void *)&m_127)); MINSS(XMM0, M(&m_127));
CVTTSS2SI(RSCRATCH, R(XMM0)); CVTTSS2SI(RSCRATCH, R(XMM0));
SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
RET(); RET();
@ -409,7 +409,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS)); MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
XORPS(XMM1, R(XMM1)); XORPS(XMM1, R(XMM1));
MAXSS(XMM0, R(XMM1)); MAXSS(XMM0, R(XMM1));
MINSS(XMM0, M((void *)&m_65535)); MINSS(XMM0, M(m_65535));
CVTTSS2SI(RSCRATCH, R(XMM0)); CVTTSS2SI(RSCRATCH, R(XMM0));
SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
RET(); RET();
@ -417,8 +417,8 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
const u8* storeSingleS16 = AlignCode4(); const u8* storeSingleS16 = AlignCode4();
SHR(32, R(RSCRATCH2), Imm8(5)); SHR(32, R(RSCRATCH2), Imm8(5));
MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS)); MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
MAXSS(XMM0, M((void *)&m_m32768)); MAXSS(XMM0, M(&m_m32768));
MINSS(XMM0, M((void *)&m_32767)); MINSS(XMM0, M(&m_32767));
CVTTSS2SI(RSCRATCH, R(XMM0)); CVTTSS2SI(RSCRATCH, R(XMM0));
SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
RET(); RET();
@ -451,7 +451,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
else if (cpu_info.bSSSE3) else if (cpu_info.bSSSE3)
{ {
MOVQ_xmm(XMM0, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0)); MOVQ_xmm(XMM0, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0));
PSHUFB(XMM0, M((void *)pbswapShuffle2x4)); PSHUFB(XMM0, M(pbswapShuffle2x4));
} }
else else
{ {
@ -466,19 +466,19 @@ void CommonAsmRoutines::GenQuantizedLoads()
{ {
SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 32, 0, QUANTIZED_REGS_TO_SAVE, false, SAFE_LOADSTORE_NO_PROLOG); SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 32, 0, QUANTIZED_REGS_TO_SAVE, false, SAFE_LOADSTORE_NO_PROLOG);
MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
UNPCKLPS(XMM0, M((void*)m_one)); UNPCKLPS(XMM0, M(m_one));
} }
else if (cpu_info.bSSSE3) else if (cpu_info.bSSSE3)
{ {
MOVD_xmm(XMM0, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0)); MOVD_xmm(XMM0, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0));
PSHUFB(XMM0, M((void *)pbswapShuffle1x4)); PSHUFB(XMM0, M(pbswapShuffle1x4));
UNPCKLPS(XMM0, M((void*)m_one)); UNPCKLPS(XMM0, M(m_one));
} }
else else
{ {
LoadAndSwap(32, RSCRATCH_EXTRA, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0)); LoadAndSwap(32, RSCRATCH_EXTRA, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0));
MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
UNPCKLPS(XMM0, M((void*)m_one)); UNPCKLPS(XMM0, M(m_one));
} }
RET(); RET();
@ -518,7 +518,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
CVTSI2SS(XMM0, R(RSCRATCH_EXTRA)); CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
SHR(32, R(RSCRATCH2), Imm8(5)); SHR(32, R(RSCRATCH2), Imm8(5));
MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS)); MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
UNPCKLPS(XMM0, M((void*)m_one)); UNPCKLPS(XMM0, M(m_one));
RET(); RET();
const u8* loadPairedS8Two = AlignCode4(); const u8* loadPairedS8Two = AlignCode4();
@ -557,7 +557,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
CVTSI2SS(XMM0, R(RSCRATCH_EXTRA)); CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
SHR(32, R(RSCRATCH2), Imm8(5)); SHR(32, R(RSCRATCH2), Imm8(5));
MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS)); MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
UNPCKLPS(XMM0, M((void*)m_one)); UNPCKLPS(XMM0, M(m_one));
RET(); RET();
const u8* loadPairedU16Two = AlignCode4(); const u8* loadPairedU16Two = AlignCode4();
@ -591,7 +591,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
CVTSI2SS(XMM0, R(RSCRATCH_EXTRA)); CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
SHR(32, R(RSCRATCH2), Imm8(5)); SHR(32, R(RSCRATCH2), Imm8(5));
MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS)); MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
UNPCKLPS(XMM0, M((void*)m_one)); UNPCKLPS(XMM0, M(m_one));
RET(); RET();
const u8* loadPairedS16Two = AlignCode4(); const u8* loadPairedS16Two = AlignCode4();
@ -624,7 +624,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
CVTSI2SS(XMM0, R(RSCRATCH_EXTRA)); CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
SHR(32, R(RSCRATCH2), Imm8(5)); SHR(32, R(RSCRATCH2), Imm8(5));
MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS)); MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
UNPCKLPS(XMM0, M((void*)m_one)); UNPCKLPS(XMM0, M(m_one));
RET(); RET();
pairedLoadQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16())); pairedLoadQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));

View File

@ -790,16 +790,16 @@ void EmuCodeBlock::Force25BitPrecision(X64Reg output, OpArg input, X64Reg tmp)
// mantissa = (mantissa & ~0xFFFFFFF) + ((mantissa & (1ULL << 27)) << 1); // mantissa = (mantissa & ~0xFFFFFFF) + ((mantissa & (1ULL << 27)) << 1);
if (input.IsSimpleReg() && cpu_info.bAVX) if (input.IsSimpleReg() && cpu_info.bAVX)
{ {
VPAND(tmp, input.GetSimpleReg(), M((void*)&psRoundBit)); VPAND(tmp, input.GetSimpleReg(), M(psRoundBit));
VPAND(output, input.GetSimpleReg(), M((void*)&psMantissaTruncate)); VPAND(output, input.GetSimpleReg(), M(psMantissaTruncate));
PADDQ(output, R(tmp)); PADDQ(output, R(tmp));
} }
else else
{ {
if (!input.IsSimpleReg() || input.GetSimpleReg() != output) if (!input.IsSimpleReg() || input.GetSimpleReg() != output)
MOVAPD(output, input); MOVAPD(output, input);
avx_op(&XEmitter::VPAND, &XEmitter::PAND, tmp, R(output), M((void*)&psRoundBit), true, true); avx_op(&XEmitter::VPAND, &XEmitter::PAND, tmp, R(output), M(psRoundBit), true, true);
PAND(output, M((void*)&psMantissaTruncate)); PAND(output, M(psMantissaTruncate));
PADDQ(output, R(tmp)); PADDQ(output, R(tmp));
} }
} }
@ -842,7 +842,7 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
MOVSD(XMM1, R(src)); MOVSD(XMM1, R(src));
// Grab Exponent // Grab Exponent
PAND(XMM1, M((void *)&double_exponent)); PAND(XMM1, M(&double_exponent));
PSRLQ(XMM1, 52); PSRLQ(XMM1, 52);
MOVD_xmm(R(RSCRATCH), XMM1); MOVD_xmm(R(RSCRATCH), XMM1);
@ -862,15 +862,15 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
// xmm1 = fraction | 0x0010000000000000 // xmm1 = fraction | 0x0010000000000000
MOVSD(XMM1, R(src)); MOVSD(XMM1, R(src));
PAND(XMM1, M((void *)&double_fraction)); PAND(XMM1, M(&double_fraction));
POR(XMM1, M((void *)&double_explicit_top_bit)); POR(XMM1, M(&double_explicit_top_bit));
// fraction >> shift // fraction >> shift
PSRLQ(XMM1, R(XMM0)); PSRLQ(XMM1, R(XMM0));
// OR the sign bit in. // OR the sign bit in.
MOVSD(XMM0, R(src)); MOVSD(XMM0, R(src));
PAND(XMM0, M((void *)&double_sign_bit)); PAND(XMM0, M(&double_sign_bit));
PSRLQ(XMM0, 32); PSRLQ(XMM0, 32);
POR(XMM1, R(XMM0)); POR(XMM1, R(XMM0));
@ -883,12 +883,12 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
// We want bits 0, 1 // We want bits 0, 1
MOVSD(XMM1, R(src)); MOVSD(XMM1, R(src));
PAND(XMM1, M((void *)&double_top_two_bits)); PAND(XMM1, M(&double_top_two_bits));
PSRLQ(XMM1, 32); PSRLQ(XMM1, 32);
// And 5 through to 34 // And 5 through to 34
MOVSD(XMM0, R(src)); MOVSD(XMM0, R(src));
PAND(XMM0, M((void *)&double_bottom_bits)); PAND(XMM0, M(&double_bottom_bits));
PSRLQ(XMM0, 29); PSRLQ(XMM0, 29);
// OR them together // OR them together
@ -988,7 +988,7 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
{ {
MOVQ_xmm(R(RSCRATCH), xmm); MOVQ_xmm(R(RSCRATCH), xmm);
SHR(64, R(RSCRATCH), Imm8(63)); // Get the sign bit; almost all the branches need it. SHR(64, R(RSCRATCH), Imm8(63)); // Get the sign bit; almost all the branches need it.
PTEST(xmm, M((void*)psDoubleExp)); PTEST(xmm, M(psDoubleExp));
FixupBranch maxExponent = J_CC(CC_C); FixupBranch maxExponent = J_CC(CC_C);
FixupBranch zeroExponent = J_CC(CC_Z); FixupBranch zeroExponent = J_CC(CC_Z);
@ -997,7 +997,7 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
continue1 = J(); continue1 = J();
SetJumpTarget(maxExponent); SetJumpTarget(maxExponent);
PTEST(xmm, M((void*)psDoubleFrac)); PTEST(xmm, M(psDoubleFrac));
FixupBranch notNAN = J_CC(CC_Z); FixupBranch notNAN = J_CC(CC_Z);
// Max exponent + mantissa: PPC_FPCLASS_QNAN // Max exponent + mantissa: PPC_FPCLASS_QNAN
@ -1025,10 +1025,10 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
else else
{ {
MOVQ_xmm(R(RSCRATCH), xmm); MOVQ_xmm(R(RSCRATCH), xmm);
TEST(64, R(RSCRATCH), M((void*)psDoubleExp)); TEST(64, R(RSCRATCH), M(psDoubleExp));
FixupBranch zeroExponent = J_CC(CC_Z); FixupBranch zeroExponent = J_CC(CC_Z);
AND(64, R(RSCRATCH), M((void*)psDoubleNoSign)); AND(64, R(RSCRATCH), M(psDoubleNoSign));
CMP(64, R(RSCRATCH), M((void*)psDoubleExp)); CMP(64, R(RSCRATCH), M(psDoubleExp));
FixupBranch nan = J_CC(CC_G); // This works because if the sign bit is set, RSCRATCH is negative FixupBranch nan = J_CC(CC_G); // This works because if the sign bit is set, RSCRATCH is negative
FixupBranch infinity = J_CC(CC_E); FixupBranch infinity = J_CC(CC_E);
MOVQ_xmm(R(RSCRATCH), xmm); MOVQ_xmm(R(RSCRATCH), xmm);

View File

@ -29,7 +29,7 @@ namespace MMIO { class Mapping; }
#define PPCSTATE(x) MDisp(RPPCSTATE, \ #define PPCSTATE(x) MDisp(RPPCSTATE, \
(int) ((char *) &PowerPC::ppcState.x - (char *) &PowerPC::ppcState) - 0x80) (int) ((char *) &PowerPC::ppcState.x - (char *) &PowerPC::ppcState) - 0x80)
// In case you want to disable the ppcstate register: // In case you want to disable the ppcstate register:
// #define PPCSTATE(x) M((void*) &PowerPC::ppcState.x) // #define PPCSTATE(x) M(&PowerPC::ppcState.x)
#define PPCSTATE_LR PPCSTATE(spr[SPR_LR]) #define PPCSTATE_LR PPCSTATE(spr[SPR_LR])
#define PPCSTATE_CTR PPCSTATE(spr[SPR_CTR]) #define PPCSTATE_CTR PPCSTATE(spr[SPR_CTR])
#define PPCSTATE_SRR0 PPCSTATE(spr[SPR_SRR0]) #define PPCSTATE_SRR0 PPCSTATE(spr[SPR_SRR0])