Symbolicize explicit uses of x86 registers where possible (GPRs only for now).
Uses are split into three categories:
- Arbitrary (except for size savings): symbolic constants like RSCRATCH are used.
- ABI (e.g. RAX as the return value): ABI_RETURN is used.
- Fixed by architecture (RCX for shifts, RDX/RAX for some instructions): the explicit register is kept.

In theory this allows the register assignments to be modified easily. I verified that I was able to run Melee with all the registers changed, although there may be issues if RSCRATCH/RSCRATCH2 and ABI_PARAM1/ABI_PARAM2 conflict.
This commit is contained in:
parent
100a7ac97b
commit
6fd0333c14
|
@ -53,3 +53,5 @@
|
|||
|
||||
#endif // WIN32
|
||||
|
||||
#define ABI_RETURN RAX
|
||||
|
||||
|
|
|
@ -39,14 +39,6 @@ using namespace PowerPC;
|
|||
|
||||
// Various notes below
|
||||
|
||||
// Register allocation
|
||||
// RAX - Generic quicktemp register
|
||||
// RBX - point to base of memory map
|
||||
// RSI RDI R12 R13 R14 R15 - free for allocation
|
||||
// RCX RDX R8 R9 R10 R11 - allocate in emergencies. These need to be flushed before functions are called.
|
||||
// RSP - stack pointer, do not generally use, very dangerous
|
||||
// RBP - ?
|
||||
|
||||
// IMPORTANT:
|
||||
// Make sure that all generated code and all emulator state sits under the 2GB boundary so that
|
||||
// RIP addressing can be used easily. Windows will always allocate static code under the 2GB boundary.
|
||||
|
@ -305,18 +297,18 @@ void Jit64::WriteExit(u32 destination)
|
|||
b->linkData.push_back(linkData);
|
||||
}
|
||||
|
||||
void Jit64::WriteExitDestInEAX()
|
||||
void Jit64::WriteExitDestInRSCRATCH()
|
||||
{
|
||||
MOV(32, PPCSTATE(pc), R(EAX));
|
||||
MOV(32, PPCSTATE(pc), R(RSCRATCH));
|
||||
Cleanup();
|
||||
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
|
||||
JMP(asm_routines.dispatcher, true);
|
||||
}
|
||||
|
||||
void Jit64::WriteRfiExitDestInEAX()
|
||||
void Jit64::WriteRfiExitDestInRSCRATCH()
|
||||
{
|
||||
MOV(32, PPCSTATE(pc), R(EAX));
|
||||
MOV(32, PPCSTATE(npc), R(EAX));
|
||||
MOV(32, PPCSTATE(pc), R(RSCRATCH));
|
||||
MOV(32, PPCSTATE(npc), R(RSCRATCH));
|
||||
Cleanup();
|
||||
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
|
||||
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
|
||||
|
@ -326,8 +318,8 @@ void Jit64::WriteRfiExitDestInEAX()
|
|||
void Jit64::WriteExceptionExit()
|
||||
{
|
||||
Cleanup();
|
||||
MOV(32, R(EAX), PPCSTATE(pc));
|
||||
MOV(32, PPCSTATE(npc), R(EAX));
|
||||
MOV(32, R(RSCRATCH), PPCSTATE(pc));
|
||||
MOV(32, PPCSTATE(npc), R(RSCRATCH));
|
||||
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
|
||||
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
|
||||
JMP(asm_routines.dispatcher, true);
|
||||
|
@ -336,8 +328,8 @@ void Jit64::WriteExceptionExit()
|
|||
void Jit64::WriteExternalExceptionExit()
|
||||
{
|
||||
Cleanup();
|
||||
MOV(32, R(EAX), PPCSTATE(pc));
|
||||
MOV(32, PPCSTATE(npc), R(EAX));
|
||||
MOV(32, R(RSCRATCH), PPCSTATE(pc));
|
||||
MOV(32, PPCSTATE(npc), R(RSCRATCH));
|
||||
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExternalExceptions));
|
||||
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
|
||||
JMP(asm_routines.dispatcher, true);
|
||||
|
@ -520,9 +512,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
|||
HLEFunction(function);
|
||||
if (type == HLE::HLE_HOOK_REPLACE)
|
||||
{
|
||||
MOV(32, R(EAX), PPCSTATE(npc));
|
||||
MOV(32, R(RSCRATCH), PPCSTATE(npc));
|
||||
js.downcountAmount += js.st.numCycles;
|
||||
WriteExitDestInEAX();
|
||||
WriteExitDestInRSCRATCH();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -650,8 +642,8 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
|||
OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_ISI));
|
||||
|
||||
// Remove the invalid instruction from the icache, forcing a recompile
|
||||
MOV(64, R(RAX), ImmPtr(jit->GetBlockCache()->GetICachePtr(js.compilerPC)));
|
||||
MOV(32,MatR(RAX),Imm32(JIT_ICACHE_INVALID_WORD));
|
||||
MOV(64, R(RSCRATCH), ImmPtr(jit->GetBlockCache()->GetICachePtr(js.compilerPC)));
|
||||
MOV(32,MatR(RSCRATCH),Imm32(JIT_ICACHE_INVALID_WORD));
|
||||
|
||||
WriteExceptionExit();
|
||||
}
|
||||
|
|
|
@ -90,10 +90,10 @@ public:
|
|||
// Utilities for use by opcodes
|
||||
|
||||
void WriteExit(u32 destination);
|
||||
void WriteExitDestInEAX();
|
||||
void WriteExitDestInRSCRATCH();
|
||||
void WriteExceptionExit();
|
||||
void WriteExternalExceptionExit();
|
||||
void WriteRfiExitDestInEAX();
|
||||
void WriteRfiExitDestInRSCRATCH();
|
||||
void WriteCallInterpreter(UGeckoInstruction _inst);
|
||||
void Cleanup();
|
||||
|
||||
|
@ -101,8 +101,8 @@ public:
|
|||
void GenerateConstantOverflow(s64 val);
|
||||
void GenerateOverflow();
|
||||
void FinalizeCarryOverflow(bool oe, bool inv = false);
|
||||
void GetCarryEAXAndClear();
|
||||
void FinalizeCarryGenerateOverflowEAX(bool oe, bool inv = false);
|
||||
void GetCarryRSCRATCHAndClear();
|
||||
void FinalizeCarryGenerateOverflowRSCRATCH(bool oe, bool inv = false);
|
||||
void GenerateCarry();
|
||||
void GenerateRC();
|
||||
void ComputeRC(const Gen::OpArg & arg);
|
||||
|
|
|
@ -9,13 +9,6 @@
|
|||
|
||||
using namespace Gen;
|
||||
|
||||
// GLOBAL STATIC ALLOCATIONS x64
|
||||
// RAX - ubiquitous scratch register - EVERYBODY scratches this
|
||||
// RDX - second scratch register
|
||||
// RBX - Base pointer of memory
|
||||
// R15 - Pointer to array of block pointers
|
||||
// RBP - Pointer to ppcState+0x80
|
||||
|
||||
// PLAN: no more block numbers - crazy opcodes just contain offset within
|
||||
// dynarec buffer
|
||||
// At this offset - 4, there is an int specifying the block number.
|
||||
|
@ -26,9 +19,9 @@ void Jit64AsmRoutineManager::Generate()
|
|||
ABI_PushAllCalleeSavedRegsAndAdjustStack();
|
||||
|
||||
// Two statically allocated registers.
|
||||
MOV(64, R(RBX), Imm64((u64)Memory::base));
|
||||
MOV(64, R(R15), Imm64((u64)jit->GetBlockCache()->GetCodePointers())); //It's below 2GB so 32 bits are good enough
|
||||
MOV(64, R(RBP), Imm64((u64)&PowerPC::ppcState + 0x80));
|
||||
MOV(64, R(RMEM), Imm64((u64)Memory::base));
|
||||
MOV(64, R(RCODE_POINTERS), Imm64((u64)jit->GetBlockCache()->GetCodePointers())); //It's below 2GB so 32 bits are good enough
|
||||
MOV(64, R(RPPCSTATE), Imm64((u64)&PowerPC::ppcState + 0x80));
|
||||
|
||||
const u8* outerLoop = GetCodePtr();
|
||||
ABI_CallFunction(reinterpret_cast<void *>(&CoreTiming::Advance));
|
||||
|
@ -55,8 +48,8 @@ void Jit64AsmRoutineManager::Generate()
|
|||
SetJumpTarget(skipToRealDispatch);
|
||||
|
||||
dispatcherNoCheck = GetCodePtr();
|
||||
MOV(32, R(EAX), PPCSTATE(pc));
|
||||
dispatcherPcInEAX = GetCodePtr();
|
||||
MOV(32, R(RSCRATCH), PPCSTATE(pc));
|
||||
dispatcherPcInRSCRATCH = GetCodePtr();
|
||||
|
||||
u32 mask = 0;
|
||||
FixupBranch no_mem;
|
||||
|
@ -68,12 +61,12 @@ void Jit64AsmRoutineManager::Generate()
|
|||
mask |= JIT_ICACHE_VMEM_BIT;
|
||||
if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack)
|
||||
{
|
||||
TEST(32, R(EAX), Imm32(mask));
|
||||
TEST(32, R(RSCRATCH), Imm32(mask));
|
||||
no_mem = J_CC(CC_NZ);
|
||||
}
|
||||
AND(32, R(EAX), Imm32(JIT_ICACHE_MASK));
|
||||
MOV(64, R(RDX), Imm64((u64)jit->GetBlockCache()->iCache));
|
||||
MOV(32, R(EAX), MComplex(RDX, EAX, SCALE_1, 0));
|
||||
AND(32, R(RSCRATCH), Imm32(JIT_ICACHE_MASK));
|
||||
MOV(64, R(RSCRATCH2), Imm64((u64)jit->GetBlockCache()->iCache));
|
||||
MOV(32, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_1, 0));
|
||||
|
||||
if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack)
|
||||
{
|
||||
|
@ -82,22 +75,22 @@ void Jit64AsmRoutineManager::Generate()
|
|||
}
|
||||
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack)
|
||||
{
|
||||
TEST(32, R(EAX), Imm32(JIT_ICACHE_VMEM_BIT));
|
||||
TEST(32, R(RSCRATCH), Imm32(JIT_ICACHE_VMEM_BIT));
|
||||
FixupBranch no_vmem = J_CC(CC_Z);
|
||||
AND(32, R(EAX), Imm32(JIT_ICACHE_MASK));
|
||||
MOV(64, R(RDX), Imm64((u64)jit->GetBlockCache()->iCacheVMEM));
|
||||
MOV(32, R(EAX), MComplex(RDX, EAX, SCALE_1, 0));
|
||||
AND(32, R(RSCRATCH), Imm32(JIT_ICACHE_MASK));
|
||||
MOV(64, R(RSCRATCH2), Imm64((u64)jit->GetBlockCache()->iCacheVMEM));
|
||||
MOV(32, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_1, 0));
|
||||
|
||||
if (Core::g_CoreStartupParameter.bWii) exit_vmem = J();
|
||||
SetJumpTarget(no_vmem);
|
||||
}
|
||||
if (Core::g_CoreStartupParameter.bWii)
|
||||
{
|
||||
TEST(32, R(EAX), Imm32(JIT_ICACHE_EXRAM_BIT));
|
||||
TEST(32, R(RSCRATCH), Imm32(JIT_ICACHE_EXRAM_BIT));
|
||||
FixupBranch no_exram = J_CC(CC_Z);
|
||||
AND(32, R(EAX), Imm32(JIT_ICACHEEX_MASK));
|
||||
MOV(64, R(RDX), Imm64((u64)jit->GetBlockCache()->iCacheEx));
|
||||
MOV(32, R(EAX), MComplex(RDX, EAX, SCALE_1, 0));
|
||||
AND(32, R(RSCRATCH), Imm32(JIT_ICACHEEX_MASK));
|
||||
MOV(64, R(RSCRATCH2), Imm64((u64)jit->GetBlockCache()->iCacheEx));
|
||||
MOV(32, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_1, 0));
|
||||
|
||||
SetJumpTarget(no_exram);
|
||||
}
|
||||
|
@ -106,10 +99,10 @@ void Jit64AsmRoutineManager::Generate()
|
|||
if (Core::g_CoreStartupParameter.bWii && (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack))
|
||||
SetJumpTarget(exit_vmem);
|
||||
|
||||
TEST(32, R(EAX), R(EAX));
|
||||
TEST(32, R(RSCRATCH), R(RSCRATCH));
|
||||
FixupBranch notfound = J_CC(CC_L);
|
||||
//grab from list and jump to it
|
||||
JMPptr(MComplex(R15, RAX, 8, 0));
|
||||
JMPptr(MComplex(RCODE_POINTERS, RSCRATCH, 8, 0));
|
||||
SetJumpTarget(notfound);
|
||||
|
||||
//Ok, no block, let's jit
|
||||
|
@ -124,8 +117,8 @@ void Jit64AsmRoutineManager::Generate()
|
|||
// Test external exceptions.
|
||||
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT | EXCEPTION_PERFORMANCE_MONITOR | EXCEPTION_DECREMENTER));
|
||||
FixupBranch noExtException = J_CC(CC_Z);
|
||||
MOV(32, R(EAX), PPCSTATE(pc));
|
||||
MOV(32, PPCSTATE(npc), R(EAX));
|
||||
MOV(32, R(RSCRATCH), PPCSTATE(pc));
|
||||
MOV(32, PPCSTATE(npc), R(RSCRATCH));
|
||||
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExternalExceptions));
|
||||
SetJumpTarget(noExtException);
|
||||
|
||||
|
@ -168,8 +161,8 @@ void Jit64AsmRoutineManager::GenerateCommon()
|
|||
const u8 *fastMemWrite8 = AlignCode16();
|
||||
CMP(32, R(ABI_PARAM2), Imm32(0xCC008000));
|
||||
FixupBranch skip_fast_write = J_CC(CC_NE, false);
|
||||
MOV(32, EAX, M(&m_gatherPipeCount));
|
||||
MOV(8, MDisp(EAX, (u32)&m_gatherPipe), ABI_PARAM1);
|
||||
MOV(32, RSCRATCH, M(&m_gatherPipeCount));
|
||||
MOV(8, MDisp(RSCRATCH, (u32)&m_gatherPipe), ABI_PARAM1);
|
||||
ADD(32, 1, M(&m_gatherPipeCount));
|
||||
RET();
|
||||
SetJumpTarget(skip_fast_write);
|
||||
|
|
|
@ -46,12 +46,12 @@ void Jit64::rfi(UGeckoInstruction inst)
|
|||
const u32 clearMSR13 = 0xFFFBFFFF; // Mask used to clear the bit MSR[13]
|
||||
// MSR = ((MSR & ~mask) | (SRR1 & mask)) & clearMSR13;
|
||||
AND(32, PPCSTATE(msr), Imm32((~mask) & clearMSR13));
|
||||
MOV(32, R(EAX), PPCSTATE_SRR1);
|
||||
AND(32, R(EAX), Imm32(mask & clearMSR13));
|
||||
OR(32, PPCSTATE(msr), R(EAX));
|
||||
MOV(32, R(RSCRATCH), PPCSTATE_SRR1);
|
||||
AND(32, R(RSCRATCH), Imm32(mask & clearMSR13));
|
||||
OR(32, PPCSTATE(msr), R(RSCRATCH));
|
||||
// NPC = SRR0;
|
||||
MOV(32, R(EAX), PPCSTATE_SRR0);
|
||||
WriteRfiExitDestInEAX();
|
||||
MOV(32, R(RSCRATCH), PPCSTATE_SRR0);
|
||||
WriteRfiExitDestInRSCRATCH();
|
||||
}
|
||||
|
||||
void Jit64::bx(UGeckoInstruction inst)
|
||||
|
@ -164,11 +164,11 @@ void Jit64::bcctrx(UGeckoInstruction inst)
|
|||
gpr.Flush();
|
||||
fpr.Flush();
|
||||
|
||||
MOV(32, R(EAX), PPCSTATE_CTR);
|
||||
MOV(32, R(RSCRATCH), PPCSTATE_CTR);
|
||||
if (inst.LK_3)
|
||||
MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // LR = PC + 4;
|
||||
AND(32, R(EAX), Imm32(0xFFFFFFFC));
|
||||
WriteExitDestInEAX();
|
||||
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
|
||||
WriteExitDestInRSCRATCH();
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -179,15 +179,15 @@ void Jit64::bcctrx(UGeckoInstruction inst)
|
|||
|
||||
FixupBranch b = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3),
|
||||
!(inst.BO_2 & BO_BRANCH_IF_TRUE));
|
||||
MOV(32, R(EAX), PPCSTATE_CTR);
|
||||
AND(32, R(EAX), Imm32(0xFFFFFFFC));
|
||||
//MOV(32, PPCSTATE(pc), R(EAX)); => Already done in WriteExitDestInEAX()
|
||||
MOV(32, R(RSCRATCH), PPCSTATE_CTR);
|
||||
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
|
||||
//MOV(32, PPCSTATE(pc), R(RSCRATCH)); => Already done in WriteExitDestInRSCRATCH()
|
||||
if (inst.LK_3)
|
||||
MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // LR = PC + 4;
|
||||
|
||||
gpr.Flush(FLUSH_MAINTAIN_STATE);
|
||||
fpr.Flush(FLUSH_MAINTAIN_STATE);
|
||||
WriteExitDestInEAX();
|
||||
WriteExitDestInRSCRATCH();
|
||||
// Would really like to continue the block here, but it ends. TODO.
|
||||
SetJumpTarget(b);
|
||||
|
||||
|
@ -224,14 +224,14 @@ void Jit64::bclrx(UGeckoInstruction inst)
|
|||
AND(32, PPCSTATE(cr), Imm32(~(0xFF000000)));
|
||||
#endif
|
||||
|
||||
MOV(32, R(EAX), PPCSTATE_LR);
|
||||
AND(32, R(EAX), Imm32(0xFFFFFFFC));
|
||||
MOV(32, R(RSCRATCH), PPCSTATE_LR);
|
||||
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
|
||||
if (inst.LK)
|
||||
MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4));
|
||||
|
||||
gpr.Flush(FLUSH_MAINTAIN_STATE);
|
||||
fpr.Flush(FLUSH_MAINTAIN_STATE);
|
||||
WriteExitDestInEAX();
|
||||
WriteExitDestInRSCRATCH();
|
||||
|
||||
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
|
||||
SetJumpTarget( pConditionDontBranch );
|
||||
|
|
|
@ -271,14 +271,14 @@ void Jit64::fcmpx(UGeckoInstruction inst)
|
|||
pGreater = J_CC(CC_B);
|
||||
}
|
||||
|
||||
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_EQ)));
|
||||
MOV(64, R(RSCRATCH), Imm64(PPCCRToInternal(CR_EQ)));
|
||||
if (fprf)
|
||||
OR(32, PPCSTATE(fpscr), Imm32(CR_EQ << FPRF_SHIFT));
|
||||
|
||||
continue1 = J();
|
||||
|
||||
SetJumpTarget(pNaN);
|
||||
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_SO)));
|
||||
MOV(64, R(RSCRATCH), Imm64(PPCCRToInternal(CR_SO)));
|
||||
if (fprf)
|
||||
OR(32, PPCSTATE(fpscr), Imm32(CR_SO << FPRF_SHIFT));
|
||||
|
||||
|
@ -287,13 +287,13 @@ void Jit64::fcmpx(UGeckoInstruction inst)
|
|||
continue2 = J();
|
||||
|
||||
SetJumpTarget(pGreater);
|
||||
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_GT)));
|
||||
MOV(64, R(RSCRATCH), Imm64(PPCCRToInternal(CR_GT)));
|
||||
if (fprf)
|
||||
OR(32, PPCSTATE(fpscr), Imm32(CR_GT << FPRF_SHIFT));
|
||||
continue3 = J();
|
||||
|
||||
SetJumpTarget(pLesser);
|
||||
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_LT)));
|
||||
MOV(64, R(RSCRATCH), Imm64(PPCCRToInternal(CR_LT)));
|
||||
if (fprf)
|
||||
OR(32, PPCSTATE(fpscr), Imm32(CR_LT << FPRF_SHIFT));
|
||||
}
|
||||
|
@ -305,7 +305,7 @@ void Jit64::fcmpx(UGeckoInstruction inst)
|
|||
SetJumpTarget(continue3);
|
||||
}
|
||||
|
||||
MOV(64, PPCSTATE(cr_val[crf]), R(RAX));
|
||||
MOV(64, PPCSTATE(cr_val[crf]), R(RSCRATCH));
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
|
@ -375,8 +375,7 @@ void Jit64::frsqrtex(UGeckoInstruction inst)
|
|||
int b = inst.FB;
|
||||
int d = inst.FD;
|
||||
|
||||
// rsqrtex requires ECX and EDX free
|
||||
gpr.FlushLockX(ECX, EDX);
|
||||
gpr.FlushLockX(RSCRATCH_EXTRA);
|
||||
fpr.Lock(b, d);
|
||||
fpr.BindToRegister(d, d == b);
|
||||
MOVSD(XMM0, fpr.R(b));
|
||||
|
@ -395,8 +394,7 @@ void Jit64::fresx(UGeckoInstruction inst)
|
|||
int b = inst.FB;
|
||||
int d = inst.FD;
|
||||
|
||||
// resx requires ECX and EDX free
|
||||
gpr.FlushLockX(ECX, EDX);
|
||||
gpr.FlushLockX(RSCRATCH_EXTRA);
|
||||
fpr.Lock(b, d);
|
||||
fpr.BindToRegister(d, d == b);
|
||||
MOVSD(XMM0, fpr.R(b));
|
||||
|
|
|
@ -72,14 +72,14 @@ void Jit64::FinalizeCarryOverflow(bool oe, bool inv)
|
|||
}
|
||||
}
|
||||
|
||||
void Jit64::GetCarryEAXAndClear()
|
||||
void Jit64::GetCarryRSCRATCHAndClear()
|
||||
{
|
||||
MOV(32, R(EAX), PPCSTATE(spr[SPR_XER]));
|
||||
BTR(32, R(EAX), Imm8(29));
|
||||
MOV(32, R(RSCRATCH), PPCSTATE(spr[SPR_XER]));
|
||||
BTR(32, R(RSCRATCH), Imm8(29));
|
||||
}
|
||||
|
||||
// Assumes that XER is in EAX and that the CA bit is clear.
|
||||
void Jit64::FinalizeCarryGenerateOverflowEAX(bool oe, bool inv)
|
||||
// Assumes that XER is in RSCRATCH and that the CA bit is clear.
|
||||
void Jit64::FinalizeCarryGenerateOverflowRSCRATCH(bool oe, bool inv)
|
||||
{
|
||||
// USES_XER
|
||||
if (oe)
|
||||
|
@ -87,29 +87,29 @@ void Jit64::FinalizeCarryGenerateOverflowEAX(bool oe, bool inv)
|
|||
FixupBranch jno = J_CC(CC_NO);
|
||||
// Do carry
|
||||
FixupBranch carry1 = J_CC(inv ? CC_C : CC_NC);
|
||||
OR(32, R(EAX), Imm32(XER_CA_MASK));
|
||||
OR(32, R(RSCRATCH), Imm32(XER_CA_MASK));
|
||||
SetJumpTarget(carry1);
|
||||
//XER[OV/SO] = 1
|
||||
OR(32, R(EAX), Imm32(XER_SO_MASK | XER_OV_MASK));
|
||||
OR(32, R(RSCRATCH), Imm32(XER_SO_MASK | XER_OV_MASK));
|
||||
FixupBranch exit = J();
|
||||
SetJumpTarget(jno);
|
||||
// Do carry
|
||||
FixupBranch carry2 = J_CC(inv ? CC_C : CC_NC);
|
||||
OR(32, R(EAX), Imm32(XER_CA_MASK));
|
||||
OR(32, R(RSCRATCH), Imm32(XER_CA_MASK));
|
||||
SetJumpTarget(carry2);
|
||||
//XER[OV] = 0
|
||||
AND(32, R(EAX), Imm32(~XER_OV_MASK));
|
||||
AND(32, R(RSCRATCH), Imm32(~XER_OV_MASK));
|
||||
SetJumpTarget(exit);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Do carry
|
||||
FixupBranch carry1 = J_CC(inv ? CC_C : CC_NC);
|
||||
OR(32, R(EAX), Imm32(XER_CA_MASK));
|
||||
OR(32, R(RSCRATCH), Imm32(XER_CA_MASK));
|
||||
SetJumpTarget(carry1);
|
||||
}
|
||||
// Dump EAX back into XER
|
||||
MOV(32, PPCSTATE(spr[SPR_XER]), R(EAX));
|
||||
// Dump RSCRATCH back into XER
|
||||
MOV(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH));
|
||||
}
|
||||
|
||||
// Assumes that the flags were just set through an addition.
|
||||
|
@ -132,8 +132,8 @@ void Jit64::ComputeRC(const Gen::OpArg & arg)
|
|||
}
|
||||
else
|
||||
{
|
||||
MOVSX(64, 32, RAX, arg);
|
||||
MOV(64, PPCSTATE(cr_val[0]), R(RAX));
|
||||
MOVSX(64, 32, RSCRATCH, arg);
|
||||
MOV(64, PPCSTATE(cr_val[0]), R(RSCRATCH));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -374,8 +374,8 @@ void Jit64::cmpXX(UGeckoInstruction inst)
|
|||
else
|
||||
compareResult = CR_LT;
|
||||
}
|
||||
MOV(64, R(RAX), Imm64(PPCCRToInternal(compareResult)));
|
||||
MOV(64, PPCSTATE(cr_val[crf]), R(RAX));
|
||||
MOV(64, R(RSCRATCH), Imm64(PPCCRToInternal(compareResult)));
|
||||
MOV(64, PPCSTATE(cr_val[crf]), R(RSCRATCH));
|
||||
gpr.UnlockAll();
|
||||
|
||||
if (merge_branch)
|
||||
|
@ -406,16 +406,16 @@ void Jit64::cmpXX(UGeckoInstruction inst)
|
|||
{
|
||||
if (js.next_inst.LK)
|
||||
MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4));
|
||||
MOV(32, R(EAX), PPCSTATE_CTR);
|
||||
AND(32, R(EAX), Imm32(0xFFFFFFFC));
|
||||
WriteExitDestInEAX();
|
||||
MOV(32, R(RSCRATCH), PPCSTATE_CTR);
|
||||
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
|
||||
WriteExitDestInRSCRATCH();
|
||||
}
|
||||
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx
|
||||
{
|
||||
MOV(32, R(EAX), PPCSTATE_LR);
|
||||
MOV(32, R(RSCRATCH), PPCSTATE_LR);
|
||||
if (js.next_inst.LK)
|
||||
MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4));
|
||||
WriteExitDestInEAX();
|
||||
WriteExitDestInRSCRATCH();
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -436,32 +436,32 @@ void Jit64::cmpXX(UGeckoInstruction inst)
|
|||
if (signedCompare)
|
||||
{
|
||||
if (gpr.R(a).IsImm())
|
||||
MOV(64, R(RAX), Imm32((s32)gpr.R(a).offset));
|
||||
MOV(64, R(RSCRATCH), Imm32((s32)gpr.R(a).offset));
|
||||
else
|
||||
MOVSX(64, 32, RAX, gpr.R(a));
|
||||
MOVSX(64, 32, RSCRATCH, gpr.R(a));
|
||||
|
||||
if (!comparand.IsImm())
|
||||
{
|
||||
MOVSX(64, 32, RDX, comparand);
|
||||
comparand = R(RDX);
|
||||
MOVSX(64, 32, RSCRATCH2, comparand);
|
||||
comparand = R(RSCRATCH2);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (gpr.R(a).IsImm())
|
||||
MOV(32, R(RAX), Imm32((u32)gpr.R(a).offset));
|
||||
MOV(32, R(RSCRATCH), Imm32((u32)gpr.R(a).offset));
|
||||
else
|
||||
MOVZX(64, 32, RAX, gpr.R(a));
|
||||
MOVZX(64, 32, RSCRATCH, gpr.R(a));
|
||||
|
||||
if (comparand.IsImm())
|
||||
MOV(32, R(RDX), comparand);
|
||||
MOV(32, R(RSCRATCH2), comparand);
|
||||
else
|
||||
MOVZX(64, 32, RDX, comparand);
|
||||
MOVZX(64, 32, RSCRATCH2, comparand);
|
||||
|
||||
comparand = R(RDX);
|
||||
comparand = R(RSCRATCH2);
|
||||
}
|
||||
SUB(64, R(RAX), comparand);
|
||||
MOV(64, PPCSTATE(cr_val[crf]), R(RAX));
|
||||
SUB(64, R(RSCRATCH), comparand);
|
||||
MOV(64, PPCSTATE(cr_val[crf]), R(RSCRATCH));
|
||||
|
||||
if (merge_branch)
|
||||
{
|
||||
|
@ -506,19 +506,19 @@ void Jit64::cmpXX(UGeckoInstruction inst)
|
|||
if (js.next_inst.LK)
|
||||
MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4));
|
||||
|
||||
MOV(32, R(EAX), PPCSTATE_CTR);
|
||||
AND(32, R(EAX), Imm32(0xFFFFFFFC));
|
||||
WriteExitDestInEAX();
|
||||
MOV(32, R(RSCRATCH), PPCSTATE_CTR);
|
||||
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
|
||||
WriteExitDestInRSCRATCH();
|
||||
}
|
||||
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx
|
||||
{
|
||||
MOV(32, R(EAX), PPCSTATE_LR);
|
||||
AND(32, R(EAX), Imm32(0xFFFFFFFC));
|
||||
MOV(32, R(RSCRATCH), PPCSTATE_LR);
|
||||
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
|
||||
|
||||
if (js.next_inst.LK)
|
||||
MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4));
|
||||
|
||||
WriteExitDestInEAX();
|
||||
WriteExitDestInRSCRATCH();
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -636,9 +636,9 @@ void Jit64::boolX(UGeckoInstruction inst)
|
|||
}
|
||||
else
|
||||
{
|
||||
MOV(32, R(EAX), operand);
|
||||
NOT(32, R(EAX));
|
||||
AND(32, gpr.R(a), R(EAX));
|
||||
MOV(32, R(RSCRATCH), operand);
|
||||
NOT(32, R(RSCRATCH));
|
||||
AND(32, gpr.R(a), R(RSCRATCH));
|
||||
}
|
||||
}
|
||||
else if (inst.SUBOP10 == 444) // orx
|
||||
|
@ -659,9 +659,9 @@ void Jit64::boolX(UGeckoInstruction inst)
|
|||
}
|
||||
else
|
||||
{
|
||||
MOV(32, R(EAX), operand);
|
||||
NOT(32, R(EAX));
|
||||
OR(32, gpr.R(a), R(EAX));
|
||||
MOV(32, R(RSCRATCH), operand);
|
||||
NOT(32, R(RSCRATCH));
|
||||
OR(32, gpr.R(a), R(RSCRATCH));
|
||||
}
|
||||
}
|
||||
else if (inst.SUBOP10 == 316) // xorx
|
||||
|
@ -755,11 +755,7 @@ void Jit64::extsbx(UGeckoInstruction inst)
|
|||
{
|
||||
gpr.Lock(a, s);
|
||||
gpr.BindToRegister(a, a == s, true);
|
||||
// Always force moving to EAX because it isn't possible
|
||||
// to refer to the lowest byte of some registers, at least in
|
||||
// 32-bit mode.
|
||||
MOV(32, R(EAX), gpr.R(s));
|
||||
MOVSX(32, 8, gpr.RX(a), R(AL)); // watch out for ah and friends
|
||||
MOVSX(32, 8, gpr.RX(a), gpr.R(s));
|
||||
gpr.UnlockAll();
|
||||
}
|
||||
|
||||
|
@ -863,9 +859,9 @@ void Jit64::subfcx(UGeckoInstruction inst)
|
|||
}
|
||||
else if (d == a)
|
||||
{
|
||||
MOV(32, R(EAX), gpr.R(a));
|
||||
MOV(32, R(RSCRATCH), gpr.R(a));
|
||||
MOV(32, gpr.R(d), gpr.R(b));
|
||||
SUB(32, gpr.R(d), R(EAX));
|
||||
SUB(32, gpr.R(d), R(RSCRATCH));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -887,7 +883,7 @@ void Jit64::subfex(UGeckoInstruction inst)
|
|||
gpr.Lock(a, b, d);
|
||||
gpr.BindToRegister(d, (d == a || d == b), true);
|
||||
|
||||
GetCarryEAXAndClear();
|
||||
GetCarryRSCRATCHAndClear();
|
||||
|
||||
bool invertedCarry = false;
|
||||
if (d == b)
|
||||
|
@ -908,7 +904,7 @@ void Jit64::subfex(UGeckoInstruction inst)
|
|||
NOT(32, gpr.R(d));
|
||||
ADC(32, gpr.R(d), gpr.R(b));
|
||||
}
|
||||
FinalizeCarryGenerateOverflowEAX(inst.OE, invertedCarry);
|
||||
FinalizeCarryGenerateOverflowRSCRATCH(inst.OE, invertedCarry);
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
|
||||
|
@ -924,14 +920,14 @@ void Jit64::subfmex(UGeckoInstruction inst)
|
|||
gpr.Lock(a, d);
|
||||
gpr.BindToRegister(d, d == a);
|
||||
|
||||
GetCarryEAXAndClear();
|
||||
GetCarryRSCRATCHAndClear();
|
||||
if (d != a)
|
||||
{
|
||||
MOV(32, gpr.R(d), gpr.R(a));
|
||||
}
|
||||
NOT(32, gpr.R(d));
|
||||
ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
|
||||
FinalizeCarryGenerateOverflowEAX(inst.OE);
|
||||
FinalizeCarryGenerateOverflowRSCRATCH(inst.OE);
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
gpr.UnlockAll();
|
||||
|
@ -947,14 +943,14 @@ void Jit64::subfzex(UGeckoInstruction inst)
|
|||
gpr.Lock(a, d);
|
||||
gpr.BindToRegister(d, d == a);
|
||||
|
||||
GetCarryEAXAndClear();
|
||||
GetCarryRSCRATCHAndClear();
|
||||
if (d != a)
|
||||
{
|
||||
MOV(32, gpr.R(d), gpr.R(a));
|
||||
}
|
||||
NOT(32, gpr.R(d));
|
||||
ADC(32, gpr.R(d), Imm8(0));
|
||||
FinalizeCarryGenerateOverflowEAX(inst.OE);
|
||||
FinalizeCarryGenerateOverflowRSCRATCH(inst.OE);
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
|
||||
|
@ -990,9 +986,9 @@ void Jit64::subfx(UGeckoInstruction inst)
|
|||
}
|
||||
else if (d == a)
|
||||
{
|
||||
MOV(32, R(EAX), gpr.R(a));
|
||||
MOV(32, R(RSCRATCH), gpr.R(a));
|
||||
MOV(32, gpr.R(d), gpr.R(b));
|
||||
SUB(32, gpr.R(d), R(EAX));
|
||||
SUB(32, gpr.R(d), R(RSCRATCH));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1171,9 +1167,9 @@ void Jit64::mulhwXx(UGeckoInstruction inst)
|
|||
else
|
||||
{
|
||||
gpr.Lock(a, b, d);
|
||||
// no register choice
|
||||
gpr.FlushLockX(EDX, EAX);
|
||||
gpr.BindToRegister(d, (d == a || d == b), true);
|
||||
if (gpr.RX(d) == EDX)
|
||||
PanicAlert("mulhwux : WTF");
|
||||
MOV(32, R(EAX), gpr.R(a));
|
||||
gpr.KillImmediate(b, true, false);
|
||||
if (sign)
|
||||
|
@ -1252,11 +1248,11 @@ void Jit64::divwux(UGeckoInstruction inst)
|
|||
// If failed, use slower round-down method
|
||||
gpr.Lock(a, b, d);
|
||||
gpr.BindToRegister(d, d == a, true);
|
||||
MOV(32, R(EAX), Imm32(magic));
|
||||
MOV(32, R(RSCRATCH), Imm32(magic));
|
||||
if (d != a)
|
||||
MOV(32, gpr.R(d), gpr.R(a));
|
||||
IMUL(64, gpr.RX(d), R(RAX));
|
||||
ADD(64, gpr.R(d), R(RAX));
|
||||
IMUL(64, gpr.RX(d), R(RSCRATCH));
|
||||
ADD(64, gpr.R(d), R(RSCRATCH));
|
||||
SHR(64, gpr.R(d), Imm8(shift+32));
|
||||
}
|
||||
else
|
||||
|
@ -1267,8 +1263,8 @@ void Jit64::divwux(UGeckoInstruction inst)
|
|||
gpr.BindToRegister(d, false, true);
|
||||
if (d == a)
|
||||
{
|
||||
MOV(32, R(EAX), Imm32(magic+1));
|
||||
IMUL(64, gpr.RX(d), R(RAX));
|
||||
MOV(32, R(RSCRATCH), Imm32(magic+1));
|
||||
IMUL(64, gpr.RX(d), R(RSCRATCH));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1288,6 +1284,8 @@ void Jit64::divwux(UGeckoInstruction inst)
|
|||
else
|
||||
{
|
||||
gpr.Lock(a, b, d);
|
||||
// no register choice (do we need to do this?)
|
||||
gpr.FlushLockX(EAX, EDX);
|
||||
gpr.BindToRegister(d, (d == a || d == b), true);
|
||||
MOV(32, R(EAX), gpr.R(a));
|
||||
XOR(32, R(EDX), R(EDX));
|
||||
|
@ -1299,7 +1297,7 @@ void Jit64::divwux(UGeckoInstruction inst)
|
|||
{
|
||||
GenerateConstantOverflow(true);
|
||||
}
|
||||
//MOV(32, R(EAX), gpr.R(d));
|
||||
//MOV(32, R(RAX), gpr.R(d));
|
||||
FixupBranch end = J();
|
||||
SetJumpTarget(not_div_by_zero);
|
||||
DIV(32, gpr.R(b));
|
||||
|
@ -1348,6 +1346,8 @@ void Jit64::divwx(UGeckoInstruction inst)
|
|||
else
|
||||
{
|
||||
gpr.Lock(a, b, d);
|
||||
// no register choice
|
||||
gpr.FlushLockX(EAX, EDX);
|
||||
gpr.BindToRegister(d, (d == a || d == b), true);
|
||||
MOV(32, R(EAX), gpr.R(a));
|
||||
CDQ();
|
||||
|
@ -1456,9 +1456,9 @@ void Jit64::addex(UGeckoInstruction inst)
|
|||
gpr.Lock(a, b, d);
|
||||
gpr.BindToRegister(d, true);
|
||||
|
||||
GetCarryEAXAndClear();
|
||||
GetCarryRSCRATCHAndClear();
|
||||
ADC(32, gpr.R(d), gpr.R((d == a) ? b : a));
|
||||
FinalizeCarryGenerateOverflowEAX(inst.OE);
|
||||
FinalizeCarryGenerateOverflowRSCRATCH(inst.OE);
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
gpr.UnlockAll();
|
||||
|
@ -1468,10 +1468,10 @@ void Jit64::addex(UGeckoInstruction inst)
|
|||
gpr.Lock(a, b, d);
|
||||
gpr.BindToRegister(d, false);
|
||||
|
||||
GetCarryEAXAndClear();
|
||||
GetCarryRSCRATCHAndClear();
|
||||
MOV(32, gpr.R(d), gpr.R(a));
|
||||
ADC(32, gpr.R(d), gpr.R(b));
|
||||
FinalizeCarryGenerateOverflowEAX(inst.OE);
|
||||
FinalizeCarryGenerateOverflowRSCRATCH(inst.OE);
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
gpr.UnlockAll();
|
||||
|
@ -1522,9 +1522,9 @@ void Jit64::addmex(UGeckoInstruction inst)
|
|||
gpr.Lock(d);
|
||||
gpr.BindToRegister(d, true);
|
||||
|
||||
GetCarryEAXAndClear();
|
||||
GetCarryRSCRATCHAndClear();
|
||||
ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
|
||||
FinalizeCarryGenerateOverflowEAX(inst.OE);
|
||||
FinalizeCarryGenerateOverflowRSCRATCH(inst.OE);
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
gpr.UnlockAll();
|
||||
|
@ -1534,10 +1534,10 @@ void Jit64::addmex(UGeckoInstruction inst)
|
|||
gpr.Lock(a, d);
|
||||
gpr.BindToRegister(d, false);
|
||||
|
||||
GetCarryEAXAndClear();
|
||||
GetCarryRSCRATCHAndClear();
|
||||
MOV(32, gpr.R(d), gpr.R(a));
|
||||
ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
|
||||
FinalizeCarryGenerateOverflowEAX(inst.OE);
|
||||
FinalizeCarryGenerateOverflowRSCRATCH(inst.OE);
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
gpr.UnlockAll();
|
||||
|
@ -1556,9 +1556,9 @@ void Jit64::addzex(UGeckoInstruction inst)
|
|||
gpr.Lock(d);
|
||||
gpr.BindToRegister(d, true);
|
||||
|
||||
GetCarryEAXAndClear();
|
||||
GetCarryRSCRATCHAndClear();
|
||||
ADC(32, gpr.R(d), Imm8(0));
|
||||
FinalizeCarryGenerateOverflowEAX(inst.OE);
|
||||
FinalizeCarryGenerateOverflowRSCRATCH(inst.OE);
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
gpr.UnlockAll();
|
||||
|
@ -1568,10 +1568,10 @@ void Jit64::addzex(UGeckoInstruction inst)
|
|||
gpr.Lock(a, d);
|
||||
gpr.BindToRegister(d, false);
|
||||
|
||||
GetCarryEAXAndClear();
|
||||
GetCarryRSCRATCHAndClear();
|
||||
MOV(32, gpr.R(d), gpr.R(a));
|
||||
ADC(32, gpr.R(d), Imm8(0));
|
||||
FinalizeCarryGenerateOverflowEAX(inst.OE);
|
||||
FinalizeCarryGenerateOverflowRSCRATCH(inst.OE);
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(d));
|
||||
gpr.UnlockAll();
|
||||
|
@ -1689,25 +1689,25 @@ void Jit64::rlwimix(UGeckoInstruction inst)
|
|||
{
|
||||
if (mask == 0U - (1U << inst.SH))
|
||||
{
|
||||
MOV(32, R(EAX), gpr.R(s));
|
||||
SHL(32, R(EAX), Imm8(inst.SH));
|
||||
MOV(32, R(RSCRATCH), gpr.R(s));
|
||||
SHL(32, R(RSCRATCH), Imm8(inst.SH));
|
||||
AND(32, gpr.R(a), Imm32(~mask));
|
||||
OR(32, gpr.R(a), R(EAX));
|
||||
OR(32, gpr.R(a), R(RSCRATCH));
|
||||
}
|
||||
else if (mask == (1U << inst.SH) - 1)
|
||||
{
|
||||
MOV(32, R(EAX), gpr.R(s));
|
||||
SHR(32, R(EAX), Imm8(32-inst.SH));
|
||||
MOV(32, R(RSCRATCH), gpr.R(s));
|
||||
SHR(32, R(RSCRATCH), Imm8(32-inst.SH));
|
||||
AND(32, gpr.R(a), Imm32(~mask));
|
||||
OR(32, gpr.R(a), R(EAX));
|
||||
OR(32, gpr.R(a), R(RSCRATCH));
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV(32, R(EAX), gpr.R(s));
|
||||
ROL(32, R(EAX), Imm8(inst.SH));
|
||||
XOR(32, R(EAX), gpr.R(a));
|
||||
AND(32, R(EAX), Imm32(mask));
|
||||
XOR(32, gpr.R(a), R(EAX));
|
||||
MOV(32, R(RSCRATCH), gpr.R(s));
|
||||
ROL(32, R(RSCRATCH), Imm8(inst.SH));
|
||||
XOR(32, R(RSCRATCH), gpr.R(a));
|
||||
AND(32, R(RSCRATCH), Imm32(mask));
|
||||
XOR(32, gpr.R(a), R(RSCRATCH));
|
||||
}
|
||||
|
||||
if (inst.Rc)
|
||||
|
@ -1742,6 +1742,7 @@ void Jit64::rlwnmx(UGeckoInstruction inst)
|
|||
}
|
||||
else
|
||||
{
|
||||
// no register choice
|
||||
gpr.FlushLockX(ECX);
|
||||
gpr.Lock(a, b, s);
|
||||
gpr.BindToRegister(a, (a == b || a == s), true);
|
||||
|
@ -1809,6 +1810,7 @@ void Jit64::srwx(UGeckoInstruction inst)
|
|||
}
|
||||
else
|
||||
{
|
||||
// no register choice
|
||||
gpr.FlushLockX(ECX);
|
||||
gpr.Lock(a, b, s);
|
||||
gpr.BindToRegister(a, (a == b || a == s), true);
|
||||
|
@ -1847,6 +1849,7 @@ void Jit64::slwx(UGeckoInstruction inst)
|
|||
}
|
||||
else
|
||||
{
|
||||
// no register choice
|
||||
gpr.FlushLockX(ECX);
|
||||
gpr.Lock(a, b, s);
|
||||
gpr.BindToRegister(a, (a == b || a == s), true);
|
||||
|
@ -1887,9 +1890,9 @@ void Jit64::srawx(UGeckoInstruction inst)
|
|||
MOV(32, gpr.R(a), gpr.R(s));
|
||||
SHL(64, gpr.R(a), Imm8(32));
|
||||
SAR(64, gpr.R(a), R(ECX));
|
||||
MOV(32, R(EAX), gpr.R(a));
|
||||
MOV(32, R(RSCRATCH), gpr.R(a));
|
||||
SHR(64, gpr.R(a), Imm8(32));
|
||||
TEST(32, gpr.R(a), R(EAX));
|
||||
TEST(32, gpr.R(a), R(RSCRATCH));
|
||||
FixupBranch nocarry = J_CC(CC_Z);
|
||||
JitSetCA();
|
||||
SetJumpTarget(nocarry);
|
||||
|
@ -1914,16 +1917,16 @@ void Jit64::srawix(UGeckoInstruction inst)
|
|||
gpr.Lock(a, s);
|
||||
gpr.BindToRegister(a, a == s, true);
|
||||
JitClearCA();
|
||||
MOV(32, R(EAX), gpr.R(s));
|
||||
MOV(32, R(RSCRATCH), gpr.R(s));
|
||||
if (a != s)
|
||||
{
|
||||
MOV(32, gpr.R(a), R(EAX));
|
||||
MOV(32, gpr.R(a), R(RSCRATCH));
|
||||
}
|
||||
SAR(32, gpr.R(a), Imm8(amount));
|
||||
if (inst.Rc)
|
||||
ComputeRC(gpr.R(a));
|
||||
SHL(32, R(EAX), Imm8(32-amount));
|
||||
TEST(32, R(EAX), gpr.R(a));
|
||||
SHL(32, R(RSCRATCH), Imm8(32-amount));
|
||||
TEST(32, R(RSCRATCH), gpr.R(a));
|
||||
FixupBranch nocarry = J_CC(CC_Z);
|
||||
JitSetCA();
|
||||
SetJumpTarget(nocarry);
|
||||
|
|
|
@ -197,13 +197,13 @@ void Jit64::lXXx(UGeckoInstruction inst)
|
|||
else
|
||||
{
|
||||
// In this case we need an extra temporary register.
|
||||
opAddress = R(RDX);
|
||||
opAddress = R(RSCRATCH2);
|
||||
storeAddress = true;
|
||||
if (use_constant_offset)
|
||||
{
|
||||
if (gpr.R(a).IsSimpleReg() && offset != 0)
|
||||
{
|
||||
LEA(32, RDX, MDisp(gpr.RX(a), offset));
|
||||
LEA(32, RSCRATCH2, MDisp(gpr.RX(a), offset));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -214,7 +214,7 @@ void Jit64::lXXx(UGeckoInstruction inst)
|
|||
}
|
||||
else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
|
||||
{
|
||||
LEA(32, RDX, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
|
||||
LEA(32, RSCRATCH2, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -231,7 +231,7 @@ void Jit64::lXXx(UGeckoInstruction inst)
|
|||
if (update && storeAddress)
|
||||
{
|
||||
// We need to save the (usually scratch) address register for the update.
|
||||
registersInUse |= (1 << RDX);
|
||||
registersInUse |= (1 << RSCRATCH2);
|
||||
}
|
||||
SafeLoadToReg(gpr.RX(d), opAddress, accessSize, loadOffset, registersInUse, signExtend);
|
||||
|
||||
|
@ -274,11 +274,11 @@ void Jit64::dcbz(UGeckoInstruction inst)
|
|||
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack)
|
||||
mem_mask |= Memory::ADDR_MASK_MEM1;
|
||||
|
||||
MOV(32, R(EAX), gpr.R(b));
|
||||
MOV(32, R(RSCRATCH), gpr.R(b));
|
||||
if (a)
|
||||
ADD(32, R(EAX), gpr.R(a));
|
||||
AND(32, R(EAX), Imm32(~31));
|
||||
TEST(32, R(EAX), Imm32(mem_mask));
|
||||
ADD(32, R(RSCRATCH), gpr.R(a));
|
||||
AND(32, R(RSCRATCH), Imm32(~31));
|
||||
TEST(32, R(RSCRATCH), Imm32(mem_mask));
|
||||
FixupBranch fast = J_CC(CC_Z, true);
|
||||
|
||||
// Should this code ever run? I can't find any games that use DCBZ on non-physical addresses, but
|
||||
|
@ -286,14 +286,14 @@ void Jit64::dcbz(UGeckoInstruction inst)
|
|||
MOV(32, M(&PC), Imm32(jit->js.compilerPC));
|
||||
u32 registersInUse = CallerSavedRegistersInUse();
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, false);
|
||||
ABI_CallFunctionR((void *)&Memory::ClearCacheLine, EAX);
|
||||
ABI_CallFunctionR((void *)&Memory::ClearCacheLine, RSCRATCH);
|
||||
ABI_PopRegistersAndAdjustStack(registersInUse, false);
|
||||
|
||||
FixupBranch exit = J();
|
||||
SetJumpTarget(fast);
|
||||
PXOR(XMM0, R(XMM0));
|
||||
MOVAPS(MComplex(RBX, RAX, SCALE_1, 0), XMM0);
|
||||
MOVAPS(MComplex(RBX, RAX, SCALE_1, 16), XMM0);
|
||||
MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 0), XMM0);
|
||||
MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 16), XMM0);
|
||||
SetJumpTarget(exit);
|
||||
}
|
||||
|
||||
|
@ -338,7 +338,7 @@ void Jit64::stX(UGeckoInstruction inst)
|
|||
// Helps external systems know which instruction triggered the write
|
||||
MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC));
|
||||
|
||||
MOV(32, R(EDX), gpr.R(s));
|
||||
MOV(32, R(RSCRATCH2), gpr.R(s));
|
||||
if (update)
|
||||
gpr.SetImmediate32(a, addr);
|
||||
|
||||
|
@ -362,8 +362,8 @@ void Jit64::stX(UGeckoInstruction inst)
|
|||
}
|
||||
else if (Memory::IsRAMAddress(addr))
|
||||
{
|
||||
MOV(32, R(EAX), gpr.R(s));
|
||||
WriteToConstRamAddress(accessSize, EAX, addr, true);
|
||||
MOV(32, R(RSCRATCH), gpr.R(s));
|
||||
WriteToConstRamAddress(accessSize, RSCRATCH, addr, true);
|
||||
if (update)
|
||||
gpr.SetImmediate32(a, addr);
|
||||
return;
|
||||
|
@ -399,15 +399,15 @@ void Jit64::stX(UGeckoInstruction inst)
|
|||
X64Reg reg_value;
|
||||
if (WriteClobbersRegValue(accessSize, /* swap */ true))
|
||||
{
|
||||
MOV(32, R(EDX), gpr.R(s));
|
||||
reg_value = EDX;
|
||||
MOV(32, R(RSCRATCH2), gpr.R(s));
|
||||
reg_value = RSCRATCH2;
|
||||
}
|
||||
else
|
||||
{
|
||||
gpr.BindToRegister(s, true, false);
|
||||
reg_value = gpr.RX(s);
|
||||
}
|
||||
SafeWriteRegToReg(reg_value, gpr.RX(a), accessSize, offset, CallerSavedRegistersInUse(), SAFE_LOADSTORE_CLOBBER_EAX_INSTEAD_OF_ADDR);
|
||||
SafeWriteRegToReg(reg_value, gpr.RX(a), accessSize, offset, CallerSavedRegistersInUse(), SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR);
|
||||
|
||||
if (update && offset)
|
||||
{
|
||||
|
@ -440,16 +440,16 @@ void Jit64::stXx(UGeckoInstruction inst)
|
|||
{
|
||||
gpr.BindToRegister(a, true, true);
|
||||
ADD(32, gpr.R(a), gpr.R(b));
|
||||
MOV(32, R(EDX), gpr.R(a));
|
||||
MOV(32, R(RSCRATCH2), gpr.R(a));
|
||||
}
|
||||
else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
|
||||
{
|
||||
LEA(32, EDX, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
|
||||
LEA(32, RSCRATCH2, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV(32, R(EDX), gpr.R(a));
|
||||
ADD(32, R(EDX), gpr.R(b));
|
||||
MOV(32, R(RSCRATCH2), gpr.R(a));
|
||||
ADD(32, R(RSCRATCH2), gpr.R(b));
|
||||
}
|
||||
|
||||
int accessSize;
|
||||
|
@ -473,15 +473,15 @@ void Jit64::stXx(UGeckoInstruction inst)
|
|||
X64Reg reg_value;
|
||||
if (WriteClobbersRegValue(accessSize, /* swap */ true))
|
||||
{
|
||||
MOV(32, R(EAX), gpr.R(s));
|
||||
reg_value = EAX;
|
||||
MOV(32, R(RSCRATCH), gpr.R(s));
|
||||
reg_value = RSCRATCH;
|
||||
}
|
||||
else
|
||||
{
|
||||
gpr.BindToRegister(s, true, false);
|
||||
reg_value = gpr.RX(s);
|
||||
}
|
||||
SafeWriteRegToReg(reg_value, EDX, accessSize, 0, CallerSavedRegistersInUse());
|
||||
SafeWriteRegToReg(reg_value, RSCRATCH2, accessSize, 0, CallerSavedRegistersInUse());
|
||||
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
|
@ -494,14 +494,14 @@ void Jit64::lmw(UGeckoInstruction inst)
|
|||
JITDISABLE(bJITLoadStoreOff);
|
||||
|
||||
// TODO: This doesn't handle rollback on DSI correctly
|
||||
MOV(32, R(EDX), Imm32((u32)(s32)inst.SIMM_16));
|
||||
MOV(32, R(RSCRATCH2), Imm32((u32)(s32)inst.SIMM_16));
|
||||
if (inst.RA)
|
||||
ADD(32, R(EDX), gpr.R(inst.RA));
|
||||
ADD(32, R(RSCRATCH2), gpr.R(inst.RA));
|
||||
for (int i = inst.RD; i < 32; i++)
|
||||
{
|
||||
SafeLoadToReg(EAX, R(EDX), 32, (i - inst.RD) * 4, CallerSavedRegistersInUse() | (1 << ECX), false);
|
||||
SafeLoadToReg(RSCRATCH, R(RSCRATCH2), 32, (i - inst.RD) * 4, CallerSavedRegistersInUse() | (1 << RSCRATCH_EXTRA), false);
|
||||
gpr.BindToRegister(i, false, true);
|
||||
MOV(32, gpr.R(i), R(EAX));
|
||||
MOV(32, gpr.R(i), R(RSCRATCH));
|
||||
}
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
@ -515,11 +515,11 @@ void Jit64::stmw(UGeckoInstruction inst)
|
|||
for (int i = inst.RD; i < 32; i++)
|
||||
{
|
||||
if (inst.RA)
|
||||
MOV(32, R(EAX), gpr.R(inst.RA));
|
||||
MOV(32, R(RSCRATCH), gpr.R(inst.RA));
|
||||
else
|
||||
XOR(32, R(EAX), R(EAX));
|
||||
MOV(32, R(EDX), gpr.R(i));
|
||||
SafeWriteRegToReg(EDX, EAX, 32, (i - inst.RD) * 4 + (u32)(s32)inst.SIMM_16, CallerSavedRegistersInUse());
|
||||
XOR(32, R(RSCRATCH), R(RSCRATCH));
|
||||
MOV(32, R(RSCRATCH2), gpr.R(i));
|
||||
SafeWriteRegToReg(RSCRATCH2, RSCRATCH, 32, (i - inst.RD) * 4 + (u32)(s32)inst.SIMM_16, CallerSavedRegistersInUse());
|
||||
}
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
|
|
@ -42,9 +42,9 @@ void Jit64::lfXXX(UGeckoInstruction inst)
|
|||
}
|
||||
else
|
||||
{
|
||||
addr = R(EAX);
|
||||
addr = R(RSCRATCH);
|
||||
if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
|
||||
LEA(32, EAX, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
|
||||
LEA(32, RSCRATCH, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
|
||||
else
|
||||
{
|
||||
MOV(32, addr, gpr.R(b));
|
||||
|
@ -61,18 +61,18 @@ void Jit64::lfXXX(UGeckoInstruction inst)
|
|||
offset = (s32)(s16)inst.SIMM_16;
|
||||
}
|
||||
|
||||
SafeLoadToReg(RAX, addr, single ? 32 : 64, offset, CallerSavedRegistersInUse(), false);
|
||||
SafeLoadToReg(RSCRATCH, addr, single ? 32 : 64, offset, CallerSavedRegistersInUse(), false);
|
||||
fpr.Lock(d);
|
||||
fpr.BindToRegister(d, js.memcheck || !single);
|
||||
|
||||
MEMCHECK_START
|
||||
if (single)
|
||||
{
|
||||
ConvertSingleToDouble(fpr.RX(d), EAX, true);
|
||||
ConvertSingleToDouble(fpr.RX(d), RSCRATCH, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
MOVQ_xmm(XMM0, R(RAX));
|
||||
MOVQ_xmm(XMM0, R(RSCRATCH));
|
||||
MOVSD(fpr.RX(d), R(XMM0));
|
||||
}
|
||||
MEMCHECK_END
|
||||
|
@ -102,17 +102,17 @@ void Jit64::stfXXX(UGeckoInstruction inst)
|
|||
{
|
||||
gpr.BindToRegister(a, true, true);
|
||||
ADD(32, gpr.R(a), gpr.R(b));
|
||||
MOV(32, R(RDX), gpr.R(a));
|
||||
MOV(32, R(RSCRATCH2), gpr.R(a));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
|
||||
LEA(32, RDX, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
|
||||
LEA(32, RSCRATCH2, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
|
||||
else
|
||||
{
|
||||
MOV(32, R(RDX), gpr.R(b));
|
||||
MOV(32, R(RSCRATCH2), gpr.R(b));
|
||||
if (a)
|
||||
ADD(32, R(RDX), gpr.R(a));
|
||||
ADD(32, R(RSCRATCH2), gpr.R(a));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -127,23 +127,23 @@ void Jit64::stfXXX(UGeckoInstruction inst)
|
|||
{
|
||||
offset = (s32)(s16)inst.SIMM_16;
|
||||
}
|
||||
MOV(32, R(RDX), gpr.R(a));
|
||||
MOV(32, R(RSCRATCH2), gpr.R(a));
|
||||
}
|
||||
|
||||
if (single)
|
||||
{
|
||||
fpr.BindToRegister(s, true, false);
|
||||
ConvertDoubleToSingle(XMM0, fpr.RX(s));
|
||||
SafeWriteF32ToReg(XMM0, RDX, offset, CallerSavedRegistersInUse());
|
||||
SafeWriteF32ToReg(XMM0, RSCRATCH2, offset, CallerSavedRegistersInUse());
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (fpr.R(s).IsSimpleReg())
|
||||
MOVQ_xmm(R(RAX), fpr.RX(s));
|
||||
MOVQ_xmm(R(RSCRATCH), fpr.RX(s));
|
||||
else
|
||||
MOV(64, R(RAX), fpr.R(s));
|
||||
SafeWriteRegToReg(RAX, RDX, 64, offset, CallerSavedRegistersInUse());
|
||||
MOV(64, R(RSCRATCH), fpr.R(s));
|
||||
SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 64, offset, CallerSavedRegistersInUse());
|
||||
}
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
|
@ -159,14 +159,14 @@ void Jit64::stfiwx(UGeckoInstruction inst)
|
|||
int a = inst.RA;
|
||||
int b = inst.RB;
|
||||
|
||||
MOV(32, R(RDX), gpr.R(b));
|
||||
MOV(32, R(RSCRATCH2), gpr.R(b));
|
||||
if (a)
|
||||
ADD(32, R(RDX), gpr.R(a));
|
||||
ADD(32, R(RSCRATCH2), gpr.R(a));
|
||||
|
||||
if (fpr.R(s).IsSimpleReg())
|
||||
MOVD_xmm(R(EAX), fpr.RX(s));
|
||||
MOVD_xmm(R(RSCRATCH), fpr.RX(s));
|
||||
else
|
||||
MOV(32, R(EAX), fpr.R(s));
|
||||
SafeWriteRegToReg(EAX, RDX, 32, 0, CallerSavedRegistersInUse());
|
||||
MOV(32, R(RSCRATCH), fpr.R(s));
|
||||
SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 32, 0, CallerSavedRegistersInUse());
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
|
|
@ -28,36 +28,36 @@ void Jit64::psq_st(UGeckoInstruction inst)
|
|||
int a = inst.RA;
|
||||
int s = inst.RS; // Fp numbers
|
||||
|
||||
gpr.FlushLockX(EAX, ECX);
|
||||
gpr.FlushLockX(RSCRATCH, RSCRATCH_EXTRA);
|
||||
if (update)
|
||||
gpr.BindToRegister(inst.RA, true, true);
|
||||
fpr.BindToRegister(inst.RS, true, false);
|
||||
MOV(32, R(ECX), gpr.R(inst.RA));
|
||||
MOV(32, R(RSCRATCH_EXTRA), gpr.R(inst.RA));
|
||||
if (offset)
|
||||
ADD(32, R(ECX), Imm32((u32)offset));
|
||||
ADD(32, R(RSCRATCH_EXTRA), Imm32((u32)offset));
|
||||
if (update && offset)
|
||||
MOV(32, gpr.R(a), R(ECX));
|
||||
MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
|
||||
// Some games (e.g. Dirt 2) incorrectly set the unused bits which breaks the lookup table code.
|
||||
// Hence, we need to mask out the unused bits. The layout of the GQR register is
|
||||
// UU[SCALE]UUUUU[TYPE] where SCALE is 6 bits and TYPE is 3 bits, so we have to AND with
|
||||
// 0b0011111100000111, or 0x3F07.
|
||||
MOV(32, R(EAX), Imm32(0x3F07));
|
||||
AND(32, R(EAX), PPCSTATE(spr[SPR_GQR0 + inst.I]));
|
||||
MOVZX(32, 8, EDX, R(AL));
|
||||
MOV(32, R(RSCRATCH), Imm32(0x3F07));
|
||||
AND(32, R(RSCRATCH), PPCSTATE(spr[SPR_GQR0 + inst.I]));
|
||||
MOVZX(32, 8, RSCRATCH2, R(RSCRATCH));
|
||||
|
||||
// FIXME: Fix ModR/M encoding to allow [EDX*4+disp32] without a base register!
|
||||
// FIXME: Fix ModR/M encoding to allow [RSCRATCH2*4+disp32] without a base register!
|
||||
if (inst.W)
|
||||
{
|
||||
// One value
|
||||
PXOR(XMM0, R(XMM0)); // TODO: See if we can get rid of this cheaply by tweaking the code in the singleStore* functions.
|
||||
CVTSD2SS(XMM0, fpr.R(s));
|
||||
CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)asm_routines.singleStoreQuantized));
|
||||
CALLptr(MScaled(RSCRATCH2, SCALE_8, (u32)(u64)asm_routines.singleStoreQuantized));
|
||||
}
|
||||
else
|
||||
{
|
||||
// Pair of values
|
||||
CVTPD2PS(XMM0, fpr.R(s));
|
||||
CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)asm_routines.pairedStoreQuantized));
|
||||
CALLptr(MScaled(RSCRATCH2, SCALE_8, (u32)(u64)asm_routines.pairedStoreQuantized));
|
||||
}
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
|
@ -72,23 +72,23 @@ void Jit64::psq_l(UGeckoInstruction inst)
|
|||
bool update = inst.OPCD == 57;
|
||||
int offset = inst.SIMM_12;
|
||||
|
||||
gpr.FlushLockX(EAX, ECX);
|
||||
gpr.FlushLockX(RSCRATCH, RSCRATCH_EXTRA);
|
||||
gpr.BindToRegister(inst.RA, true, update && offset);
|
||||
fpr.BindToRegister(inst.RS, false, true);
|
||||
if (offset)
|
||||
LEA(32, ECX, MDisp(gpr.RX(inst.RA), offset));
|
||||
LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(inst.RA), offset));
|
||||
else
|
||||
MOV(32, R(ECX), gpr.R(inst.RA));
|
||||
MOV(32, R(RSCRATCH_EXTRA), gpr.R(inst.RA));
|
||||
if (update && offset)
|
||||
MOV(32, gpr.R(inst.RA), R(ECX));
|
||||
MOV(32, R(EAX), Imm32(0x3F07));
|
||||
AND(32, R(EAX), M(((char *)&GQR(inst.I)) + 2));
|
||||
MOVZX(32, 8, EDX, R(AL));
|
||||
MOV(32, gpr.R(inst.RA), R(RSCRATCH_EXTRA));
|
||||
MOV(32, R(RSCRATCH), Imm32(0x3F07));
|
||||
AND(32, R(RSCRATCH), M(((char *)&GQR(inst.I)) + 2));
|
||||
MOVZX(32, 8, RSCRATCH2, R(RSCRATCH));
|
||||
if (inst.W)
|
||||
OR(32, R(EDX), Imm8(8));
|
||||
OR(32, R(RSCRATCH2), Imm8(8));
|
||||
|
||||
ABI_AlignStack(0);
|
||||
CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)asm_routines.pairedLoadQuantized));
|
||||
CALLptr(MScaled(RSCRATCH2, SCALE_8, (u32)(u64)asm_routines.pairedLoadQuantized));
|
||||
ABI_RestoreStack(0);
|
||||
|
||||
// MEMCHECK_START // FIXME: MMU does not work here because of unsafe memory access
|
||||
|
|
|
@ -42,40 +42,40 @@ void Jit64::GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate)
|
|||
|
||||
void Jit64::SetCRFieldBit(int field, int bit, Gen::X64Reg in)
|
||||
{
|
||||
MOV(64, R(RDX), PPCSTATE(cr_val[field]));
|
||||
MOV(64, R(RSCRATCH2), PPCSTATE(cr_val[field]));
|
||||
MOVZX(32, 8, in, R(in));
|
||||
|
||||
switch (bit)
|
||||
{
|
||||
case CR_SO_BIT: // set bit 61 to input
|
||||
BTR(64, R(RDX), Imm8(61));
|
||||
BTR(64, R(RSCRATCH2), Imm8(61));
|
||||
SHL(64, R(in), Imm8(61));
|
||||
OR(64, R(RDX), R(in));
|
||||
OR(64, R(RSCRATCH2), R(in));
|
||||
break;
|
||||
|
||||
case CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input
|
||||
SHR(64, R(RDX), Imm8(32));
|
||||
SHL(64, R(RDX), Imm8(32));
|
||||
SHR(64, R(RSCRATCH2), Imm8(32));
|
||||
SHL(64, R(RSCRATCH2), Imm8(32));
|
||||
XOR(32, R(in), Imm8(1));
|
||||
OR(64, R(RDX), R(in));
|
||||
OR(64, R(RSCRATCH2), R(in));
|
||||
break;
|
||||
|
||||
case CR_GT_BIT: // set bit 63 to !input
|
||||
BTR(64, R(RDX), Imm8(63));
|
||||
BTR(64, R(RSCRATCH2), Imm8(63));
|
||||
NOT(32, R(in));
|
||||
SHL(64, R(in), Imm8(63));
|
||||
OR(64, R(RDX), R(in));
|
||||
OR(64, R(RSCRATCH2), R(in));
|
||||
break;
|
||||
|
||||
case CR_LT_BIT: // set bit 62 to input
|
||||
BTR(64, R(RDX), Imm8(62));
|
||||
BTR(64, R(RSCRATCH2), Imm8(62));
|
||||
SHL(64, R(in), Imm8(62));
|
||||
OR(64, R(RDX), R(in));
|
||||
OR(64, R(RSCRATCH2), R(in));
|
||||
break;
|
||||
}
|
||||
|
||||
BTS(64, R(RDX), Imm8(32));
|
||||
MOV(64, PPCSTATE(cr_val[field]), R(RDX));
|
||||
BTS(64, R(RSCRATCH2), Imm8(32));
|
||||
MOV(64, PPCSTATE(cr_val[field]), R(RSCRATCH2));
|
||||
}
|
||||
|
||||
FixupBranch Jit64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set)
|
||||
|
@ -173,8 +173,10 @@ void Jit64::mfspr(UGeckoInstruction inst)
|
|||
// typical use of this instruction is to call it three times, e.g. mftbu/mftbl/mftbu/cmpw/bne
|
||||
// to deal with possible timer wraparound. This makes the second two (out of three) completely
|
||||
// redundant for the JIT.
|
||||
// no register choice
|
||||
|
||||
gpr.FlushLockX(RDX, RAX);
|
||||
u32 offset = js.downcountAmount / SystemTimers::TIMER_RATIO;
|
||||
gpr.FlushLockX(EDX);
|
||||
|
||||
// An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the
|
||||
// cost of calling out to C for this is actually significant.
|
||||
|
@ -205,14 +207,14 @@ void Jit64::mfspr(UGeckoInstruction inst)
|
|||
gpr.BindToRegister(d, false);
|
||||
gpr.BindToRegister(n, false);
|
||||
if (iIndex == SPR_TL)
|
||||
MOV(32, gpr.R(d), R(EAX));
|
||||
MOV(32, gpr.R(d), R(RAX));
|
||||
if (nextIndex == SPR_TL)
|
||||
MOV(32, gpr.R(n), R(EAX));
|
||||
MOV(32, gpr.R(n), R(RAX));
|
||||
SHR(64, R(RAX), Imm8(32));
|
||||
if (iIndex == SPR_TU)
|
||||
MOV(32, gpr.R(d), R(EAX));
|
||||
MOV(32, gpr.R(d), R(RAX));
|
||||
if (nextIndex == SPR_TU)
|
||||
MOV(32, gpr.R(n), R(EAX));
|
||||
MOV(32, gpr.R(n), R(RAX));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -220,8 +222,9 @@ void Jit64::mfspr(UGeckoInstruction inst)
|
|||
gpr.BindToRegister(d, false);
|
||||
if (iIndex == SPR_TU)
|
||||
SHR(64, R(RAX), Imm8(32));
|
||||
MOV(32, gpr.R(d), R(EAX));
|
||||
MOV(32, gpr.R(d), R(RAX));
|
||||
}
|
||||
gpr.UnlockAllX();
|
||||
break;
|
||||
}
|
||||
case SPR_WPAR:
|
||||
|
@ -238,7 +241,6 @@ void Jit64::mfspr(UGeckoInstruction inst)
|
|||
break;
|
||||
}
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
||||
void Jit64::mtmsr(UGeckoInstruction inst)
|
||||
|
@ -308,9 +310,9 @@ void Jit64::mfcr(UGeckoInstruction inst)
|
|||
gpr.BindToRegister(d, false, true);
|
||||
XOR(32, gpr.R(d), gpr.R(d));
|
||||
|
||||
X64Reg cr_val = RDX;
|
||||
// we only need to zero the high bits of EAX once
|
||||
XOR(32, R(EAX), R(EAX));
|
||||
X64Reg cr_val = RSCRATCH2;
|
||||
// we only need to zero the high bits of RSCRATCH once
|
||||
XOR(32, R(RSCRATCH), R(RSCRATCH));
|
||||
for (int i = 0; i < 8; i++)
|
||||
{
|
||||
static const u8 m_flagTable[8] = {0x0,0x1,0x8,0x9,0x0,0x1,0x8,0x9};
|
||||
|
@ -321,19 +323,19 @@ void Jit64::mfcr(UGeckoInstruction inst)
|
|||
|
||||
// EQ: Bits 31-0 == 0; set flag bit 1
|
||||
TEST(32, R(cr_val), R(cr_val));
|
||||
SETcc(CC_Z, R(EAX));
|
||||
LEA(32, gpr.RX(d), MComplex(gpr.RX(d), EAX, SCALE_2, 0));
|
||||
SETcc(CC_Z, R(RSCRATCH));
|
||||
LEA(32, gpr.RX(d), MComplex(gpr.RX(d), RSCRATCH, SCALE_2, 0));
|
||||
|
||||
// GT: Value > 0; set flag bit 2
|
||||
TEST(64, R(cr_val), R(cr_val));
|
||||
SETcc(CC_G, R(EAX));
|
||||
LEA(32, gpr.RX(d), MComplex(gpr.RX(d), EAX, SCALE_4, 0));
|
||||
SETcc(CC_G, R(RSCRATCH));
|
||||
LEA(32, gpr.RX(d), MComplex(gpr.RX(d), RSCRATCH, SCALE_4, 0));
|
||||
|
||||
// SO: Bit 61 set; set flag bit 0
|
||||
// LT: Bit 62 set; set flag bit 3
|
||||
SHR(64, R(cr_val), Imm8(61));
|
||||
MOVZX(32, 8, EAX, MDisp(cr_val, (u32)(u64)m_flagTable));
|
||||
OR(32, gpr.R(d), R(EAX));
|
||||
MOVZX(32, 8, RSCRATCH, MDisp(cr_val, (u32)(u64)m_flagTable));
|
||||
OR(32, gpr.R(d), R(RSCRATCH));
|
||||
}
|
||||
|
||||
gpr.UnlockAll();
|
||||
|
@ -363,8 +365,8 @@ void Jit64::mtcrf(UGeckoInstruction inst)
|
|||
}
|
||||
else
|
||||
{
|
||||
MOV(64, R(RAX), Imm64(newcrval));
|
||||
MOV(64, PPCSTATE(cr_val[i]), R(RAX));
|
||||
MOV(64, R(RSCRATCH), Imm64(newcrval));
|
||||
MOV(64, PPCSTATE(cr_val[i]), R(RSCRATCH));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -377,13 +379,13 @@ void Jit64::mtcrf(UGeckoInstruction inst)
|
|||
{
|
||||
if ((crm & (0x80 >> i)) != 0)
|
||||
{
|
||||
MOV(32, R(EAX), gpr.R(inst.RS));
|
||||
MOV(32, R(RSCRATCH), gpr.R(inst.RS));
|
||||
if (i != 7)
|
||||
SHR(32, R(EAX), Imm8(28 - (i * 4)));
|
||||
SHR(32, R(RSCRATCH), Imm8(28 - (i * 4)));
|
||||
if (i != 0)
|
||||
AND(32, R(EAX), Imm8(0xF));
|
||||
MOV(64, R(EAX), MScaled(EAX, SCALE_8, (u32)(u64)m_crTable));
|
||||
MOV(64, PPCSTATE(cr_val[i]), R(EAX));
|
||||
AND(32, R(RSCRATCH), Imm8(0xF));
|
||||
MOV(64, R(RSCRATCH), MScaled(RSCRATCH, SCALE_8, (u32)(u64)m_crTable));
|
||||
MOV(64, PPCSTATE(cr_val[i]), R(RSCRATCH));
|
||||
}
|
||||
}
|
||||
gpr.UnlockAll();
|
||||
|
@ -399,8 +401,8 @@ void Jit64::mcrf(UGeckoInstruction inst)
|
|||
// USES_CR
|
||||
if (inst.CRFS != inst.CRFD)
|
||||
{
|
||||
MOV(64, R(EAX), PPCSTATE(cr_val[inst.CRFS]));
|
||||
MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(EAX));
|
||||
MOV(64, R(RSCRATCH), PPCSTATE(cr_val[inst.CRFS]));
|
||||
MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(RSCRATCH));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -412,11 +414,11 @@ void Jit64::mcrxr(UGeckoInstruction inst)
|
|||
// USES_CR
|
||||
|
||||
// Copy XER[0-3] into CR[inst.CRFD]
|
||||
MOV(32, R(EAX), PPCSTATE(spr[SPR_XER]));
|
||||
SHR(32, R(EAX), Imm8(28));
|
||||
MOV(32, R(RSCRATCH), PPCSTATE(spr[SPR_XER]));
|
||||
SHR(32, R(RSCRATCH), Imm8(28));
|
||||
|
||||
MOV(64, R(EAX), MScaled(EAX, SCALE_8, (u32)(u64)m_crTable));
|
||||
MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(EAX));
|
||||
MOV(64, R(RSCRATCH), MScaled(RSCRATCH, SCALE_8, (u32)(u64)m_crTable));
|
||||
MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(RSCRATCH));
|
||||
|
||||
// Clear XER[0-3]
|
||||
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(0x0FFFFFFF));
|
||||
|
@ -438,8 +440,8 @@ void Jit64::crXXX(UGeckoInstruction inst)
|
|||
// crnand or crnor
|
||||
bool negateB = inst.SUBOP10 == 225 || inst.SUBOP10 == 33;
|
||||
|
||||
GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), DL, negateA);
|
||||
GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), AL, negateB);
|
||||
GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), RSCRATCH2, negateA);
|
||||
GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), RSCRATCH, negateB);
|
||||
|
||||
// Compute combined bit
|
||||
switch (inst.SUBOP10)
|
||||
|
@ -447,23 +449,23 @@ void Jit64::crXXX(UGeckoInstruction inst)
|
|||
case 33: // crnor: ~(A || B) == (~A && ~B)
|
||||
case 129: // crandc
|
||||
case 257: // crand
|
||||
AND(8, R(AL), R(DL));
|
||||
AND(8, R(RSCRATCH), R(RSCRATCH2));
|
||||
break;
|
||||
|
||||
case 193: // crxor
|
||||
case 289: // creqv
|
||||
XOR(8, R(AL), R(DL));
|
||||
XOR(8, R(RSCRATCH), R(RSCRATCH2));
|
||||
break;
|
||||
|
||||
case 225: // crnand: ~(A && B) == (~A || ~B)
|
||||
case 417: // crorc
|
||||
case 449: // cror
|
||||
OR(8, R(AL), R(DL));
|
||||
OR(8, R(RSCRATCH), R(RSCRATCH2));
|
||||
break;
|
||||
}
|
||||
|
||||
// Store result bit in CRBD
|
||||
SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), AL);
|
||||
SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), RSCRATCH);
|
||||
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
|
|
@ -604,22 +604,22 @@ static void regEmitMemStore(RegInfo& RI, InstLoc I, unsigned Size)
|
|||
{
|
||||
auto info = regBuildMemAddress(RI, I, getOp2(I), 2, Size, nullptr);
|
||||
if (info.first.IsImm())
|
||||
RI.Jit->MOV(32, R(EDX), info.first);
|
||||
RI.Jit->MOV(32, R(RSCRATCH2), info.first);
|
||||
else
|
||||
RI.Jit->LEA(32, EDX, MDisp(info.first.GetSimpleReg(), info.second));
|
||||
RI.Jit->LEA(32, RSCRATCH2, MDisp(info.first.GetSimpleReg(), info.second));
|
||||
|
||||
regSpill(RI, EAX);
|
||||
regSpill(RI, RSCRATCH);
|
||||
|
||||
if (isImm(*getOp1(I)))
|
||||
{
|
||||
RI.Jit->MOV(Size, R(EAX), regImmForConst(RI, getOp1(I), Size));
|
||||
RI.Jit->MOV(Size, R(RSCRATCH), regImmForConst(RI, getOp1(I), Size));
|
||||
}
|
||||
else
|
||||
{
|
||||
RI.Jit->MOV(32, R(EAX), regLocForInst(RI, getOp1(I)));
|
||||
RI.Jit->MOV(32, R(RSCRATCH), regLocForInst(RI, getOp1(I)));
|
||||
}
|
||||
|
||||
RI.Jit->SafeWriteRegToReg(EAX, EDX, Size, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM);
|
||||
RI.Jit->SafeWriteRegToReg(RSCRATCH, RSCRATCH2, Size, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM);
|
||||
if (RI.IInfo[I - RI.FirstI] & 4)
|
||||
regClearInst(RI, getOp1(I));
|
||||
}
|
||||
|
@ -677,9 +677,9 @@ static void regEmitCmp(RegInfo& RI, InstLoc I)
|
|||
static void regEmitICmpInst(RegInfo& RI, InstLoc I, CCFlags flag)
|
||||
{
|
||||
regEmitCmp(RI, I);
|
||||
RI.Jit->SETcc(flag, R(EDX)); // Caution: SETCC uses 8-bit regs!
|
||||
RI.Jit->SETcc(flag, R(RSCRATCH2)); // Caution: SETCC uses 8-bit regs!
|
||||
X64Reg reg = regBinReg(RI, I);
|
||||
RI.Jit->MOVZX(32, 8, reg, R(EDX));
|
||||
RI.Jit->MOVZX(32, 8, reg, R(RSCRATCH2));
|
||||
RI.regs[reg] = I;
|
||||
regNormalRegClear(RI, I);
|
||||
}
|
||||
|
@ -709,8 +709,8 @@ static void regEmitICmpCRInst(RegInfo& RI, InstLoc I)
|
|||
unsigned RHS = RI.Build->GetImmValue(getOp2(I));
|
||||
if (!signed_compare && (RHS & 0x80000000U))
|
||||
{
|
||||
RI.Jit->MOV(32, R(EAX), Imm32(RHS));
|
||||
RI.Jit->SUB(64, R(reg), R(RAX));
|
||||
RI.Jit->MOV(32, R(RSCRATCH), Imm32(RHS));
|
||||
RI.Jit->SUB(64, R(reg), R(RSCRATCH));
|
||||
}
|
||||
else if (RHS)
|
||||
{
|
||||
|
@ -720,10 +720,10 @@ static void regEmitICmpCRInst(RegInfo& RI, InstLoc I)
|
|||
else
|
||||
{
|
||||
if (signed_compare)
|
||||
RI.Jit->MOVSX(64, 32, RAX, regLocForInst(RI, getOp2(I)));
|
||||
RI.Jit->MOVSX(64, 32, RSCRATCH, regLocForInst(RI, getOp2(I)));
|
||||
else
|
||||
RI.Jit->MOV(32, R(EAX), regLocForInst(RI, getOp2(I)));
|
||||
RI.Jit->SUB(64, R(reg), R(RAX));
|
||||
RI.Jit->MOV(32, R(RSCRATCH), regLocForInst(RI, getOp2(I)));
|
||||
RI.Jit->SUB(64, R(reg), R(RSCRATCH));
|
||||
}
|
||||
|
||||
RI.regs[reg] = I;
|
||||
|
@ -1069,12 +1069,12 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
// If some exceptions are pending and EE are now enabled, force checking
|
||||
// external exceptions when going out of mtmsr in order to execute delayed
|
||||
// interrupts as soon as possible.
|
||||
Jit->MOV(32, R(EAX), PPCSTATE(msr));
|
||||
Jit->TEST(32, R(EAX), Imm32(0x8000));
|
||||
Jit->MOV(32, R(RSCRATCH), PPCSTATE(msr));
|
||||
Jit->TEST(32, R(RSCRATCH), Imm32(0x8000));
|
||||
FixupBranch eeDisabled = Jit->J_CC(CC_Z);
|
||||
|
||||
Jit->MOV(32, R(EAX), PPCSTATE(Exceptions));
|
||||
Jit->TEST(32, R(EAX), R(EAX));
|
||||
Jit->MOV(32, R(RSCRATCH), PPCSTATE(Exceptions));
|
||||
Jit->TEST(32, R(RSCRATCH), R(RSCRATCH));
|
||||
FixupBranch noExceptionsPending = Jit->J_CC(CC_Z);
|
||||
|
||||
Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc + 4));
|
||||
|
@ -1113,11 +1113,11 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
}
|
||||
case StoreFPRF:
|
||||
{
|
||||
Jit->MOV(32, R(EDX), regLocForInst(RI, getOp1(I)));
|
||||
Jit->AND(32, R(EDX), Imm8(0x1F));
|
||||
Jit->SHL(32, R(EDX), Imm8(12));
|
||||
Jit->MOV(32, R(RSCRATCH2), regLocForInst(RI, getOp1(I)));
|
||||
Jit->AND(32, R(RSCRATCH2), Imm8(0x1F));
|
||||
Jit->SHL(32, R(RSCRATCH2), Imm8(12));
|
||||
Jit->AND(32, PPCSTATE(fpscr), Imm32(~(0x1F << 12)));
|
||||
Jit->OR(32, PPCSTATE(fpscr), R(EDX));
|
||||
Jit->OR(32, PPCSTATE(fpscr), R(RSCRATCH2));
|
||||
regNormalRegClear(RI, I);
|
||||
break;
|
||||
}
|
||||
|
@ -1157,8 +1157,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
break;
|
||||
|
||||
X64Reg reg = regUReg(RI, I);
|
||||
Jit->MOV(32, R(EDX), regLocForInst(RI, getOp1(I)));
|
||||
Jit->MOVSX(32, 8, reg, R(EDX));
|
||||
Jit->MOV(32, R(RSCRATCH2), regLocForInst(RI, getOp1(I)));
|
||||
Jit->MOVSX(32, 8, reg, R(RSCRATCH2));
|
||||
RI.regs[reg] = I;
|
||||
regNormalRegClear(RI, I);
|
||||
break;
|
||||
|
@ -1180,9 +1180,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
break;
|
||||
|
||||
X64Reg reg = regUReg(RI, I);
|
||||
Jit->MOV(32, R(EDX), Imm32(63));
|
||||
Jit->MOV(32, R(RSCRATCH2), Imm32(63));
|
||||
Jit->BSR(32, reg, regLocForInst(RI, getOp1(I)));
|
||||
Jit->CMOVcc(32, reg, R(EDX), CC_Z);
|
||||
Jit->CMOVcc(32, reg, R(RSCRATCH2), CC_Z);
|
||||
Jit->XOR(32, R(reg), Imm8(31));
|
||||
RI.regs[reg] = I;
|
||||
regNormalRegClear(RI, I);
|
||||
|
@ -1267,6 +1267,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
if (!thisUsed)
|
||||
break;
|
||||
|
||||
// no register choice
|
||||
regSpill(RI, EAX);
|
||||
regSpill(RI, EDX);
|
||||
X64Reg reg = regBinReg(RI, I);
|
||||
|
@ -1421,35 +1422,35 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
X64Reg cr_val = regUReg(RI, I);
|
||||
Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I)));
|
||||
|
||||
Jit->XOR(32, R(EAX), R(EAX));
|
||||
Jit->XOR(32, R(RSCRATCH), R(RSCRATCH));
|
||||
|
||||
// SO: Bit 61 set.
|
||||
Jit->MOV(64, R(RDX), R(cr_val));
|
||||
Jit->SHR(64, R(RDX), Imm8(61));
|
||||
Jit->AND(32, R(EDX), Imm8(1));
|
||||
Jit->OR(32, R(EAX), R(EDX));
|
||||
Jit->MOV(64, R(RSCRATCH2), R(cr_val));
|
||||
Jit->SHR(64, R(RSCRATCH2), Imm8(61));
|
||||
Jit->AND(32, R(RSCRATCH2), Imm8(1));
|
||||
Jit->OR(32, R(RSCRATCH), R(RSCRATCH2));
|
||||
|
||||
// EQ: Bits 31-0 == 0.
|
||||
Jit->XOR(32, R(EDX), R(EDX));
|
||||
Jit->XOR(32, R(RSCRATCH2), R(RSCRATCH2));
|
||||
Jit->TEST(32, R(cr_val), R(cr_val));
|
||||
Jit->SETcc(CC_Z, R(EDX));
|
||||
Jit->SHL(32, R(EDX), Imm8(1));
|
||||
Jit->OR(32, R(EAX), R(EDX));
|
||||
Jit->SETcc(CC_Z, R(RSCRATCH2));
|
||||
Jit->SHL(32, R(RSCRATCH2), Imm8(1));
|
||||
Jit->OR(32, R(RSCRATCH), R(RSCRATCH2));
|
||||
|
||||
// GT: Value > 0.
|
||||
Jit->XOR(32, R(EDX), R(EDX));
|
||||
Jit->XOR(32, R(RSCRATCH2), R(RSCRATCH2));
|
||||
Jit->TEST(64, R(cr_val), R(cr_val));
|
||||
Jit->SETcc(CC_G, R(EDX));
|
||||
Jit->SHL(32, R(EDX), Imm8(2));
|
||||
Jit->OR(32, R(EAX), R(EDX));
|
||||
Jit->SETcc(CC_G, R(RSCRATCH2));
|
||||
Jit->SHL(32, R(RSCRATCH2), Imm8(2));
|
||||
Jit->OR(32, R(RSCRATCH), R(RSCRATCH2));
|
||||
|
||||
// LT: Bit 62 set.
|
||||
Jit->MOV(64, R(EDX), R(cr_val));
|
||||
Jit->SHR(64, R(EDX), Imm8(62 - 3));
|
||||
Jit->AND(32, R(EDX), Imm8(0x8));
|
||||
Jit->OR(32, R(EAX), R(EDX));
|
||||
Jit->MOV(64, R(RSCRATCH2), R(cr_val));
|
||||
Jit->SHR(64, R(RSCRATCH2), Imm8(62 - 3));
|
||||
Jit->AND(32, R(RSCRATCH2), Imm8(0x8));
|
||||
Jit->OR(32, R(RSCRATCH), R(RSCRATCH2));
|
||||
|
||||
Jit->MOV(32, R(cr_val), R(EAX));
|
||||
Jit->MOV(32, R(cr_val), R(RSCRATCH));
|
||||
RI.regs[cr_val] = I;
|
||||
regNormalRegClear(RI, I);
|
||||
break;
|
||||
|
@ -1462,34 +1463,34 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
X64Reg cr_val = regUReg(RI, I);
|
||||
Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I)));
|
||||
|
||||
Jit->MOV(64, R(RDX), Imm64(1ull << 32));
|
||||
Jit->MOV(64, R(RSCRATCH2), Imm64(1ull << 32));
|
||||
|
||||
// SO
|
||||
Jit->MOV(64, R(RAX), R(cr_val));
|
||||
Jit->SHL(64, R(RAX), Imm8(63));
|
||||
Jit->SHR(64, R(RAX), Imm8(63 - 61));
|
||||
Jit->OR(64, R(RDX), R(RAX));
|
||||
Jit->MOV(64, R(RSCRATCH), R(cr_val));
|
||||
Jit->SHL(64, R(RSCRATCH), Imm8(63));
|
||||
Jit->SHR(64, R(RSCRATCH), Imm8(63 - 61));
|
||||
Jit->OR(64, R(RSCRATCH2), R(RSCRATCH));
|
||||
|
||||
// EQ
|
||||
Jit->MOV(64, R(RAX), R(cr_val));
|
||||
Jit->NOT(64, R(RAX));
|
||||
Jit->AND(64, R(RAX), Imm8(CR_EQ));
|
||||
Jit->OR(64, R(RDX), R(RAX));
|
||||
Jit->MOV(64, R(RSCRATCH), R(cr_val));
|
||||
Jit->NOT(64, R(RSCRATCH));
|
||||
Jit->AND(64, R(RSCRATCH), Imm8(CR_EQ));
|
||||
Jit->OR(64, R(RSCRATCH2), R(RSCRATCH));
|
||||
|
||||
// GT
|
||||
Jit->MOV(64, R(RAX), R(cr_val));
|
||||
Jit->NOT(64, R(RAX));
|
||||
Jit->AND(64, R(RAX), Imm8(CR_GT));
|
||||
Jit->SHL(64, R(RAX), Imm8(63 - 2));
|
||||
Jit->OR(64, R(RDX), R(RAX));
|
||||
Jit->MOV(64, R(RSCRATCH), R(cr_val));
|
||||
Jit->NOT(64, R(RSCRATCH));
|
||||
Jit->AND(64, R(RSCRATCH), Imm8(CR_GT));
|
||||
Jit->SHL(64, R(RSCRATCH), Imm8(63 - 2));
|
||||
Jit->OR(64, R(RSCRATCH2), R(RSCRATCH));
|
||||
|
||||
// LT
|
||||
Jit->MOV(64, R(RAX), R(cr_val));
|
||||
Jit->AND(64, R(RAX), Imm8(CR_LT));
|
||||
Jit->SHL(64, R(RAX), Imm8(62 - 3));
|
||||
Jit->OR(64, R(RDX), R(RAX));
|
||||
Jit->MOV(64, R(RSCRATCH), R(cr_val));
|
||||
Jit->AND(64, R(RSCRATCH), Imm8(CR_LT));
|
||||
Jit->SHL(64, R(RSCRATCH), Imm8(62 - 3));
|
||||
Jit->OR(64, R(RSCRATCH2), R(RSCRATCH));
|
||||
|
||||
Jit->MOV(64, R(cr_val), R(RDX));
|
||||
Jit->MOV(64, R(cr_val), R(RSCRATCH2));
|
||||
|
||||
RI.regs[cr_val] = I;
|
||||
regNormalRegClear(RI, I);
|
||||
|
@ -1501,10 +1502,10 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
break;
|
||||
|
||||
X64Reg reg = regUReg(RI, I);
|
||||
Jit->MOV(64, R(RAX), Imm64(1ull << 61));
|
||||
Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RAX));
|
||||
Jit->SETcc(CC_NZ, R(AL));
|
||||
Jit->MOVZX(32, 8, reg, R(AL));
|
||||
Jit->MOV(64, R(RSCRATCH), Imm64(1ull << 61));
|
||||
Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RSCRATCH));
|
||||
Jit->SETcc(CC_NZ, R(RSCRATCH));
|
||||
Jit->MOVZX(32, 8, reg, R(RSCRATCH));
|
||||
RI.regs[reg] = I;
|
||||
regNormalRegClear(RI, I);
|
||||
break;
|
||||
|
@ -1516,8 +1517,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
|
||||
X64Reg reg = regUReg(RI, I);
|
||||
Jit->CMP(32, regLocForInst(RI, getOp1(I)), Imm32(0));
|
||||
Jit->SETcc(CC_Z, R(AL));
|
||||
Jit->MOVZX(32, 8, reg, R(AL));
|
||||
Jit->SETcc(CC_Z, R(RSCRATCH));
|
||||
Jit->MOVZX(32, 8, reg, R(RSCRATCH));
|
||||
RI.regs[reg] = I;
|
||||
regNormalRegClear(RI, I);
|
||||
break;
|
||||
|
@ -1529,8 +1530,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
|
||||
X64Reg reg = regUReg(RI, I);
|
||||
Jit->CMP(64, regLocForInst(RI, getOp1(I)), Imm8(0));
|
||||
Jit->SETcc(CC_G, R(AL));
|
||||
Jit->MOVZX(32, 8, reg, R(AL));
|
||||
Jit->SETcc(CC_G, R(RSCRATCH));
|
||||
Jit->MOVZX(32, 8, reg, R(RSCRATCH));
|
||||
RI.regs[reg] = I;
|
||||
regNormalRegClear(RI, I);
|
||||
break;
|
||||
|
@ -1541,10 +1542,10 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
break;
|
||||
|
||||
X64Reg reg = regUReg(RI, I);
|
||||
Jit->MOV(64, R(RAX), Imm64(1ull << 62));
|
||||
Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RAX));
|
||||
Jit->SETcc(CC_NZ, R(AL));
|
||||
Jit->MOVZX(32, 8, reg, R(AL));
|
||||
Jit->MOV(64, R(RSCRATCH), Imm64(1ull << 62));
|
||||
Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RSCRATCH));
|
||||
Jit->SETcc(CC_NZ, R(RSCRATCH));
|
||||
Jit->MOVZX(32, 8, reg, R(RSCRATCH));
|
||||
RI.regs[reg] = I;
|
||||
regNormalRegClear(RI, I);
|
||||
break;
|
||||
|
@ -1555,9 +1556,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
break;
|
||||
|
||||
X64Reg reg = fregFindFreeReg(RI);
|
||||
Jit->MOV(32, R(EDX), regLocForInst(RI, getOp1(I)));
|
||||
RI.Jit->SafeLoadToReg(EDX, R(EDX), 32, 0, regsInUse(RI), false, EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM);
|
||||
Jit->MOVD_xmm(reg, R(EDX));
|
||||
Jit->MOV(32, R(RSCRATCH2), regLocForInst(RI, getOp1(I)));
|
||||
RI.Jit->SafeLoadToReg(RSCRATCH2, R(RSCRATCH2), 32, 0, regsInUse(RI), false, EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM);
|
||||
Jit->MOVD_xmm(reg, R(RSCRATCH2));
|
||||
RI.fregs[reg] = I;
|
||||
regNormalRegClear(RI, I);
|
||||
break;
|
||||
|
@ -1569,9 +1570,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
|
||||
X64Reg reg = fregFindFreeReg(RI);
|
||||
const OpArg loc = regLocForInst(RI, getOp1(I));
|
||||
Jit->MOV(32, R(EDX), loc);
|
||||
RI.Jit->SafeLoadToReg(RDX, R(EDX), 64, 0, regsInUse(RI), false, EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM);
|
||||
Jit->MOVQ_xmm(reg, R(RDX));
|
||||
Jit->MOV(32, R(RSCRATCH2), loc);
|
||||
RI.Jit->SafeLoadToReg(RSCRATCH2, R(RSCRATCH2), 64, 0, regsInUse(RI), false, EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM);
|
||||
Jit->MOVQ_xmm(reg, R(RSCRATCH2));
|
||||
RI.fregs[reg] = I;
|
||||
regNormalRegClear(RI, I);
|
||||
break;
|
||||
|
@ -1581,8 +1582,6 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
if (!thisUsed)
|
||||
break;
|
||||
|
||||
regSpill(RI, EAX);
|
||||
regSpill(RI, EDX);
|
||||
X64Reg reg = fregFindFreeReg(RI);
|
||||
// The lower 3 bits are the GQR index; the next bit is inst.W.
|
||||
unsigned int quantreg = (*I >> 16) & 0x7;
|
||||
|
@ -1591,12 +1590,12 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
// Hence, we need to mask out the unused bits. The layout of the GQR register is
|
||||
// UU[SCALE]UUUUU[TYPE] where SCALE is 6 bits and TYPE is 3 bits, so we have to AND with
|
||||
// 0b0011111100000111, or 0x3F07.
|
||||
Jit->MOV(32, R(EAX), Imm32(0x3F07));
|
||||
Jit->AND(32, R(EAX), M(((char *)&GQR(quantreg)) + 2));
|
||||
Jit->OR(32, R(EAX), Imm8(w << 3));
|
||||
Jit->MOV(32, R(RSCRATCH), Imm32(0x3F07));
|
||||
Jit->AND(32, R(RSCRATCH), M(((char *)&GQR(quantreg)) + 2));
|
||||
Jit->OR(32, R(RSCRATCH), Imm8(w << 3));
|
||||
|
||||
Jit->MOV(32, R(EDX), regLocForInst(RI, getOp1(I)));
|
||||
Jit->CALLptr(MScaled(EAX, SCALE_8, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedLoadQuantized)));
|
||||
Jit->MOV(32, R(RSCRATCH2), regLocForInst(RI, getOp1(I)));
|
||||
Jit->CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedLoadQuantized)));
|
||||
Jit->MOVAPD(reg, R(XMM0));
|
||||
RI.fregs[reg] = I;
|
||||
regNormalRegClear(RI, I);
|
||||
|
@ -1604,15 +1603,15 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
}
|
||||
case StoreSingle:
|
||||
{
|
||||
regSpill(RI, EAX);
|
||||
regSpill(RI, RSCRATCH);
|
||||
const OpArg loc1 = fregLocForInst(RI, getOp1(I));
|
||||
if (loc1.IsSimpleReg())
|
||||
Jit->MOVD_xmm(R(EAX), loc1.GetSimpleReg());
|
||||
Jit->MOVD_xmm(R(RSCRATCH), loc1.GetSimpleReg());
|
||||
else
|
||||
Jit->MOV(32, R(EAX), loc1);
|
||||
Jit->MOV(32, R(RSCRATCH), loc1);
|
||||
|
||||
Jit->MOV(32, R(EDX), regLocForInst(RI, getOp2(I)));
|
||||
RI.Jit->SafeWriteRegToReg(EAX, EDX, 32, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM);
|
||||
Jit->MOV(32, R(RSCRATCH2), regLocForInst(RI, getOp2(I)));
|
||||
RI.Jit->SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 32, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM);
|
||||
if (RI.IInfo[I - RI.FirstI] & 4)
|
||||
fregClearInst(RI, getOp1(I));
|
||||
if (RI.IInfo[I - RI.FirstI] & 8)
|
||||
|
@ -1621,14 +1620,14 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
}
|
||||
case StoreDouble:
|
||||
{
|
||||
regSpill(RI, EAX);
|
||||
regSpill(RI, RSCRATCH);
|
||||
|
||||
OpArg value = fregLocForInst(RI, getOp1(I));
|
||||
OpArg address = regLocForInst(RI, getOp2(I));
|
||||
Jit->MOVAPD(XMM0, value);
|
||||
Jit->MOVQ_xmm(R(RAX), XMM0);
|
||||
Jit->MOV(32, R(EDX), address);
|
||||
RI.Jit->SafeWriteRegToReg(RAX, EDX, 64, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM);
|
||||
Jit->MOVQ_xmm(R(RSCRATCH), XMM0);
|
||||
Jit->MOV(32, R(RSCRATCH2), address);
|
||||
RI.Jit->SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 64, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM);
|
||||
|
||||
if (RI.IInfo[I - RI.FirstI] & 4)
|
||||
fregClearInst(RI, getOp1(I));
|
||||
|
@ -1638,16 +1637,16 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
}
|
||||
case StorePaired:
|
||||
{
|
||||
regSpill(RI, EAX);
|
||||
regSpill(RI, EDX);
|
||||
regSpill(RI, RSCRATCH);
|
||||
regSpill(RI, RSCRATCH2);
|
||||
u32 quantreg = *I >> 24;
|
||||
Jit->MOV(32, R(EAX), Imm32(0x3F07));
|
||||
Jit->AND(32, R(EAX), PPCSTATE(spr[SPR_GQR0 + quantreg]));
|
||||
Jit->MOVZX(32, 8, EDX, R(AL));
|
||||
Jit->MOV(32, R(RSCRATCH), Imm32(0x3F07));
|
||||
Jit->AND(32, R(RSCRATCH), PPCSTATE(spr[SPR_GQR0 + quantreg]));
|
||||
Jit->MOVZX(32, 8, RSCRATCH2, R(RSCRATCH));
|
||||
|
||||
Jit->MOV(32, R(EDX), regLocForInst(RI, getOp2(I)));
|
||||
Jit->MOV(32, R(RSCRATCH2), regLocForInst(RI, getOp2(I)));
|
||||
Jit->MOVAPD(XMM0, fregLocForInst(RI, getOp1(I)));
|
||||
Jit->CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedStoreQuantized)));
|
||||
Jit->CALLptr(MScaled(RSCRATCH2, SCALE_8, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedStoreQuantized)));
|
||||
if (RI.IInfo[I - RI.FirstI] & 4)
|
||||
fregClearInst(RI, getOp1(I));
|
||||
if (RI.IInfo[I - RI.FirstI] & 8)
|
||||
|
@ -1791,9 +1790,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
X64Reg reg = fregFindFreeReg(RI);
|
||||
unsigned ppcreg = *I >> 8;
|
||||
char *p = (char*)&(PowerPC::ppcState.ps[ppcreg][0]);
|
||||
Jit->MOV(32, R(EDX), M(p+4));
|
||||
Jit->AND(32, R(EDX), Imm32(0x7ff00000));
|
||||
Jit->CMP(32, R(EDX), Imm32(0x38000000));
|
||||
Jit->MOV(32, R(RSCRATCH2), M(p+4));
|
||||
Jit->AND(32, R(RSCRATCH2), Imm32(0x7ff00000));
|
||||
Jit->CMP(32, R(RSCRATCH2), Imm32(0x38000000));
|
||||
FixupBranch ok = Jit->J_CC(CC_AE);
|
||||
Jit->AND(32, M(p+4), Imm32(0x80000000));
|
||||
Jit->MOV(32, M(p), Imm32(0));
|
||||
|
@ -1912,7 +1911,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
Jit->MOVSD(M(isSNANTemp[1]), XMM0);
|
||||
}
|
||||
Jit->ABI_CallFunction((void*)checkIsSNAN);
|
||||
Jit->TEST(8, R(EAX), R(EAX));
|
||||
Jit->TEST(8, R(ABI_RETURN), R(ABI_RETURN));
|
||||
FixupBranch ok = Jit->J_CC(CC_Z);
|
||||
Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); // FPSCR.FX = 1;
|
||||
Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXSNAN)); // FPSCR.Hex |= mask;
|
||||
|
@ -1941,7 +1940,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
Jit->MOVSD(M(isSNANTemp[1]), XMM0);
|
||||
}
|
||||
Jit->ABI_CallFunction((void*)checkIsSNAN);
|
||||
Jit->TEST(8, R(EAX), R(EAX));
|
||||
Jit->TEST(8, R(ABI_RETURN), R(ABI_RETURN));
|
||||
FixupBranch finish = Jit->J_CC(CC_Z);
|
||||
Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); // FPSCR.FX = 1;
|
||||
Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask;
|
||||
|
@ -2195,8 +2194,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
}
|
||||
case InterpreterBranch:
|
||||
{
|
||||
Jit->MOV(32, R(EAX), PPCSTATE(npc));
|
||||
Jit->WriteExitDestInOpArg(R(EAX));
|
||||
Jit->MOV(32, R(RSCRATCH), PPCSTATE(npc));
|
||||
Jit->WriteExitDestInOpArg(R(RSCRATCH));
|
||||
break;
|
||||
}
|
||||
case RFIExit:
|
||||
|
@ -2204,17 +2203,17 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
// See Interpreter rfi for details
|
||||
const u32 mask = 0x87C0FFFF;
|
||||
// MSR = (MSR & ~mask) | (SRR1 & mask);
|
||||
Jit->MOV(32, R(EAX), PPCSTATE(msr));
|
||||
Jit->MOV(32, R(EDX), PPCSTATE_SRR1);
|
||||
Jit->AND(32, R(EAX), Imm32(~mask));
|
||||
Jit->AND(32, R(EDX), Imm32(mask));
|
||||
Jit->OR(32, R(EAX), R(EDX));
|
||||
Jit->MOV(32, R(RSCRATCH), PPCSTATE(msr));
|
||||
Jit->MOV(32, R(RSCRATCH2), PPCSTATE_SRR1);
|
||||
Jit->AND(32, R(RSCRATCH), Imm32(~mask));
|
||||
Jit->AND(32, R(RSCRATCH2), Imm32(mask));
|
||||
Jit->OR(32, R(RSCRATCH), R(RSCRATCH2));
|
||||
// MSR &= 0xFFFBFFFF; // Mask used to clear the bit MSR[13]
|
||||
Jit->AND(32, R(EAX), Imm32(0xFFFBFFFF));
|
||||
Jit->MOV(32, PPCSTATE(msr), R(EAX));
|
||||
Jit->AND(32, R(RSCRATCH), Imm32(0xFFFBFFFF));
|
||||
Jit->MOV(32, PPCSTATE(msr), R(RSCRATCH));
|
||||
// NPC = SRR0;
|
||||
Jit->MOV(32, R(EAX), PPCSTATE_SRR0);
|
||||
Jit->WriteRfiExitDestInOpArg(R(EAX));
|
||||
Jit->MOV(32, R(RSCRATCH), PPCSTATE_SRR0);
|
||||
Jit->WriteRfiExitDestInOpArg(R(RSCRATCH));
|
||||
break;
|
||||
}
|
||||
case FPExceptionCheck:
|
||||
|
@ -2255,8 +2254,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
Jit->OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_ISI));
|
||||
|
||||
// Remove the invalid instruction from the icache, forcing a recompile
|
||||
Jit->MOV(64, R(RAX), ImmPtr(jit->GetBlockCache()->GetICachePtr(InstLoc)));
|
||||
Jit->MOV(32, MatR(RAX), Imm32(JIT_ICACHE_INVALID_WORD));
|
||||
Jit->MOV(64, R(RSCRATCH), ImmPtr(jit->GetBlockCache()->GetICachePtr(InstLoc)));
|
||||
Jit->MOV(32, MatR(RSCRATCH), Imm32(JIT_ICACHE_INVALID_WORD));
|
||||
Jit->WriteExceptionExit();
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -320,8 +320,8 @@ void JitIL::WriteCallInterpreter(UGeckoInstruction inst)
|
|||
ABI_CallFunctionC((void*)instr, inst.hex);
|
||||
if (js.isLastInstruction)
|
||||
{
|
||||
MOV(32, R(EAX), PPCSTATE(npc));
|
||||
WriteRfiExitDestInOpArg(R(EAX));
|
||||
MOV(32, R(RSCRATCH), PPCSTATE(npc));
|
||||
WriteRfiExitDestInOpArg(R(RSCRATCH));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -341,8 +341,8 @@ void JitIL::FallBackToInterpreter(UGeckoInstruction _inst)
|
|||
void JitIL::HLEFunction(UGeckoInstruction _inst)
|
||||
{
|
||||
ABI_CallFunctionCC((void*)&HLE::Execute, js.compilerPC, _inst.hex);
|
||||
MOV(32, R(EAX), PPCSTATE(npc));
|
||||
WriteExitDestInOpArg(R(EAX));
|
||||
MOV(32, R(RSCRATCH), PPCSTATE(npc));
|
||||
WriteExitDestInOpArg(R(RSCRATCH));
|
||||
}
|
||||
|
||||
void JitIL::DoNothing(UGeckoInstruction _inst)
|
||||
|
|
|
@ -9,8 +9,13 @@
|
|||
#include "Core/PowerPC/JitCommon/JitAsmCommon.h"
|
||||
#include "Core/PowerPC/JitCommon/JitBase.h"
|
||||
|
||||
#define QUANTIZED_REGS_TO_SAVE (ABI_ALL_CALLER_SAVED & ~((1 << RAX) | (1 << RCX) | \
|
||||
(1 << (XMM0+16)) | (1 << (XMM1+16))))
|
||||
#define QUANTIZED_REGS_TO_SAVE \
|
||||
(ABI_ALL_CALLER_SAVED & ~(\
|
||||
(1 << RSCRATCH) | \
|
||||
(1 << RSCRATCH2) | \
|
||||
(1 << RSCRATCH_EXTRA)| \
|
||||
(1 << (XMM0+16)) | \
|
||||
(1 << (XMM1+16))))
|
||||
|
||||
using namespace Gen;
|
||||
|
||||
|
@ -18,12 +23,12 @@ static int temp32;
|
|||
|
||||
void CommonAsmRoutines::GenFifoWrite(int size)
|
||||
{
|
||||
// Assume value in EDX
|
||||
// Assume value in RSCRATCH2
|
||||
PUSH(ESI);
|
||||
MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
|
||||
MOV(32, R(RSCRATCH), Imm32((u32)(u64)GPFifo::m_gatherPipe));
|
||||
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
|
||||
|
||||
SwapAndStore(size, MComplex(RAX, RSI, 1, 0), EDX);
|
||||
SwapAndStore(size, MComplex(RSCRATCH, ESI, 1, 0), RSCRATCH2);
|
||||
|
||||
ADD(32, R(ESI), Imm8(size >> 3));
|
||||
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
|
||||
|
@ -36,10 +41,10 @@ void CommonAsmRoutines::GenFifoFloatWrite()
|
|||
// Assume value in XMM0
|
||||
PUSH(ESI);
|
||||
MOVSS(M(&temp32), XMM0);
|
||||
MOV(32, R(EDX), M(&temp32));
|
||||
MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
|
||||
MOV(32, R(RSCRATCH2), M(&temp32));
|
||||
MOV(32, R(RSCRATCH), Imm32((u32)(u64)GPFifo::m_gatherPipe));
|
||||
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
|
||||
SwapAndStore(32, MComplex(RAX, RSI, 1, 0), EDX);
|
||||
SwapAndStore(32, MComplex(RSCRATCH, RSI, 1, 0), RSCRATCH2);
|
||||
ADD(32, R(ESI), Imm8(4));
|
||||
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
|
||||
POP(ESI);
|
||||
|
@ -49,40 +54,40 @@ void CommonAsmRoutines::GenFifoFloatWrite()
|
|||
void CommonAsmRoutines::GenFrsqrte()
|
||||
{
|
||||
// Assume input in XMM0.
|
||||
// This function clobbers EAX, ECX, and EDX.
|
||||
MOVQ_xmm(R(RAX), XMM0);
|
||||
// This function clobbers all three scratch registers (RSCRATCH, RSCRATCH2 and RSCRATCH_EXTRA).
|
||||
MOVQ_xmm(R(RSCRATCH), XMM0);
|
||||
|
||||
// Negative and zero inputs set an exception and take the complex path.
|
||||
TEST(64, R(RAX), R(RAX));
|
||||
TEST(64, R(RSCRATCH), R(RSCRATCH));
|
||||
FixupBranch zero = J_CC(CC_Z, true);
|
||||
FixupBranch negative = J_CC(CC_S, true);
|
||||
MOV(64, R(RCX), R(RAX));
|
||||
SHR(64, R(RCX), Imm8(52));
|
||||
MOV(64, R(RSCRATCH_EXTRA), R(RSCRATCH));
|
||||
SHR(64, R(RSCRATCH_EXTRA), Imm8(52));
|
||||
|
||||
// Zero and max exponents (non-normal floats) take the complex path.
|
||||
FixupBranch complex1 = J_CC(CC_Z, true);
|
||||
CMP(32, R(ECX), Imm32(0x7FF));
|
||||
CMP(32, R(RSCRATCH_EXTRA), Imm32(0x7FF));
|
||||
FixupBranch complex2 = J_CC(CC_E, true);
|
||||
|
||||
SUB(32, R(ECX), Imm32(0x3FD));
|
||||
SAR(32, R(ECX), Imm8(1));
|
||||
MOV(32, R(EDX), Imm32(0x3FF));
|
||||
SUB(32, R(EDX), R(ECX));
|
||||
SHL(64, R(RDX), Imm8(52)); // exponent = ((0x3FFLL << 52) - ((exponent - (0x3FELL << 52)) / 2)) & (0x7FFLL << 52);
|
||||
SUB(32, R(RSCRATCH_EXTRA), Imm32(0x3FD));
|
||||
SAR(32, R(RSCRATCH_EXTRA), Imm8(1));
|
||||
MOV(32, R(RSCRATCH2), Imm32(0x3FF));
|
||||
SUB(32, R(RSCRATCH2), R(RSCRATCH_EXTRA));
|
||||
SHL(64, R(RSCRATCH2), Imm8(52)); // exponent = ((0x3FFLL << 52) - ((exponent - (0x3FELL << 52)) / 2)) & (0x7FFLL << 52);
|
||||
|
||||
MOV(64, R(RCX), R(RAX));
|
||||
SHR(64, R(RCX), Imm8(48));
|
||||
AND(32, R(ECX), Imm8(0x1F));
|
||||
XOR(32, R(ECX), Imm8(0x10)); // int index = i / 2048 + (odd_exponent ? 16 : 0);
|
||||
MOV(64, R(RSCRATCH_EXTRA), R(RSCRATCH));
|
||||
SHR(64, R(RSCRATCH_EXTRA), Imm8(48));
|
||||
AND(32, R(RSCRATCH_EXTRA), Imm8(0x1F));
|
||||
XOR(32, R(RSCRATCH_EXTRA), Imm8(0x10)); // int index = i / 2048 + (odd_exponent ? 16 : 0);
|
||||
|
||||
SHR(64, R(RAX), Imm8(37));
|
||||
AND(32, R(EAX), Imm32(0x7FF));
|
||||
IMUL(32, EAX, MScaled(RCX, SCALE_4, (u32)(u64)MathUtil::frsqrte_expected_dec));
|
||||
MOV(32, R(ECX), MScaled(RCX, SCALE_4, (u32)(u64)MathUtil::frsqrte_expected_base));
|
||||
SUB(32, R(ECX), R(EAX));
|
||||
SHL(64, R(RCX), Imm8(26));
|
||||
OR(64, R(RDX), R(RCX)); // vali |= (s64)(frsqrte_expected_base[index] - frsqrte_expected_dec[index] * (i % 2048)) << 26;
|
||||
MOVQ_xmm(XMM0, R(RDX));
|
||||
SHR(64, R(RSCRATCH), Imm8(37));
|
||||
AND(32, R(RSCRATCH), Imm32(0x7FF));
|
||||
IMUL(32, RSCRATCH, MScaled(RSCRATCH_EXTRA, SCALE_4, (u32)(u64)MathUtil::frsqrte_expected_dec));
|
||||
MOV(32, R(RSCRATCH_EXTRA), MScaled(RSCRATCH_EXTRA, SCALE_4, (u32)(u64)MathUtil::frsqrte_expected_base));
|
||||
SUB(32, R(RSCRATCH_EXTRA), R(RSCRATCH));
|
||||
SHL(64, R(RSCRATCH_EXTRA), Imm8(26));
|
||||
OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(frsqrte_expected_base[index] - frsqrte_expected_dec[index] * (i % 2048)) << 26;
|
||||
MOVQ_xmm(XMM0, R(RSCRATCH2));
|
||||
RET();
|
||||
|
||||
// Exception flags for zero input.
|
||||
|
@ -114,44 +119,44 @@ void CommonAsmRoutines::GenFrsqrte()
|
|||
void CommonAsmRoutines::GenFres()
|
||||
{
|
||||
// Assume input in XMM0.
|
||||
// This function clobbers EAX, ECX, and EDX.
|
||||
MOVQ_xmm(R(RAX), XMM0);
|
||||
// This function clobbers all three scratch registers (RSCRATCH, RSCRATCH2 and RSCRATCH_EXTRA).
|
||||
MOVQ_xmm(R(RSCRATCH), XMM0);
|
||||
|
||||
// Zero inputs set an exception and take the complex path.
|
||||
TEST(64, R(RAX), R(RAX));
|
||||
TEST(64, R(RSCRATCH), R(RSCRATCH));
|
||||
FixupBranch zero = J_CC(CC_Z);
|
||||
|
||||
MOV(64, R(RCX), R(RAX));
|
||||
SHR(64, R(RCX), Imm8(52));
|
||||
MOV(32, R(EDX), R(ECX));
|
||||
AND(32, R(ECX), Imm32(0x7FF)); // exp
|
||||
AND(32, R(EDX), Imm32(0x800)); // sign
|
||||
CMP(32, R(ECX), Imm32(895));
|
||||
MOV(64, R(RSCRATCH_EXTRA), R(RSCRATCH));
|
||||
SHR(64, R(RSCRATCH_EXTRA), Imm8(52));
|
||||
MOV(32, R(RSCRATCH2), R(RSCRATCH_EXTRA));
|
||||
AND(32, R(RSCRATCH_EXTRA), Imm32(0x7FF)); // exp
|
||||
AND(32, R(RSCRATCH2), Imm32(0x800)); // sign
|
||||
CMP(32, R(RSCRATCH_EXTRA), Imm32(895));
|
||||
// Take the complex path for very large/small exponents.
|
||||
FixupBranch complex1 = J_CC(CC_L);
|
||||
CMP(32, R(ECX), Imm32(1149));
|
||||
CMP(32, R(RSCRATCH_EXTRA), Imm32(1149));
|
||||
FixupBranch complex2 = J_CC(CC_GE);
|
||||
|
||||
SUB(32, R(ECX), Imm32(0x7FD));
|
||||
NEG(32, R(ECX));
|
||||
OR(32, R(ECX), R(EDX));
|
||||
SHL(64, R(RCX), Imm8(52)); // vali = sign | exponent
|
||||
SUB(32, R(RSCRATCH_EXTRA), Imm32(0x7FD));
|
||||
NEG(32, R(RSCRATCH_EXTRA));
|
||||
OR(32, R(RSCRATCH_EXTRA), R(RSCRATCH2));
|
||||
SHL(64, R(RSCRATCH_EXTRA), Imm8(52)); // vali = sign | exponent
|
||||
|
||||
MOV(64, R(RDX), R(RAX));
|
||||
SHR(64, R(RAX), Imm8(37));
|
||||
SHR(64, R(RDX), Imm8(47));
|
||||
AND(32, R(EAX), Imm32(0x3FF)); // i % 1024
|
||||
AND(32, R(RDX), Imm8(0x1F)); // i / 1024
|
||||
MOV(64, R(RSCRATCH2), R(RSCRATCH));
|
||||
SHR(64, R(RSCRATCH), Imm8(37));
|
||||
SHR(64, R(RSCRATCH2), Imm8(47));
|
||||
AND(32, R(RSCRATCH), Imm32(0x3FF)); // i % 1024
|
||||
AND(32, R(RSCRATCH2), Imm8(0x1F)); // i / 1024
|
||||
|
||||
IMUL(32, EAX, MScaled(RDX, SCALE_4, (u32)(u64)MathUtil::fres_expected_dec));
|
||||
ADD(32, R(EAX), Imm8(1));
|
||||
SHR(32, R(EAX), Imm8(1));
|
||||
IMUL(32, RSCRATCH, MScaled(RSCRATCH2, SCALE_4, (u32)(u64)MathUtil::fres_expected_dec));
|
||||
ADD(32, R(RSCRATCH), Imm8(1));
|
||||
SHR(32, R(RSCRATCH), Imm8(1));
|
||||
|
||||
MOV(32, R(EDX), MScaled(RDX, SCALE_4, (u32)(u64)MathUtil::fres_expected_base));
|
||||
SUB(32, R(EDX), R(EAX));
|
||||
SHL(64, R(RDX), Imm8(29));
|
||||
OR(64, R(RDX), R(RCX)); // vali |= (s64)(fres_expected_base[i / 1024] - (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2) << 29
|
||||
MOVQ_xmm(XMM0, R(RDX));
|
||||
MOV(32, R(RSCRATCH2), MScaled(RSCRATCH2, SCALE_4, (u32)(u64)MathUtil::fres_expected_base));
|
||||
SUB(32, R(RSCRATCH2), R(RSCRATCH));
|
||||
SHL(64, R(RSCRATCH2), Imm8(29));
|
||||
OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(fres_expected_base[i / 1024] - (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2) << 29
|
||||
MOVQ_xmm(XMM0, R(RSCRATCH2));
|
||||
RET();
|
||||
|
||||
// Exception flags for zero input.
|
||||
|
@ -247,21 +252,21 @@ void CommonAsmRoutines::GenQuantizedStores()
|
|||
|
||||
SHUFPS(XMM0, R(XMM0), 1);
|
||||
MOVQ_xmm(M(&psTemp[0]), XMM0);
|
||||
TEST(32, R(ECX), Imm32(0x0C000000));
|
||||
TEST(32, R(RSCRATCH_EXTRA), Imm32(0x0C000000));
|
||||
FixupBranch too_complex = J_CC(CC_NZ, true);
|
||||
MOV(64, R(RAX), M(&psTemp[0]));
|
||||
SwapAndStore(64, MComplex(RBX, RCX, SCALE_1, 0), RAX);
|
||||
MOV(64, R(RSCRATCH), M(&psTemp[0]));
|
||||
SwapAndStore(64, MComplex(RMEM, RSCRATCH_EXTRA, SCALE_1, 0), RSCRATCH);
|
||||
FixupBranch skip_complex = J(true);
|
||||
SetJumpTarget(too_complex);
|
||||
ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true);
|
||||
ABI_CallFunctionR((void *)&WriteDual32, RCX);
|
||||
ABI_CallFunctionR((void *)&WriteDual32, RSCRATCH_EXTRA);
|
||||
ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true);
|
||||
SetJumpTarget(skip_complex);
|
||||
RET();
|
||||
|
||||
const u8* storePairedU8 = AlignCode4();
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
|
||||
SHR(32, R(RSCRATCH), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
#ifdef QUANTIZE_OVERFLOW_SAFE
|
||||
|
@ -272,14 +277,14 @@ void CommonAsmRoutines::GenQuantizedStores()
|
|||
CVTTPS2DQ(XMM0, R(XMM0));
|
||||
PACKSSDW(XMM0, R(XMM0));
|
||||
PACKUSWB(XMM0, R(XMM0));
|
||||
MOVD_xmm(R(EAX), XMM0);
|
||||
SafeWriteRegToReg(AX, ECX, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
MOVD_xmm(R(RSCRATCH), XMM0);
|
||||
SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
|
||||
RET();
|
||||
|
||||
const u8* storePairedS8 = AlignCode4();
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
|
||||
SHR(32, R(RSCRATCH), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
#ifdef QUANTIZE_OVERFLOW_SAFE
|
||||
|
@ -290,15 +295,15 @@ void CommonAsmRoutines::GenQuantizedStores()
|
|||
CVTTPS2DQ(XMM0, R(XMM0));
|
||||
PACKSSDW(XMM0, R(XMM0));
|
||||
PACKSSWB(XMM0, R(XMM0));
|
||||
MOVD_xmm(R(EAX), XMM0);
|
||||
MOVD_xmm(R(RSCRATCH), XMM0);
|
||||
|
||||
SafeWriteRegToReg(AX, ECX, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
|
||||
RET();
|
||||
|
||||
const u8* storePairedU16 = AlignCode4();
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
|
||||
SHR(32, R(RSCRATCH), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
|
||||
|
@ -313,18 +318,18 @@ void CommonAsmRoutines::GenQuantizedStores()
|
|||
MOVQ_xmm(M(psTemp), XMM0);
|
||||
// place ps[0] into the higher word, ps[1] into the lower
|
||||
// so no ROL is needed after the BSWAP
|
||||
MOVZX(32, 16, EAX, M((char*)psTemp + 0));
|
||||
SHL(32, R(EAX), Imm8(16));
|
||||
MOV(16, R(AX), M((char*)psTemp + 4));
|
||||
MOVZX(32, 16, RSCRATCH, M((char*)psTemp + 0));
|
||||
SHL(32, R(RSCRATCH), Imm8(16));
|
||||
MOV(16, R(RSCRATCH), M((char*)psTemp + 4));
|
||||
|
||||
BSWAP(32, EAX);
|
||||
SafeWriteRegToReg(EAX, ECX, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
BSWAP(32, RSCRATCH);
|
||||
SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
|
||||
RET();
|
||||
|
||||
const u8* storePairedS16 = AlignCode4();
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
|
||||
SHR(32, R(RSCRATCH), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS));
|
||||
// SHUFPS or UNPCKLPS might be a better choice here. The last one might just be an alias though.
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
|
@ -335,10 +340,10 @@ void CommonAsmRoutines::GenQuantizedStores()
|
|||
#endif
|
||||
CVTTPS2DQ(XMM0, R(XMM0));
|
||||
PACKSSDW(XMM0, R(XMM0));
|
||||
MOVD_xmm(R(EAX), XMM0);
|
||||
BSWAP(32, EAX);
|
||||
ROL(32, R(EAX), Imm8(16));
|
||||
SafeWriteRegToReg(EAX, ECX, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
MOVD_xmm(R(RSCRATCH), XMM0);
|
||||
BSWAP(32, RSCRATCH);
|
||||
ROL(32, R(RSCRATCH), Imm8(16));
|
||||
SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
|
||||
RET();
|
||||
|
||||
|
@ -363,7 +368,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
|
|||
|
||||
// Easy!
|
||||
const u8* storeSingleFloat = AlignCode4();
|
||||
SafeWriteF32ToReg(XMM0, ECX, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
SafeWriteF32ToReg(XMM0, RSCRATCH_EXTRA, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
RET();
|
||||
/*
|
||||
if (cpu_info.bSSSE3)
|
||||
|
@ -371,56 +376,56 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
|
|||
PSHUFB(XMM0, M((void *)pbswapShuffle2x4));
|
||||
// TODO: SafeWriteFloat
|
||||
MOVSS(M(&psTemp[0]), XMM0);
|
||||
MOV(32, R(EAX), M(&psTemp[0]));
|
||||
SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
MOV(32, R(RSCRATCH), M(&psTemp[0]));
|
||||
SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
}
|
||||
else
|
||||
{
|
||||
MOVSS(M(&psTemp[0]), XMM0);
|
||||
MOV(32, R(EAX), M(&psTemp[0]));
|
||||
SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
MOV(32, R(RSCRATCH), M(&psTemp[0]));
|
||||
SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
}*/
|
||||
|
||||
const u8* storeSingleU8 = AlignCode4(); // Used by MKWii
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
|
||||
SHR(32, R(RSCRATCH), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS));
|
||||
MULSS(XMM0, R(XMM1));
|
||||
PXOR(XMM1, R(XMM1));
|
||||
MAXSS(XMM0, R(XMM1));
|
||||
MINSS(XMM0, M((void *)&m_255));
|
||||
CVTTSS2SI(EAX, R(XMM0));
|
||||
SafeWriteRegToReg(AL, ECX, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
CVTTSS2SI(RSCRATCH, R(XMM0));
|
||||
SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
RET();
|
||||
|
||||
const u8* storeSingleS8 = AlignCode4();
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
|
||||
SHR(32, R(RSCRATCH), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS));
|
||||
MULSS(XMM0, R(XMM1));
|
||||
MAXSS(XMM0, M((void *)&m_m128));
|
||||
MINSS(XMM0, M((void *)&m_127));
|
||||
CVTTSS2SI(EAX, R(XMM0));
|
||||
SafeWriteRegToReg(AL, ECX, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
CVTTSS2SI(RSCRATCH, R(XMM0));
|
||||
SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
RET();
|
||||
|
||||
const u8* storeSingleU16 = AlignCode4(); // Used by MKWii
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
|
||||
SHR(32, R(RSCRATCH), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS));
|
||||
MULSS(XMM0, R(XMM1));
|
||||
PXOR(XMM1, R(XMM1));
|
||||
MAXSS(XMM0, R(XMM1));
|
||||
MINSS(XMM0, M((void *)&m_65535));
|
||||
CVTTSS2SI(EAX, R(XMM0));
|
||||
SafeWriteRegToReg(EAX, ECX, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
CVTTSS2SI(RSCRATCH, R(XMM0));
|
||||
SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
RET();
|
||||
|
||||
const u8* storeSingleS16 = AlignCode4();
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
|
||||
SHR(32, R(RSCRATCH), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS));
|
||||
MULSS(XMM0, R(XMM1));
|
||||
MAXSS(XMM0, M((void *)&m_m32768));
|
||||
MINSS(XMM0, M((void *)&m_32767));
|
||||
CVTTSS2SI(EAX, R(XMM0));
|
||||
SafeWriteRegToReg(EAX, ECX, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
CVTTSS2SI(RSCRATCH, R(XMM0));
|
||||
SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
RET();
|
||||
|
||||
singleStoreQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));
|
||||
|
@ -444,126 +449,126 @@ void CommonAsmRoutines::GenQuantizedLoads()
|
|||
const u8* loadPairedFloatTwo = AlignCode4();
|
||||
if (cpu_info.bSSSE3)
|
||||
{
|
||||
MOVQ_xmm(XMM0, MComplex(RBX, RCX, 1, 0));
|
||||
MOVQ_xmm(XMM0, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0));
|
||||
PSHUFB(XMM0, M((void *)pbswapShuffle2x4));
|
||||
}
|
||||
else
|
||||
{
|
||||
LoadAndSwap(64, RCX, MComplex(RBX, RCX, 1, 0));
|
||||
ROL(64, R(RCX), Imm8(32));
|
||||
MOVQ_xmm(XMM0, R(RCX));
|
||||
LoadAndSwap(64, RSCRATCH_EXTRA, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0));
|
||||
ROL(64, R(RSCRATCH_EXTRA), Imm8(32));
|
||||
MOVQ_xmm(XMM0, R(RSCRATCH_EXTRA));
|
||||
}
|
||||
RET();
|
||||
|
||||
const u8* loadPairedFloatOne = AlignCode4();
|
||||
if (cpu_info.bSSSE3)
|
||||
{
|
||||
MOVD_xmm(XMM0, MComplex(RBX, RCX, 1, 0));
|
||||
MOVD_xmm(XMM0, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0));
|
||||
PSHUFB(XMM0, M((void *)pbswapShuffle1x4));
|
||||
UNPCKLPS(XMM0, M((void*)m_one));
|
||||
}
|
||||
else
|
||||
{
|
||||
LoadAndSwap(32, RCX, MComplex(RBX, RCX, 1, 0));
|
||||
MOVD_xmm(XMM0, R(RCX));
|
||||
LoadAndSwap(32, RSCRATCH_EXTRA, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0));
|
||||
MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
|
||||
UNPCKLPS(XMM0, M((void*)m_one));
|
||||
}
|
||||
RET();
|
||||
|
||||
const u8* loadPairedU8Two = AlignCode4();
|
||||
UnsafeLoadRegToRegNoSwap(ECX, ECX, 16, 0);
|
||||
MOVD_xmm(XMM0, R(ECX));
|
||||
UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0);
|
||||
MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
|
||||
PXOR(XMM1, R(XMM1));
|
||||
PUNPCKLBW(XMM0, R(XMM1));
|
||||
PUNPCKLWD(XMM0, R(XMM1));
|
||||
CVTDQ2PS(XMM0, R(XMM0));
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
|
||||
SHR(32, R(RSCRATCH), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
RET();
|
||||
|
||||
const u8* loadPairedU8One = AlignCode4();
|
||||
UnsafeLoadRegToRegNoSwap(ECX, ECX, 8, 0); // ECX = 0x000000xx
|
||||
MOVD_xmm(XMM0, R(ECX));
|
||||
UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 8, 0); // RSCRATCH_EXTRA = 0x000000xx
|
||||
MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
|
||||
CVTDQ2PS(XMM0, R(XMM0)); // Is CVTSI2SS better?
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
|
||||
SHR(32, R(RSCRATCH), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS));
|
||||
MULSS(XMM0, R(XMM1));
|
||||
UNPCKLPS(XMM0, M((void*)m_one));
|
||||
RET();
|
||||
|
||||
const u8* loadPairedS8Two = AlignCode4();
|
||||
UnsafeLoadRegToRegNoSwap(ECX, ECX, 16, 0);
|
||||
MOVD_xmm(XMM0, R(ECX));
|
||||
UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0);
|
||||
MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
|
||||
PUNPCKLBW(XMM0, R(XMM0));
|
||||
PUNPCKLWD(XMM0, R(XMM0));
|
||||
PSRAD(XMM0, 24);
|
||||
CVTDQ2PS(XMM0, R(XMM0));
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
|
||||
SHR(32, R(RSCRATCH), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
RET();
|
||||
|
||||
const u8* loadPairedS8One = AlignCode4();
|
||||
UnsafeLoadRegToRegNoSwap(ECX, ECX, 8, 0);
|
||||
SHL(32, R(ECX), Imm8(24));
|
||||
SAR(32, R(ECX), Imm8(24));
|
||||
MOVD_xmm(XMM0, R(ECX));
|
||||
UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 8, 0);
|
||||
SHL(32, R(RSCRATCH_EXTRA), Imm8(24));
|
||||
SAR(32, R(RSCRATCH_EXTRA), Imm8(24));
|
||||
MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
|
||||
CVTDQ2PS(XMM0, R(XMM0));
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
|
||||
SHR(32, R(RSCRATCH), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS));
|
||||
MULSS(XMM0, R(XMM1));
|
||||
UNPCKLPS(XMM0, M((void*)m_one));
|
||||
RET();
|
||||
|
||||
const u8* loadPairedU16Two = AlignCode4();
|
||||
UnsafeLoadRegToReg(ECX, ECX, 32, 0, false);
|
||||
ROL(32, R(ECX), Imm8(16));
|
||||
MOVD_xmm(XMM0, R(ECX));
|
||||
UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 32, 0, false);
|
||||
ROL(32, R(RSCRATCH_EXTRA), Imm8(16));
|
||||
MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
|
||||
PXOR(XMM1, R(XMM1));
|
||||
PUNPCKLWD(XMM0, R(XMM1));
|
||||
CVTDQ2PS(XMM0, R(XMM0));
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
|
||||
SHR(32, R(RSCRATCH), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
RET();
|
||||
|
||||
const u8* loadPairedU16One = AlignCode4();
|
||||
UnsafeLoadRegToReg(ECX, ECX, 32, 0, false);
|
||||
SHR(32, R(ECX), Imm8(16));
|
||||
MOVD_xmm(XMM0, R(ECX));
|
||||
UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 32, 0, false);
|
||||
SHR(32, R(RSCRATCH_EXTRA), Imm8(16));
|
||||
MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
|
||||
CVTDQ2PS(XMM0, R(XMM0));
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
|
||||
SHR(32, R(RSCRATCH), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS));
|
||||
MULSS(XMM0, R(XMM1));
|
||||
UNPCKLPS(XMM0, M((void*)m_one));
|
||||
RET();
|
||||
|
||||
const u8* loadPairedS16Two = AlignCode4();
|
||||
UnsafeLoadRegToReg(ECX, ECX, 32, 0, false);
|
||||
ROL(32, R(ECX), Imm8(16));
|
||||
MOVD_xmm(XMM0, R(ECX));
|
||||
UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 32, 0, false);
|
||||
ROL(32, R(RSCRATCH_EXTRA), Imm8(16));
|
||||
MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
|
||||
PUNPCKLWD(XMM0, R(XMM0));
|
||||
PSRAD(XMM0, 16);
|
||||
CVTDQ2PS(XMM0, R(XMM0));
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
AND(32, R(EAX), Imm32(0xFC));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
|
||||
SHR(32, R(RSCRATCH), Imm8(6));
|
||||
AND(32, R(RSCRATCH), Imm32(0xFC));
|
||||
MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
RET();
|
||||
|
||||
const u8* loadPairedS16One = AlignCode4();
|
||||
UnsafeLoadRegToReg(ECX, ECX, 32, 0, false);
|
||||
SAR(32, R(ECX), Imm8(16));
|
||||
MOVD_xmm(XMM0, R(ECX));
|
||||
UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 32, 0, false);
|
||||
SAR(32, R(RSCRATCH_EXTRA), Imm8(16));
|
||||
MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
|
||||
CVTDQ2PS(XMM0, R(XMM0));
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
AND(32, R(EAX), Imm32(0xFC));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
|
||||
SHR(32, R(RSCRATCH), Imm8(6));
|
||||
AND(32, R(RSCRATCH), Imm32(0xFC));
|
||||
MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS));
|
||||
MULSS(XMM0, R(XMM1));
|
||||
UNPCKLPS(XMM0, M((void*)m_one));
|
||||
RET();
|
||||
|
|
|
@ -19,9 +19,9 @@ public:
|
|||
|
||||
const u8 *dispatcher;
|
||||
const u8 *dispatcherNoCheck;
|
||||
const u8 *dispatcherPcInEAX;
|
||||
const u8 *dispatcherPcInRSCRATCH;
|
||||
|
||||
const u8 *dispatchPcInEAX;
|
||||
const u8 *dispatchPcInRSCRATCH;
|
||||
const u8 *doTiming;
|
||||
|
||||
const u8 *frsqrte;
|
||||
|
@ -31,14 +31,14 @@ public:
|
|||
// In: RSCRATCH_EXTRA: Address to read from.
|
||||
// Out: XMM0: Bottom two 32-bit slots hold the read value,
|
||||
// converted to a pair of floats.
|
||||
// Trashes: EAX ECX EDX
|
||||
// Trashes: all three RSCRATCH
|
||||
const u8 **pairedLoadQuantized;
|
||||
|
||||
// In: array index: GQR to use.
|
||||
// In: RSCRATCH_EXTRA: Address to write to.
|
||||
// In: XMM0: Bottom two 32-bit slots hold the pair of floats to be written.
|
||||
// Out: Nothing.
|
||||
// Trashes: EAX ECX EDX
|
||||
// Trashes: all three RSCRATCH
|
||||
const u8 **pairedStoreQuantized;
|
||||
|
||||
// In: array index: GQR to use.
|
||||
|
|
|
@ -74,7 +74,7 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re
|
|||
break;
|
||||
case 2:
|
||||
CALL((void *)&Memory::Read_U16);
|
||||
SHL(32, R(EAX), Imm8(16));
|
||||
SHL(32, R(ABI_RETURN), Imm8(16));
|
||||
break;
|
||||
case 1:
|
||||
CALL((void *)&Memory::Read_U8);
|
||||
|
@ -84,11 +84,11 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re
|
|||
if (info.signExtend && info.operandSize == 1)
|
||||
{
|
||||
// Need to sign extend value from Read_U8.
|
||||
MOVSX(32, 8, dataReg, R(EAX));
|
||||
MOVSX(32, 8, dataReg, R(ABI_RETURN));
|
||||
}
|
||||
else if (dataReg != ABI_RETURN)
|
||||
{
|
||||
MOV(32, R(dataReg), R(EAX));
|
||||
MOV(32, R(dataReg), R(ABI_RETURN));
|
||||
}
|
||||
|
||||
ABI_PopRegistersAndAdjustStack(registersInUse, true);
|
||||
|
@ -166,9 +166,9 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
if (info.otherReg != RBX)
|
||||
if (info.otherReg != RMEM)
|
||||
{
|
||||
PanicAlert("BackPatch : Base reg not RBX."
|
||||
PanicAlert("BackPatch : Base reg not RMEM."
|
||||
"\n\nAttempted to access %08x.", emAddress);
|
||||
return nullptr;
|
||||
}
|
||||
|
|
|
@ -27,6 +27,23 @@
|
|||
#include "Core/PowerPC/JitCommon/JitBackpatch.h"
|
||||
#include "Core/PowerPC/JitCommon/JitCache.h"
|
||||
|
||||
// TODO: find a better place for x86-specific stuff
|
||||
// The following register assignments are common to Jit64 and Jit64IL:
|
||||
// RSCRATCH and RSCRATCH2 are always scratch registers and can be used without
|
||||
// limitation.
|
||||
#define RSCRATCH RAX
|
||||
#define RSCRATCH2 RDX
|
||||
// RSCRATCH_EXTRA may be in the allocation order, so it has to be flushed
|
||||
// before use.
|
||||
#define RSCRATCH_EXTRA RCX
|
||||
// RMEM points to the start of emulated memory.
|
||||
#define RMEM RBX
|
||||
// RCODE_POINTERS does what it says.
|
||||
#define RCODE_POINTERS R15
|
||||
// RPPCSTATE points to ppcState + 0x80. It's offset because we want to be able
|
||||
// to address as much as possible in a one-byte offset form.
|
||||
#define RPPCSTATE RBP
|
||||
|
||||
// Use these to control the instruction selection
|
||||
// #define INSTRUCTION_START FallBackToInterpreter(inst); return;
|
||||
// #define INSTRUCTION_START PPCTables::CountInstruction(inst);
|
||||
|
|
|
@ -41,7 +41,7 @@ void EmuCodeBlock::SwapAndStore(int size, const Gen::OpArg& dst, Gen::X64Reg src
|
|||
|
||||
void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend)
|
||||
{
|
||||
MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset));
|
||||
MOVZX(32, accessSize, reg_value, MComplex(RMEM, reg_addr, SCALE_1, offset));
|
||||
if (accessSize == 32)
|
||||
{
|
||||
BSWAP(32, reg_value);
|
||||
|
@ -63,7 +63,7 @@ void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int acc
|
|||
|
||||
void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset)
|
||||
{
|
||||
MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset));
|
||||
MOVZX(32, accessSize, reg_value, MComplex(RMEM, reg_addr, SCALE_1, offset));
|
||||
}
|
||||
|
||||
u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessSize, s32 offset, bool signExtend)
|
||||
|
@ -85,16 +85,16 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessS
|
|||
offset = 0;
|
||||
}
|
||||
|
||||
memOperand = MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset);
|
||||
memOperand = MComplex(RMEM, opAddress.GetSimpleReg(), SCALE_1, offset);
|
||||
}
|
||||
else if (opAddress.IsImm())
|
||||
{
|
||||
memOperand = MDisp(RBX, (opAddress.offset + offset) & 0x3FFFFFFF);
|
||||
memOperand = MDisp(RMEM, (opAddress.offset + offset) & 0x3FFFFFFF);
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV(32, R(reg_value), opAddress);
|
||||
memOperand = MComplex(RBX, reg_value, SCALE_1, offset);
|
||||
memOperand = MComplex(RMEM, reg_value, SCALE_1, offset);
|
||||
}
|
||||
|
||||
result = GetWritableCodePtr();
|
||||
|
@ -129,7 +129,7 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessS
|
|||
return result;
|
||||
}
|
||||
|
||||
// Visitor that generates code to read a MMIO value to EAX.
|
||||
// Visitor that generates code to read a MMIO value.
|
||||
template <typename T>
|
||||
class MMIOReadCodeGenerator : public MMIO::ReadHandlingMethodVisitor<T>
|
||||
{
|
||||
|
@ -181,9 +181,9 @@ private:
|
|||
void LoadAddrMaskToReg(int sbits, const void* ptr, u32 mask)
|
||||
{
|
||||
#ifdef _ARCH_64
|
||||
m_code->MOV(64, R(EAX), ImmPtr(ptr));
|
||||
m_code->MOV(64, R(RSCRATCH), ImmPtr(ptr));
|
||||
#else
|
||||
m_code->MOV(32, R(EAX), ImmPtr(ptr));
|
||||
m_code->MOV(32, R(RSCRATCH), ImmPtr(ptr));
|
||||
#endif
|
||||
// If we do not need to mask, we can do the sign extend while loading
|
||||
// from memory. If masking is required, we have to first zero extend,
|
||||
|
@ -191,11 +191,11 @@ private:
|
|||
u32 all_ones = (1ULL << sbits) - 1;
|
||||
if ((all_ones & mask) == all_ones)
|
||||
{
|
||||
MoveOpArgToReg(sbits, MDisp(EAX, 0));
|
||||
MoveOpArgToReg(sbits, MDisp(RSCRATCH, 0));
|
||||
}
|
||||
else
|
||||
{
|
||||
m_code->MOVZX(32, sbits, m_dst_reg, MDisp(EAX, 0));
|
||||
m_code->MOVZX(32, sbits, m_dst_reg, MDisp(RSCRATCH, 0));
|
||||
m_code->AND(32, R(m_dst_reg), Imm32(mask));
|
||||
if (m_sign_extend)
|
||||
m_code->MOVSX(32, sbits, m_dst_reg, R(m_dst_reg));
|
||||
|
@ -207,7 +207,7 @@ private:
|
|||
m_code->ABI_PushRegistersAndAdjustStack(m_registers_in_use, false);
|
||||
m_code->ABI_CallLambdaC(lambda, m_address);
|
||||
m_code->ABI_PopRegistersAndAdjustStack(m_registers_in_use, false);
|
||||
MoveOpArgToReg(sbits, R(EAX));
|
||||
MoveOpArgToReg(sbits, R(ABI_RETURN));
|
||||
}
|
||||
|
||||
Gen::X64CodeBlock* m_code;
|
||||
|
@ -320,11 +320,11 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
|
|||
if (signExtend && accessSize < 32)
|
||||
{
|
||||
// Need to sign extend values coming from the Read_U* functions.
|
||||
MOVSX(32, accessSize, reg_value, R(EAX));
|
||||
MOVSX(32, accessSize, reg_value, R(ABI_RETURN));
|
||||
}
|
||||
else if (reg_value != EAX)
|
||||
else if (reg_value != ABI_RETURN)
|
||||
{
|
||||
MOVZX(64, accessSize, reg_value, R(EAX));
|
||||
MOVZX(64, accessSize, reg_value, R(ABI_RETURN));
|
||||
}
|
||||
|
||||
MEMCHECK_END
|
||||
|
@ -335,15 +335,15 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
|
|||
OpArg addr_loc = opAddress;
|
||||
if (offset)
|
||||
{
|
||||
addr_loc = R(EAX);
|
||||
addr_loc = R(RSCRATCH);
|
||||
if (opAddress.IsSimpleReg())
|
||||
{
|
||||
LEA(32, EAX, MDisp(opAddress.GetSimpleReg(), offset));
|
||||
LEA(32, RSCRATCH, MDisp(opAddress.GetSimpleReg(), offset));
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV(32, R(EAX), opAddress);
|
||||
ADD(32, R(EAX), Imm32(offset));
|
||||
MOV(32, R(RSCRATCH), opAddress);
|
||||
ADD(32, R(RSCRATCH), Imm32(offset));
|
||||
}
|
||||
}
|
||||
TEST(32, addr_loc, Imm32(mem_mask));
|
||||
|
@ -373,11 +373,11 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
|
|||
if (signExtend && accessSize < 32)
|
||||
{
|
||||
// Need to sign extend values coming from the Read_U* functions.
|
||||
MOVSX(32, accessSize, reg_value, R(EAX));
|
||||
MOVSX(32, accessSize, reg_value, R(ABI_RETURN));
|
||||
}
|
||||
else if (reg_value != EAX)
|
||||
else if (reg_value != ABI_RETURN)
|
||||
{
|
||||
MOVZX(64, accessSize, reg_value, R(EAX));
|
||||
MOVZX(64, accessSize, reg_value, R(ABI_RETURN));
|
||||
}
|
||||
|
||||
MEMCHECK_END
|
||||
|
@ -393,7 +393,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
|
|||
u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap)
|
||||
{
|
||||
u8* result = GetWritableCodePtr();
|
||||
OpArg dest = MComplex(RBX, reg_addr, SCALE_1, offset);
|
||||
OpArg dest = MComplex(RMEM, reg_addr, SCALE_1, offset);
|
||||
if (swap)
|
||||
{
|
||||
if (cpu_info.bMOVBE)
|
||||
|
@ -441,10 +441,10 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
|
|||
|
||||
if (offset)
|
||||
{
|
||||
if (flags & SAFE_LOADSTORE_CLOBBER_EAX_INSTEAD_OF_ADDR)
|
||||
if (flags & SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR)
|
||||
{
|
||||
LEA(32, EAX, MDisp(reg_addr, (u32)offset));
|
||||
reg_addr = EAX;
|
||||
LEA(32, RSCRATCH, MDisp(reg_addr, (u32)offset));
|
||||
reg_addr = RSCRATCH;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -495,20 +495,20 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
|
|||
SetJumpTarget(exit);
|
||||
}
|
||||
|
||||
// Destroys both arg registers and EAX
|
||||
// Destroys the same as SafeWrite plus RSCRATCH. TODO: see if we can avoid temporaries here
|
||||
void EmuCodeBlock::SafeWriteF32ToReg(X64Reg xmm_value, X64Reg reg_addr, s32 offset, u32 registersInUse, int flags)
|
||||
{
|
||||
// TODO: PSHUFB might be faster if fastmem supported MOVSS.
|
||||
MOVD_xmm(R(EAX), xmm_value);
|
||||
SafeWriteRegToReg(EAX, reg_addr, 32, offset, registersInUse, flags);
|
||||
MOVD_xmm(R(RSCRATCH), xmm_value);
|
||||
SafeWriteRegToReg(RSCRATCH, reg_addr, 32, offset, registersInUse, flags);
|
||||
}
|
||||
|
||||
void EmuCodeBlock::WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap)
|
||||
{
|
||||
if (swap)
|
||||
SwapAndStore(accessSize, MDisp(RBX, address & 0x3FFFFFFF), arg);
|
||||
SwapAndStore(accessSize, MDisp(RMEM, address & 0x3FFFFFFF), arg);
|
||||
else
|
||||
MOV(accessSize, MDisp(RBX, address & 0x3FFFFFFF), R(arg));
|
||||
MOV(accessSize, MDisp(RMEM, address & 0x3FFFFFFF), R(arg));
|
||||
}
|
||||
|
||||
void EmuCodeBlock::ForceSinglePrecisionS(X64Reg xmm)
|
||||
|
@ -585,20 +585,20 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
|
|||
// Grab Exponent
|
||||
PAND(XMM1, M((void *)&double_exponent));
|
||||
PSRLQ(XMM1, 52);
|
||||
MOVD_xmm(R(EAX), XMM1);
|
||||
MOVD_xmm(R(RSCRATCH), XMM1);
|
||||
|
||||
|
||||
// Check if the double is in the range of valid single subnormal
|
||||
CMP(16, R(EAX), Imm16(896));
|
||||
CMP(16, R(RSCRATCH), Imm16(896));
|
||||
FixupBranch NoDenormalize = J_CC(CC_G);
|
||||
CMP(16, R(EAX), Imm16(874));
|
||||
CMP(16, R(RSCRATCH), Imm16(874));
|
||||
FixupBranch NoDenormalize2 = J_CC(CC_L);
|
||||
|
||||
// Denormalise
|
||||
|
||||
// shift = (905 - Exponent) plus the 21 bit double to single shift
|
||||
MOV(16, R(EAX), Imm16(905 + 21));
|
||||
MOVD_xmm(XMM0, R(EAX));
|
||||
MOV(16, R(RSCRATCH), Imm16(905 + 21));
|
||||
MOVD_xmm(XMM0, R(RSCRATCH));
|
||||
PSUBQ(XMM0, R(XMM1));
|
||||
|
||||
// xmm1 = fraction | 0x0010000000000000
|
||||
|
@ -649,12 +649,12 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
|
|||
// Changing the FPU mode is very expensive, so we can't do that.
|
||||
// Here, check to see if the exponent is small enough that it will result in a denormal, and pass it to the x87 unit
|
||||
// if it is.
|
||||
MOVQ_xmm(R(RAX), src);
|
||||
SHR(64, R(RAX), Imm8(55));
|
||||
MOVQ_xmm(R(RSCRATCH), src);
|
||||
SHR(64, R(RSCRATCH), Imm8(55));
|
||||
// Exponents 0x369 <= x <= 0x380 are denormal. This code accepts the range 0x368 <= x <= 0x387
|
||||
// to save an instruction, since diverting a few more floats to the slow path can't hurt much.
|
||||
SUB(8, R(AL), Imm8(0x6D));
|
||||
CMP(8, R(AL), Imm8(0x3));
|
||||
SUB(8, R(RSCRATCH), Imm8(0x6D));
|
||||
CMP(8, R(RSCRATCH), Imm8(0x3));
|
||||
FixupBranch x87Conversion = J_CC(CC_BE);
|
||||
CVTSD2SS(dst, R(src));
|
||||
FixupBranch continue1 = J();
|
||||
|
@ -675,7 +675,7 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr
|
|||
{
|
||||
// If the input isn't denormal, just do things the simple way -- otherwise, go through the x87 unit, which has
|
||||
// flush-to-zero off.
|
||||
X64Reg gprsrc = src_is_gpr ? src : EAX;
|
||||
X64Reg gprsrc = src_is_gpr ? src : RSCRATCH;
|
||||
if (src_is_gpr)
|
||||
{
|
||||
MOVD_xmm(dst, R(src));
|
||||
|
@ -684,7 +684,7 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr
|
|||
{
|
||||
if (dst != src)
|
||||
MOVAPD(dst, R(src));
|
||||
MOVD_xmm(EAX, R(src));
|
||||
MOVD_xmm(RSCRATCH, R(src));
|
||||
}
|
||||
// A sneaky hack: floating-point zero is rather common and we don't want to confuse it for denormals and
|
||||
// needlessly send it through the slow path. If we subtract 1 before doing the comparison, it turns
|
||||
|
@ -724,14 +724,14 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
|
|||
FixupBranch continue1, continue2, continue3, continue4;
|
||||
if (cpu_info.bSSE4_1)
|
||||
{
|
||||
MOVQ_xmm(R(RAX), xmm);
|
||||
SHR(64, R(RAX), Imm8(63)); // Get the sign bit; almost all the branches need it.
|
||||
MOVQ_xmm(R(RSCRATCH), xmm);
|
||||
SHR(64, R(RSCRATCH), Imm8(63)); // Get the sign bit; almost all the branches need it.
|
||||
PTEST(xmm, M((void*)psDoubleExp));
|
||||
FixupBranch maxExponent = J_CC(CC_C);
|
||||
FixupBranch zeroExponent = J_CC(CC_Z);
|
||||
|
||||
// Nice normalized number: sign ? PPC_FPCLASS_NN : PPC_FPCLASS_PN;
|
||||
LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_NN - MathUtil::PPC_FPCLASS_PN, MathUtil::PPC_FPCLASS_PN));
|
||||
LEA(32, RSCRATCH, MScaled(RSCRATCH, MathUtil::PPC_FPCLASS_NN - MathUtil::PPC_FPCLASS_PN, MathUtil::PPC_FPCLASS_PN));
|
||||
continue1 = J();
|
||||
|
||||
SetJumpTarget(maxExponent);
|
||||
|
@ -739,12 +739,12 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
|
|||
FixupBranch notNAN = J_CC(CC_Z);
|
||||
|
||||
// Max exponent + mantissa: PPC_FPCLASS_QNAN
|
||||
MOV(32, R(EAX), Imm32(MathUtil::PPC_FPCLASS_QNAN));
|
||||
MOV(32, R(RSCRATCH), Imm32(MathUtil::PPC_FPCLASS_QNAN));
|
||||
continue2 = J();
|
||||
|
||||
// Max exponent + no mantissa: sign ? PPC_FPCLASS_NINF : PPC_FPCLASS_PINF;
|
||||
SetJumpTarget(notNAN);
|
||||
LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_NINF - MathUtil::PPC_FPCLASS_PINF, MathUtil::PPC_FPCLASS_NINF));
|
||||
LEA(32, RSCRATCH, MScaled(RSCRATCH, MathUtil::PPC_FPCLASS_NINF - MathUtil::PPC_FPCLASS_PINF, MathUtil::PPC_FPCLASS_NINF));
|
||||
continue3 = J();
|
||||
|
||||
SetJumpTarget(zeroExponent);
|
||||
|
@ -752,55 +752,55 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
|
|||
FixupBranch zero = J_CC(CC_Z);
|
||||
|
||||
// No exponent + mantissa: sign ? PPC_FPCLASS_ND : PPC_FPCLASS_PD;
|
||||
LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_ND - MathUtil::PPC_FPCLASS_PD, MathUtil::PPC_FPCLASS_ND));
|
||||
LEA(32, RSCRATCH, MScaled(RSCRATCH, MathUtil::PPC_FPCLASS_ND - MathUtil::PPC_FPCLASS_PD, MathUtil::PPC_FPCLASS_ND));
|
||||
continue4 = J();
|
||||
|
||||
// Zero: sign ? PPC_FPCLASS_NZ : PPC_FPCLASS_PZ;
|
||||
SetJumpTarget(zero);
|
||||
SHL(32, R(EAX), Imm8(4));
|
||||
ADD(32, R(EAX), Imm8(MathUtil::PPC_FPCLASS_PZ));
|
||||
SHL(32, R(RSCRATCH), Imm8(4));
|
||||
ADD(32, R(RSCRATCH), Imm8(MathUtil::PPC_FPCLASS_PZ));
|
||||
}
|
||||
else
|
||||
{
|
||||
MOVQ_xmm(R(RAX), xmm);
|
||||
TEST(64, R(RAX), M((void*)psDoubleExp));
|
||||
MOVQ_xmm(R(RSCRATCH), xmm);
|
||||
TEST(64, R(RSCRATCH), M((void*)psDoubleExp));
|
||||
FixupBranch zeroExponent = J_CC(CC_Z);
|
||||
AND(64, R(RAX), M((void*)psDoubleNoSign));
|
||||
CMP(64, R(RAX), M((void*)psDoubleExp));
|
||||
FixupBranch nan = J_CC(CC_G); // This works because if the sign bit is set, RAX is negative
|
||||
AND(64, R(RSCRATCH), M((void*)psDoubleNoSign));
|
||||
CMP(64, R(RSCRATCH), M((void*)psDoubleExp));
|
||||
FixupBranch nan = J_CC(CC_G); // This works because if the sign bit is set, RSCRATCH is negative
|
||||
FixupBranch infinity = J_CC(CC_E);
|
||||
MOVQ_xmm(R(RAX), xmm);
|
||||
SHR(64, R(RAX), Imm8(63));
|
||||
LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_NN - MathUtil::PPC_FPCLASS_PN, MathUtil::PPC_FPCLASS_PN));
|
||||
MOVQ_xmm(R(RSCRATCH), xmm);
|
||||
SHR(64, R(RSCRATCH), Imm8(63));
|
||||
LEA(32, RSCRATCH, MScaled(RSCRATCH, MathUtil::PPC_FPCLASS_NN - MathUtil::PPC_FPCLASS_PN, MathUtil::PPC_FPCLASS_PN));
|
||||
continue1 = J();
|
||||
SetJumpTarget(nan);
|
||||
MOVQ_xmm(R(RAX), xmm);
|
||||
SHR(64, R(RAX), Imm8(63));
|
||||
MOV(32, R(EAX), Imm32(MathUtil::PPC_FPCLASS_QNAN));
|
||||
MOVQ_xmm(R(RSCRATCH), xmm);
|
||||
SHR(64, R(RSCRATCH), Imm8(63));
|
||||
MOV(32, R(RSCRATCH), Imm32(MathUtil::PPC_FPCLASS_QNAN));
|
||||
continue2 = J();
|
||||
SetJumpTarget(infinity);
|
||||
MOVQ_xmm(R(RAX), xmm);
|
||||
SHR(64, R(RAX), Imm8(63));
|
||||
LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_NINF - MathUtil::PPC_FPCLASS_PINF, MathUtil::PPC_FPCLASS_NINF));
|
||||
MOVQ_xmm(R(RSCRATCH), xmm);
|
||||
SHR(64, R(RSCRATCH), Imm8(63));
|
||||
LEA(32, RSCRATCH, MScaled(RSCRATCH, MathUtil::PPC_FPCLASS_NINF - MathUtil::PPC_FPCLASS_PINF, MathUtil::PPC_FPCLASS_NINF));
|
||||
continue3 = J();
|
||||
SetJumpTarget(zeroExponent);
|
||||
TEST(64, R(RAX), R(RAX));
|
||||
TEST(64, R(RSCRATCH), R(RSCRATCH));
|
||||
FixupBranch zero = J_CC(CC_Z);
|
||||
SHR(64, R(RAX), Imm8(63));
|
||||
LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_ND - MathUtil::PPC_FPCLASS_PD, MathUtil::PPC_FPCLASS_ND));
|
||||
SHR(64, R(RSCRATCH), Imm8(63));
|
||||
LEA(32, RSCRATCH, MScaled(RSCRATCH, MathUtil::PPC_FPCLASS_ND - MathUtil::PPC_FPCLASS_PD, MathUtil::PPC_FPCLASS_ND));
|
||||
continue4 = J();
|
||||
SetJumpTarget(zero);
|
||||
SHR(64, R(RAX), Imm8(63));
|
||||
SHL(32, R(EAX), Imm8(4));
|
||||
ADD(32, R(EAX), Imm8(MathUtil::PPC_FPCLASS_PZ));
|
||||
SHR(64, R(RSCRATCH), Imm8(63));
|
||||
SHL(32, R(RSCRATCH), Imm8(4));
|
||||
ADD(32, R(RSCRATCH), Imm8(MathUtil::PPC_FPCLASS_PZ));
|
||||
}
|
||||
|
||||
SetJumpTarget(continue1);
|
||||
SetJumpTarget(continue2);
|
||||
SetJumpTarget(continue3);
|
||||
SetJumpTarget(continue4);
|
||||
SHL(32, R(EAX), Imm8(FPRF_SHIFT));
|
||||
OR(32, PPCSTATE(fpscr), R(EAX));
|
||||
SHL(32, R(RSCRATCH), Imm8(FPRF_SHIFT));
|
||||
OR(32, PPCSTATE(fpscr), R(RSCRATCH));
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@ namespace MMIO { class Mapping; }
|
|||
|
||||
// We offset by 0x80 because the range of one byte memory offsets is
|
||||
// -0x80..0x7f.
|
||||
#define PPCSTATE(x) MDisp(RBP, \
|
||||
#define PPCSTATE(x) MDisp(RPPCSTATE, \
|
||||
(int) ((char *) &PowerPC::ppcState.x - (char *) &PowerPC::ppcState) - 0x80)
|
||||
// In case you want to disable the ppcstate register:
|
||||
// #define PPCSTATE(x) M((void*) &PowerPC::ppcState.x)
|
||||
|
@ -54,11 +54,11 @@ public:
|
|||
SAFE_LOADSTORE_NO_SWAP = 1,
|
||||
SAFE_LOADSTORE_NO_PROLOG = 2,
|
||||
SAFE_LOADSTORE_NO_FASTMEM = 4,
|
||||
SAFE_LOADSTORE_CLOBBER_EAX_INSTEAD_OF_ADDR = 8
|
||||
SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR = 8
|
||||
};
|
||||
|
||||
void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags = 0);
|
||||
// Clobbers EAX or reg_addr depending on the relevant flag. Preserves
|
||||
// Clobbers RSCRATCH or reg_addr depending on the relevant flag. Preserves
|
||||
// reg_value if the load fails and js.memcheck is enabled.
|
||||
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags = 0);
|
||||
|
||||
|
@ -79,9 +79,8 @@ public:
|
|||
void ForceSinglePrecisionP(Gen::X64Reg xmm);
|
||||
void Force25BitPrecision(Gen::X64Reg xmm, Gen::X64Reg tmp);
|
||||
|
||||
// EAX might get trashed
|
||||
// RSCRATCH might get trashed
|
||||
void ConvertSingleToDouble(Gen::X64Reg dst, Gen::X64Reg src, bool src_is_gpr = false);
|
||||
// RSCRATCH might get trashed
|
||||
void ConvertDoubleToSingle(Gen::X64Reg dst, Gen::X64Reg src);
|
||||
void SetFPRF(Gen::X64Reg xmm);
|
||||
protected:
|
||||
|
|
|
@ -321,7 +321,7 @@ void JitILBase::divwux(UGeckoInstruction inst)
|
|||
|
||||
#if 0
|
||||
int a = inst.RA, b = inst.RB, d = inst.RD;
|
||||
gpr.FlushLockX(EDX);
|
||||
gpr.FlushLockX(RSCRATCH2);
|
||||
gpr.Lock(a, b, d);
|
||||
|
||||
if (d != a && d != b)
|
||||
|
@ -333,11 +333,11 @@ void JitILBase::divwux(UGeckoInstruction inst)
|
|||
gpr.LoadToX64(d, true, true);
|
||||
}
|
||||
|
||||
MOV(32, R(EAX), gpr.R(a));
|
||||
XOR(32, R(EDX), R(EDX));
|
||||
MOV(32, R(RSCRATCH), gpr.R(a));
|
||||
XOR(32, R(RSCRATCH2), R(RSCRATCH2));
|
||||
gpr.KillImmediate(b);
|
||||
DIV(32, gpr.R(b));
|
||||
MOV(32, gpr.R(d), R(EAX));
|
||||
MOV(32, gpr.R(d), R(RSCRATCH));
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
|
||||
|
|
|
@ -137,19 +137,13 @@ void JitILBase::dcbz(UGeckoInstruction inst)
|
|||
return;
|
||||
}
|
||||
INSTRUCTION_START;
|
||||
MOV(32, R(EAX), gpr.R(inst.RB));
|
||||
MOV(32, R(RSCRATCH), gpr.R(inst.RB));
|
||||
if (inst.RA)
|
||||
ADD(32, R(EAX), gpr.R(inst.RA));
|
||||
AND(32, R(EAX), Imm32(~31));
|
||||
ADD(32, R(RSCRATCH), gpr.R(inst.RA));
|
||||
AND(32, R(RSCRATCH), Imm32(~31));
|
||||
PXOR(XMM0, R(XMM0));
|
||||
#if _M_X86_64
|
||||
MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0);
|
||||
MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0);
|
||||
#else
|
||||
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOVAPS(MDisp(EAX, (u32)Memory::base), XMM0);
|
||||
MOVAPS(MDisp(EAX, (u32)Memory::base + 16), XMM0);
|
||||
#endif
|
||||
MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 0), XMM0);
|
||||
MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 16), XMM0);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue