Merge pull request #926 from comex/ppcstate-reg

PowerPCState register (and rationalize register usage, and add some registers to replace it)
This commit is contained in:
comex 2014-09-06 13:24:38 -04:00
commit 6c382f6627
27 changed files with 947 additions and 905 deletions

View File

@ -353,20 +353,7 @@ void XEmitter::ABI_CallFunctionR(void *func, X64Reg reg1)
void XEmitter::ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2, bool noProlog) void XEmitter::ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2, bool noProlog)
{ {
ABI_AlignStack(0, noProlog); ABI_AlignStack(0, noProlog);
if (reg2 != ABI_PARAM1) MOVTwo(64, ABI_PARAM1, reg1, ABI_PARAM2, reg2, ABI_PARAM3);
{
if (reg1 != ABI_PARAM1)
MOV(64, R(ABI_PARAM1), R(reg1));
if (reg2 != ABI_PARAM2)
MOV(64, R(ABI_PARAM2), R(reg2));
}
else
{
if (reg2 != ABI_PARAM2)
MOV(64, R(ABI_PARAM2), R(reg2));
if (reg1 != ABI_PARAM1)
MOV(64, R(ABI_PARAM1), R(reg1));
}
u64 distance = u64(func) - (u64(code) + 5); u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL && if (distance >= 0x0000000080000000ULL &&
distance < 0xFFFFFFFF80000000ULL) distance < 0xFFFFFFFF80000000ULL)
@ -382,6 +369,30 @@ void XEmitter::ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2, bool noP
ABI_RestoreStack(0, noProlog); ABI_RestoreStack(0, noProlog);
} }
// Emits register-to-register MOVs of width `bits` so that dst1 ends up holding
// the original value of src1 and dst2 the original value of src2, choosing an
// order in which neither source is overwritten before it has been read.
// A MOV whose destination already equals its source is skipped.
//   temp - scratch register, written only when the two pairs form an exact swap
//          (dst1 == src2 and dst2 == src1). In that case it must differ from
//          dst1 and dst2: it is written before src2 (== dst1) is read, and it
//          is read back only after dst2 has been written.
void XEmitter::MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, Gen::X64Reg dst2, Gen::X64Reg src2, X64Reg temp)
{
if (dst1 == src2 && dst2 == src1)
{
// need a temporary
// Park src1's value in temp and treat temp as the new src1, so that writing
// dst2 (== the old src1) below does not destroy it.
MOV(bits, R(temp), R(src1));
src1 = temp;
}
if (src2 != dst1)
{
// Writing dst1 cannot clobber src2, so do the first pair first; src1 is read
// before dst2 (which may be the same register as src1) is overwritten.
if (dst1 != src1)
MOV(bits, R(dst1), R(src1));
if (dst2 != src2)
MOV(bits, R(dst2), R(src2));
}
else
{
// dst1 is the register holding src2: copy src2 into dst2 first, then
// overwrite dst1.
if (dst2 != src2)
MOV(bits, R(dst2), R(src2));
if (dst1 != src1)
MOV(bits, R(dst1), R(src1));
}
}
void XEmitter::ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2) void XEmitter::ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2)
{ {
ABI_AlignStack(0); ABI_AlignStack(0);

View File

@ -53,3 +53,5 @@
#endif // WIN32 #endif // WIN32
#define ABI_RETURN RAX

View File

@ -753,6 +753,9 @@ public:
void ABI_CallFunctionR(void *func, X64Reg reg1); void ABI_CallFunctionR(void *func, X64Reg reg1);
void ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2, bool noProlog = false); void ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2, bool noProlog = false);
// Helper method for ABI_CallFunctionRR, or can be used separately.
// Moves src1 into dst1 and src2 into dst2, ordering the moves so that neither
// source is overwritten before it is read. temp is used as scratch only when
// the two pairs are an exact swap (dst1 == src2 and dst2 == src1).
void MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, Gen::X64Reg dst2, Gen::X64Reg src2, Gen::X64Reg temp);
// A function that doesn't have any control over what it will do to regs, // A function that doesn't have any control over what it will do to regs,
// such as the dispatcher, should be surrounded by these. // such as the dispatcher, should be surrounded by these.
void ABI_PushAllCalleeSavedRegsAndAdjustStack(); void ABI_PushAllCalleeSavedRegsAndAdjustStack();

View File

@ -39,14 +39,6 @@ using namespace PowerPC;
// Various notes below // Various notes below
// Register allocation
// RAX - Generic quicktemp register
// RBX - point to base of memory map
// RSI RDI R12 R13 R14 R15 - free for allocation
// RCX RDX R8 R9 R10 R11 - allocate in emergencies. These need to be flushed before functions are called.
// RSP - stack pointer, do not generally use, very dangerous
// RBP - ?
// IMPORTANT: // IMPORTANT:
// Make sure that all generated code and all emulator state sits under the 2GB boundary so that // Make sure that all generated code and all emulator state sits under the 2GB boundary so that
// RIP addressing can be used easily. Windows will always allocate static code under the 2GB boundary. // RIP addressing can be used easily. Windows will always allocate static code under the 2GB boundary.
@ -210,8 +202,8 @@ void Jit64::WriteCallInterpreter(UGeckoInstruction inst)
fpr.Flush(); fpr.Flush();
if (js.isLastInstruction) if (js.isLastInstruction)
{ {
MOV(32, M(&PC), Imm32(js.compilerPC)); MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
MOV(32, M(&NPC), Imm32(js.compilerPC + 4)); MOV(32, PPCSTATE(npc), Imm32(js.compilerPC + 4));
} }
Interpreter::_interpreterInstruction instr = GetInterpreterOp(inst); Interpreter::_interpreterInstruction instr = GetInterpreterOp(inst);
ABI_CallFunctionC((void*)instr, inst.hex); ABI_CallFunctionC((void*)instr, inst.hex);
@ -279,7 +271,7 @@ void Jit64::WriteExit(u32 destination)
{ {
Cleanup(); Cleanup();
SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
//If nobody has taken care of this yet (this can be removed when all branches are done) //If nobody has taken care of this yet (this can be removed when all branches are done)
JitBlock *b = js.curBlock; JitBlock *b = js.curBlock;
@ -298,48 +290,48 @@ void Jit64::WriteExit(u32 destination)
} }
else else
{ {
MOV(32, M(&PC), Imm32(destination)); MOV(32, PPCSTATE(pc), Imm32(destination));
JMP(asm_routines.dispatcher, true); JMP(asm_routines.dispatcher, true);
} }
b->linkData.push_back(linkData); b->linkData.push_back(linkData);
} }
void Jit64::WriteExitDestInEAX() void Jit64::WriteExitDestInRSCRATCH()
{ {
MOV(32, M(&PC), R(EAX)); MOV(32, PPCSTATE(pc), R(RSCRATCH));
Cleanup(); Cleanup();
SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
JMP(asm_routines.dispatcher, true); JMP(asm_routines.dispatcher, true);
} }
void Jit64::WriteRfiExitDestInEAX() void Jit64::WriteRfiExitDestInRSCRATCH()
{ {
MOV(32, M(&PC), R(EAX)); MOV(32, PPCSTATE(pc), R(RSCRATCH));
MOV(32, M(&NPC), R(EAX)); MOV(32, PPCSTATE(npc), R(RSCRATCH));
Cleanup(); Cleanup();
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions)); ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
JMP(asm_routines.dispatcher, true); JMP(asm_routines.dispatcher, true);
} }
void Jit64::WriteExceptionExit() void Jit64::WriteExceptionExit()
{ {
Cleanup(); Cleanup();
MOV(32, R(EAX), M(&PC)); MOV(32, R(RSCRATCH), PPCSTATE(pc));
MOV(32, M(&NPC), R(EAX)); MOV(32, PPCSTATE(npc), R(RSCRATCH));
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions)); ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
JMP(asm_routines.dispatcher, true); JMP(asm_routines.dispatcher, true);
} }
void Jit64::WriteExternalExceptionExit() void Jit64::WriteExternalExceptionExit()
{ {
Cleanup(); Cleanup();
MOV(32, R(EAX), M(&PC)); MOV(32, R(RSCRATCH), PPCSTATE(pc));
MOV(32, M(&NPC), R(EAX)); MOV(32, PPCSTATE(npc), R(RSCRATCH));
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExternalExceptions)); ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExternalExceptions));
SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
JMP(asm_routines.dispatcher, true); JMP(asm_routines.dispatcher, true);
} }
@ -426,7 +418,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
// Downcount flag check. The last block decremented downcounter, and the flag should still be available. // Downcount flag check. The last block decremented downcounter, and the flag should still be available.
FixupBranch skip = J_CC(CC_NBE); FixupBranch skip = J_CC(CC_NBE);
MOV(32, M(&PC), Imm32(js.blockStart)); MOV(32, PPCSTATE(pc), Imm32(js.blockStart));
JMP(asm_routines.doTiming, true); // downcount hit zero - go doTiming. JMP(asm_routines.doTiming, true); // downcount hit zero - go doTiming.
SetJumpTarget(skip); SetJumpTarget(skip);
@ -452,7 +444,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
} }
#if defined(_DEBUG) || defined(DEBUGFAST) || defined(NAN_CHECK) #if defined(_DEBUG) || defined(DEBUGFAST) || defined(NAN_CHECK)
// should help logged stack-traces become more accurate // should help logged stack-traces become more accurate
MOV(32, M(&PC), Imm32(js.blockStart)); MOV(32, PPCSTATE(pc), Imm32(js.blockStart));
#endif #endif
// Start up the register allocators // Start up the register allocators
@ -501,7 +493,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32) if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
{ {
js.fifoBytesThisBlock -= 32; js.fifoBytesThisBlock -= 32;
MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
u32 registersInUse = CallerSavedRegistersInUse(); u32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, false); ABI_PushRegistersAndAdjustStack(registersInUse, false);
ABI_CallFunction((void *)&GPFifo::CheckGatherPipe); ABI_CallFunction((void *)&GPFifo::CheckGatherPipe);
@ -520,9 +512,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
HLEFunction(function); HLEFunction(function);
if (type == HLE::HLE_HOOK_REPLACE) if (type == HLE::HLE_HOOK_REPLACE)
{ {
MOV(32, R(EAX), M(&NPC)); MOV(32, R(RSCRATCH), PPCSTATE(npc));
js.downcountAmount += js.st.numCycles; js.downcountAmount += js.st.numCycles;
WriteExitDestInEAX(); WriteExitDestInRSCRATCH();
break; break;
} }
} }
@ -537,13 +529,13 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
fpr.Flush(); fpr.Flush();
//This instruction uses FPU - needs to add FP exception bailout //This instruction uses FPU - needs to add FP exception bailout
TEST(32, M(&PowerPC::ppcState.msr), Imm32(1 << 13)); // Test FP enabled bit TEST(32, PPCSTATE(msr), Imm32(1 << 13)); // Test FP enabled bit
FixupBranch b1 = J_CC(CC_NZ, true); FixupBranch b1 = J_CC(CC_NZ, true);
// If a FPU exception occurs, the exception handler will read // If a FPU exception occurs, the exception handler will read
// from PC. Update PC with the latest value in case that happens. // from PC. Update PC with the latest value in case that happens.
MOV(32, M(&PC), Imm32(ops[i].address)); MOV(32, PPCSTATE(pc), Imm32(ops[i].address));
OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE)); OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE));
WriteExceptionExit(); WriteExceptionExit();
SetJumpTarget(b1); SetJumpTarget(b1);
@ -557,16 +549,16 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
gpr.Flush(); gpr.Flush();
fpr.Flush(); fpr.Flush();
TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_ISI | EXCEPTION_PROGRAM | EXCEPTION_SYSCALL | EXCEPTION_FPU_UNAVAILABLE | EXCEPTION_DSI | EXCEPTION_ALIGNMENT)); TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_ISI | EXCEPTION_PROGRAM | EXCEPTION_SYSCALL | EXCEPTION_FPU_UNAVAILABLE | EXCEPTION_DSI | EXCEPTION_ALIGNMENT));
FixupBranch clearInt = J_CC(CC_NZ, true); FixupBranch clearInt = J_CC(CC_NZ, true);
TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_EXTERNAL_INT)); TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT));
FixupBranch noExtException = J_CC(CC_Z, true); FixupBranch noExtException = J_CC(CC_Z, true);
TEST(32, M((void *)&PowerPC::ppcState.msr), Imm32(0x0008000)); TEST(32, PPCSTATE(msr), Imm32(0x0008000));
FixupBranch noExtIntEnable = J_CC(CC_Z, true); FixupBranch noExtIntEnable = J_CC(CC_Z, true);
TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_FINISH)); TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_FINISH));
FixupBranch noCPInt = J_CC(CC_Z, true); FixupBranch noCPInt = J_CC(CC_Z, true);
MOV(32, M(&PC), Imm32(ops[i].address)); MOV(32, PPCSTATE(pc), Imm32(ops[i].address));
WriteExternalExceptionExit(); WriteExternalExceptionExit();
SetJumpTarget(noCPInt); SetJumpTarget(noCPInt);
@ -580,7 +572,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
gpr.Flush(); gpr.Flush();
fpr.Flush(); fpr.Flush();
MOV(32, M(&PC), Imm32(ops[i].address)); MOV(32, PPCSTATE(pc), Imm32(ops[i].address));
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints)); ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints));
TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
FixupBranch noBreakpoint = J_CC(CC_Z); FixupBranch noBreakpoint = J_CC(CC_Z);
@ -597,12 +589,12 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
gpr.Flush(); gpr.Flush();
fpr.Flush(); fpr.Flush();
TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_DSI)); TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
FixupBranch noMemException = J_CC(CC_Z, true); FixupBranch noMemException = J_CC(CC_Z, true);
// If a memory exception occurs, the exception handler will read // If a memory exception occurs, the exception handler will read
// from PC. Update PC with the latest value in case that happens. // from PC. Update PC with the latest value in case that happens.
MOV(32, M(&PC), Imm32(ops[i].address)); MOV(32, PPCSTATE(pc), Imm32(ops[i].address));
WriteExceptionExit(); WriteExceptionExit();
SetJumpTarget(noMemException); SetJumpTarget(noMemException);
} }
@ -645,13 +637,13 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
if (code_block.m_memory_exception) if (code_block.m_memory_exception)
{ {
// Address of instruction could not be translated // Address of instruction could not be translated
MOV(32, M(&NPC), Imm32(js.compilerPC)); MOV(32, PPCSTATE(npc), Imm32(js.compilerPC));
OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_ISI)); OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_ISI));
// Remove the invalid instruction from the icache, forcing a recompile // Remove the invalid instruction from the icache, forcing a recompile
MOV(64, R(RAX), ImmPtr(jit->GetBlockCache()->GetICachePtr(js.compilerPC))); MOV(64, R(RSCRATCH), ImmPtr(jit->GetBlockCache()->GetICachePtr(js.compilerPC)));
MOV(32,MatR(RAX),Imm32(JIT_ICACHE_INVALID_WORD)); MOV(32,MatR(RSCRATCH),Imm32(JIT_ICACHE_INVALID_WORD));
WriteExceptionExit(); WriteExceptionExit();
} }

View File

@ -90,10 +90,10 @@ public:
// Utilities for use by opcodes // Utilities for use by opcodes
void WriteExit(u32 destination); void WriteExit(u32 destination);
void WriteExitDestInEAX(); void WriteExitDestInRSCRATCH();
void WriteExceptionExit(); void WriteExceptionExit();
void WriteExternalExceptionExit(); void WriteExternalExceptionExit();
void WriteRfiExitDestInEAX(); void WriteRfiExitDestInRSCRATCH();
void WriteCallInterpreter(UGeckoInstruction _inst); void WriteCallInterpreter(UGeckoInstruction _inst);
void Cleanup(); void Cleanup();
@ -101,16 +101,15 @@ public:
void GenerateConstantOverflow(s64 val); void GenerateConstantOverflow(s64 val);
void GenerateOverflow(); void GenerateOverflow();
void FinalizeCarryOverflow(bool oe, bool inv = false); void FinalizeCarryOverflow(bool oe, bool inv = false);
void GetCarryEAXAndClear(); void GetCarryRSCRATCHAndClear();
void FinalizeCarryGenerateOverflowEAX(bool oe, bool inv = false); void FinalizeCarryGenerateOverflowRSCRATCH(bool oe, bool inv = false);
void GenerateCarry(); void GenerateCarry();
void GenerateRC(); void GenerateRC();
void ComputeRC(const Gen::OpArg & arg); void ComputeRC(const Gen::OpArg & arg);
// Reads a given bit of a given CR register part. Clobbers ABI_PARAM1, // Reads a given bit of a given CR register part.
// don't forget to xlock it before.
void GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate = false); void GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate = false);
// Clobbers ABI_PARAM1, xlock it before. // Clobbers RDX.
void SetCRFieldBit(int field, int bit, Gen::X64Reg in); void SetCRFieldBit(int field, int bit, Gen::X64Reg in);
// Generates a branch that will check if a given bit of a CR register part // Generates a branch that will check if a given bit of a CR register part

View File

@ -9,14 +9,6 @@
using namespace Gen; using namespace Gen;
//GLOBAL STATIC ALLOCATIONS x86
//EAX - ubiquitous scratch register - EVERYBODY scratches this
//GLOBAL STATIC ALLOCATIONS x64
//EAX - ubiquitous scratch register - EVERYBODY scratches this
//RBX - Base pointer of memory
//R15 - Pointer to array of block pointers
// PLAN: no more block numbers - crazy opcodes just contain offset within // PLAN: no more block numbers - crazy opcodes just contain offset within
// dynarec buffer // dynarec buffer
// At this offset - 4, there is an int specifying the block number. // At this offset - 4, there is an int specifying the block number.
@ -27,8 +19,9 @@ void Jit64AsmRoutineManager::Generate()
ABI_PushAllCalleeSavedRegsAndAdjustStack(); ABI_PushAllCalleeSavedRegsAndAdjustStack();
// Two statically allocated registers. // Two statically allocated registers.
MOV(64, R(RBX), Imm64((u64)Memory::base)); MOV(64, R(RMEM), Imm64((u64)Memory::base));
MOV(64, R(R15), Imm64((u64)jit->GetBlockCache()->GetCodePointers())); //It's below 2GB so 32 bits are good enough MOV(64, R(RCODE_POINTERS), Imm64((u64)jit->GetBlockCache()->GetCodePointers())); //It's below 2GB so 32 bits are good enough
MOV(64, R(RPPCSTATE), Imm64((u64)&PowerPC::ppcState + 0x80));
const u8* outerLoop = GetCodePtr(); const u8* outerLoop = GetCodePtr();
ABI_CallFunction(reinterpret_cast<void *>(&CoreTiming::Advance)); ABI_CallFunction(reinterpret_cast<void *>(&CoreTiming::Advance));
@ -55,8 +48,8 @@ void Jit64AsmRoutineManager::Generate()
SetJumpTarget(skipToRealDispatch); SetJumpTarget(skipToRealDispatch);
dispatcherNoCheck = GetCodePtr(); dispatcherNoCheck = GetCodePtr();
MOV(32, R(EAX), M(&PowerPC::ppcState.pc)); MOV(32, R(RSCRATCH), PPCSTATE(pc));
dispatcherPcInEAX = GetCodePtr(); dispatcherPcInRSCRATCH = GetCodePtr();
u32 mask = 0; u32 mask = 0;
FixupBranch no_mem; FixupBranch no_mem;
@ -68,12 +61,12 @@ void Jit64AsmRoutineManager::Generate()
mask |= JIT_ICACHE_VMEM_BIT; mask |= JIT_ICACHE_VMEM_BIT;
if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack) if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack)
{ {
TEST(32, R(EAX), Imm32(mask)); TEST(32, R(RSCRATCH), Imm32(mask));
no_mem = J_CC(CC_NZ); no_mem = J_CC(CC_NZ);
} }
AND(32, R(EAX), Imm32(JIT_ICACHE_MASK)); AND(32, R(RSCRATCH), Imm32(JIT_ICACHE_MASK));
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->iCache)); MOV(64, R(RSCRATCH2), Imm64((u64)jit->GetBlockCache()->iCache));
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0)); MOV(32, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_1, 0));
if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack) if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack)
{ {
@ -82,22 +75,22 @@ void Jit64AsmRoutineManager::Generate()
} }
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack) if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack)
{ {
TEST(32, R(EAX), Imm32(JIT_ICACHE_VMEM_BIT)); TEST(32, R(RSCRATCH), Imm32(JIT_ICACHE_VMEM_BIT));
FixupBranch no_vmem = J_CC(CC_Z); FixupBranch no_vmem = J_CC(CC_Z);
AND(32, R(EAX), Imm32(JIT_ICACHE_MASK)); AND(32, R(RSCRATCH), Imm32(JIT_ICACHE_MASK));
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->iCacheVMEM)); MOV(64, R(RSCRATCH2), Imm64((u64)jit->GetBlockCache()->iCacheVMEM));
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0)); MOV(32, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_1, 0));
if (Core::g_CoreStartupParameter.bWii) exit_vmem = J(); if (Core::g_CoreStartupParameter.bWii) exit_vmem = J();
SetJumpTarget(no_vmem); SetJumpTarget(no_vmem);
} }
if (Core::g_CoreStartupParameter.bWii) if (Core::g_CoreStartupParameter.bWii)
{ {
TEST(32, R(EAX), Imm32(JIT_ICACHE_EXRAM_BIT)); TEST(32, R(RSCRATCH), Imm32(JIT_ICACHE_EXRAM_BIT));
FixupBranch no_exram = J_CC(CC_Z); FixupBranch no_exram = J_CC(CC_Z);
AND(32, R(EAX), Imm32(JIT_ICACHEEX_MASK)); AND(32, R(RSCRATCH), Imm32(JIT_ICACHEEX_MASK));
MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->iCacheEx)); MOV(64, R(RSCRATCH2), Imm64((u64)jit->GetBlockCache()->iCacheEx));
MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0)); MOV(32, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_1, 0));
SetJumpTarget(no_exram); SetJumpTarget(no_exram);
} }
@ -106,14 +99,14 @@ void Jit64AsmRoutineManager::Generate()
if (Core::g_CoreStartupParameter.bWii && (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack)) if (Core::g_CoreStartupParameter.bWii && (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack))
SetJumpTarget(exit_vmem); SetJumpTarget(exit_vmem);
TEST(32, R(EAX), R(EAX)); TEST(32, R(RSCRATCH), R(RSCRATCH));
FixupBranch notfound = J_CC(CC_L); FixupBranch notfound = J_CC(CC_L);
//grab from list and jump to it //grab from list and jump to it
JMPptr(MComplex(R15, RAX, 8, 0)); JMPptr(MComplex(RCODE_POINTERS, RSCRATCH, 8, 0));
SetJumpTarget(notfound); SetJumpTarget(notfound);
//Ok, no block, let's jit //Ok, no block, let's jit
MOV(32, R(ABI_PARAM1), M(&PowerPC::ppcState.pc)); MOV(32, R(ABI_PARAM1), PPCSTATE(pc));
CALL((void *)&Jit); CALL((void *)&Jit);
JMP(dispatcherNoCheck); // no point in special casing this JMP(dispatcherNoCheck); // no point in special casing this
@ -122,10 +115,10 @@ void Jit64AsmRoutineManager::Generate()
doTiming = GetCodePtr(); doTiming = GetCodePtr();
// Test external exceptions. // Test external exceptions.
TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_EXTERNAL_INT | EXCEPTION_PERFORMANCE_MONITOR | EXCEPTION_DECREMENTER)); TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT | EXCEPTION_PERFORMANCE_MONITOR | EXCEPTION_DECREMENTER));
FixupBranch noExtException = J_CC(CC_Z); FixupBranch noExtException = J_CC(CC_Z);
MOV(32, R(EAX), M(&PC)); MOV(32, R(RSCRATCH), PPCSTATE(pc));
MOV(32, M(&NPC), R(EAX)); MOV(32, PPCSTATE(npc), R(RSCRATCH));
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExternalExceptions)); ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExternalExceptions));
SetJumpTarget(noExtException); SetJumpTarget(noExtException);
@ -168,8 +161,8 @@ void Jit64AsmRoutineManager::GenerateCommon()
const u8 *fastMemWrite8 = AlignCode16(); const u8 *fastMemWrite8 = AlignCode16();
CMP(32, R(ABI_PARAM2), Imm32(0xCC008000)); CMP(32, R(ABI_PARAM2), Imm32(0xCC008000));
FixupBranch skip_fast_write = J_CC(CC_NE, false); FixupBranch skip_fast_write = J_CC(CC_NE, false);
MOV(32, EAX, M(&m_gatherPipeCount)); MOV(32, RSCRATCH, M(&m_gatherPipeCount));
MOV(8, MDisp(EAX, (u32)&m_gatherPipe), ABI_PARAM1); MOV(8, MDisp(RSCRATCH, (u32)&m_gatherPipe), ABI_PARAM1);
ADD(32, 1, M(&m_gatherPipeCount)); ADD(32, 1, M(&m_gatherPipeCount));
RET(); RET();
SetJumpTarget(skip_fast_write); SetJumpTarget(skip_fast_write);

View File

@ -200,9 +200,9 @@ const int* GPRRegCache::GetAllocationOrder(size_t& count)
{ {
// R12, when used as base register, for example in a LEA, can generate bad code! Need to look into this. // R12, when used as base register, for example in a LEA, can generate bad code! Need to look into this.
#ifdef _WIN32 #ifdef _WIN32
RSI, RDI, R13, R14, R8, R9, R10, R11, R12, //, RCX RSI, RDI, R13, R14, R8, R9, R10, R11, R12, RCX
#else #else
RBP, R13, R14, R8, R9, R10, R11, R12, //, RCX R12, R13, R14, RSI, RDI, R8, R9, R10, R11, RCX
#endif #endif
}; };
count = sizeof(allocationOrder) / sizeof(const int); count = sizeof(allocationOrder) / sizeof(const int);
@ -221,12 +221,12 @@ const int* FPURegCache::GetAllocationOrder(size_t& count)
OpArg GPRRegCache::GetDefaultLocation(size_t reg) const OpArg GPRRegCache::GetDefaultLocation(size_t reg) const
{ {
return M(&ppcState.gpr[reg]); return PPCSTATE(gpr[reg]);
} }
OpArg FPURegCache::GetDefaultLocation(size_t reg) const OpArg FPURegCache::GetDefaultLocation(size_t reg) const
{ {
return M(&ppcState.ps[reg][0]); return PPCSTATE(ps[reg][0]);
} }
void RegCache::KillImmediate(size_t preg, bool doLoad, bool makeDirty) void RegCache::KillImmediate(size_t preg, bool doLoad, bool makeDirty)

View File

@ -28,9 +28,9 @@ void Jit64::sc(UGeckoInstruction inst)
gpr.Flush(); gpr.Flush();
fpr.Flush(); fpr.Flush();
MOV(32, M(&PC), Imm32(js.compilerPC + 4)); MOV(32, PPCSTATE(pc), Imm32(js.compilerPC + 4));
LOCK(); LOCK();
OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_SYSCALL)); OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_SYSCALL));
WriteExceptionExit(); WriteExceptionExit();
} }
@ -45,13 +45,13 @@ void Jit64::rfi(UGeckoInstruction inst)
const u32 mask = 0x87C0FFFF; const u32 mask = 0x87C0FFFF;
const u32 clearMSR13 = 0xFFFBFFFF; // Mask used to clear the bit MSR[13] const u32 clearMSR13 = 0xFFFBFFFF; // Mask used to clear the bit MSR[13]
// MSR = ((MSR & ~mask) | (SRR1 & mask)) & clearMSR13; // MSR = ((MSR & ~mask) | (SRR1 & mask)) & clearMSR13;
AND(32, M(&MSR), Imm32((~mask) & clearMSR13)); AND(32, PPCSTATE(msr), Imm32((~mask) & clearMSR13));
MOV(32, R(EAX), M(&SRR1)); MOV(32, R(RSCRATCH), PPCSTATE_SRR1);
AND(32, R(EAX), Imm32(mask & clearMSR13)); AND(32, R(RSCRATCH), Imm32(mask & clearMSR13));
OR(32, M(&MSR), R(EAX)); OR(32, PPCSTATE(msr), R(RSCRATCH));
// NPC = SRR0; // NPC = SRR0;
MOV(32, R(EAX), M(&SRR0)); MOV(32, R(RSCRATCH), PPCSTATE_SRR0);
WriteRfiExitDestInEAX(); WriteRfiExitDestInRSCRATCH();
} }
void Jit64::bx(UGeckoInstruction inst) void Jit64::bx(UGeckoInstruction inst)
@ -62,7 +62,7 @@ void Jit64::bx(UGeckoInstruction inst)
// We must always process the following sentence // We must always process the following sentence
// even if the blocks are merged by PPCAnalyst::Flatten(). // even if the blocks are merged by PPCAnalyst::Flatten().
if (inst.LK) if (inst.LK)
MOV(32, M(&LR), Imm32(js.compilerPC + 4)); MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4));
// If this is not the last instruction of a block, // If this is not the last instruction of a block,
// we will skip the rest process. // we will skip the rest process.
@ -82,7 +82,7 @@ void Jit64::bx(UGeckoInstruction inst)
destination = js.compilerPC + SignExt26(inst.LI << 2); destination = js.compilerPC + SignExt26(inst.LI << 2);
#ifdef ACID_TEST #ifdef ACID_TEST
if (inst.LK) if (inst.LK)
AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xFF000000))); AND(32, PPCSTATE(cr), Imm32(~(0xFF000000)));
#endif #endif
if (destination == js.compilerPC) if (destination == js.compilerPC)
{ {
@ -108,7 +108,7 @@ void Jit64::bcx(UGeckoInstruction inst)
FixupBranch pCTRDontBranch; FixupBranch pCTRDontBranch;
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR
{ {
SUB(32, M(&CTR), Imm8(1)); SUB(32, PPCSTATE_CTR, Imm8(1));
if (inst.BO & BO_BRANCH_IF_CTR_0) if (inst.BO & BO_BRANCH_IF_CTR_0)
pCTRDontBranch = J_CC(CC_NZ, true); pCTRDontBranch = J_CC(CC_NZ, true);
else else
@ -123,7 +123,7 @@ void Jit64::bcx(UGeckoInstruction inst)
} }
if (inst.LK) if (inst.LK)
MOV(32, M(&LR), Imm32(js.compilerPC + 4)); MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4));
u32 destination; u32 destination;
if (inst.AA) if (inst.AA)
@ -164,11 +164,11 @@ void Jit64::bcctrx(UGeckoInstruction inst)
gpr.Flush(); gpr.Flush();
fpr.Flush(); fpr.Flush();
MOV(32, R(EAX), M(&CTR)); MOV(32, R(RSCRATCH), PPCSTATE_CTR);
if (inst.LK_3) if (inst.LK_3)
MOV(32, M(&LR), Imm32(js.compilerPC + 4)); // LR = PC + 4; MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // LR = PC + 4;
AND(32, R(EAX), Imm32(0xFFFFFFFC)); AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
WriteExitDestInEAX(); WriteExitDestInRSCRATCH();
} }
else else
{ {
@ -179,15 +179,15 @@ void Jit64::bcctrx(UGeckoInstruction inst)
FixupBranch b = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), FixupBranch b = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3),
!(inst.BO_2 & BO_BRANCH_IF_TRUE)); !(inst.BO_2 & BO_BRANCH_IF_TRUE));
MOV(32, R(EAX), M(&CTR)); MOV(32, R(RSCRATCH), PPCSTATE_CTR);
AND(32, R(EAX), Imm32(0xFFFFFFFC)); AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
//MOV(32, M(&PC), R(EAX)); => Already done in WriteExitDestInEAX() //MOV(32, PPCSTATE(pc), R(RSCRATCH)); => Already done in WriteExitDestInRSCRATCH()
if (inst.LK_3) if (inst.LK_3)
MOV(32, M(&LR), Imm32(js.compilerPC + 4)); // LR = PC + 4; MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // LR = PC + 4;
gpr.Flush(FLUSH_MAINTAIN_STATE); gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE); fpr.Flush(FLUSH_MAINTAIN_STATE);
WriteExitDestInEAX(); WriteExitDestInRSCRATCH();
// Would really like to continue the block here, but it ends. TODO. // Would really like to continue the block here, but it ends. TODO.
SetJumpTarget(b); SetJumpTarget(b);
@ -204,7 +204,7 @@ void Jit64::bclrx(UGeckoInstruction inst)
FixupBranch pCTRDontBranch; FixupBranch pCTRDontBranch;
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR
{ {
SUB(32, M(&CTR), Imm8(1)); SUB(32, PPCSTATE_CTR, Imm8(1));
if (inst.BO & BO_BRANCH_IF_CTR_0) if (inst.BO & BO_BRANCH_IF_CTR_0)
pCTRDontBranch = J_CC(CC_NZ, true); pCTRDontBranch = J_CC(CC_NZ, true);
else else
@ -221,17 +221,17 @@ void Jit64::bclrx(UGeckoInstruction inst)
// This below line can be used to prove that blr "eats flags" in practice. // This below line can be used to prove that blr "eats flags" in practice.
// This observation will let us do a lot of fun observations. // This observation will let us do a lot of fun observations.
#ifdef ACID_TEST #ifdef ACID_TEST
AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xFF000000))); AND(32, PPCSTATE(cr), Imm32(~(0xFF000000)));
#endif #endif
MOV(32, R(EAX), M(&LR)); MOV(32, R(RSCRATCH), PPCSTATE_LR);
AND(32, R(EAX), Imm32(0xFFFFFFFC)); AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
if (inst.LK) if (inst.LK)
MOV(32, M(&LR), Imm32(js.compilerPC + 4)); MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4));
gpr.Flush(FLUSH_MAINTAIN_STATE); gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE); fpr.Flush(FLUSH_MAINTAIN_STATE);
WriteExitDestInEAX(); WriteExitDestInRSCRATCH();
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
SetJumpTarget( pConditionDontBranch ); SetJumpTarget( pConditionDontBranch );

View File

@ -248,7 +248,7 @@ void Jit64::fcmpx(UGeckoInstruction inst)
fpr.BindToRegister(b, true); fpr.BindToRegister(b, true);
if (fprf) if (fprf)
AND(32, M(&FPSCR), Imm32(~FPRF_MASK)); AND(32, PPCSTATE(fpscr), Imm32(~FPRF_MASK));
// Are we masking sNaN invalid floating point exceptions? If not this could crash if we don't handle the exception? // Are we masking sNaN invalid floating point exceptions? If not this could crash if we don't handle the exception?
UCOMISD(fpr.R(b).GetSimpleReg(), fpr.R(a)); UCOMISD(fpr.R(b).GetSimpleReg(), fpr.R(a));
@ -271,31 +271,31 @@ void Jit64::fcmpx(UGeckoInstruction inst)
pGreater = J_CC(CC_B); pGreater = J_CC(CC_B);
} }
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_EQ))); MOV(64, R(RSCRATCH), Imm64(PPCCRToInternal(CR_EQ)));
if (fprf) if (fprf)
OR(32, M(&FPSCR), Imm32(CR_EQ << FPRF_SHIFT)); OR(32, PPCSTATE(fpscr), Imm32(CR_EQ << FPRF_SHIFT));
continue1 = J(); continue1 = J();
SetJumpTarget(pNaN); SetJumpTarget(pNaN);
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_SO))); MOV(64, R(RSCRATCH), Imm64(PPCCRToInternal(CR_SO)));
if (fprf) if (fprf)
OR(32, M(&FPSCR), Imm32(CR_SO << FPRF_SHIFT)); OR(32, PPCSTATE(fpscr), Imm32(CR_SO << FPRF_SHIFT));
if (a != b) if (a != b)
{ {
continue2 = J(); continue2 = J();
SetJumpTarget(pGreater); SetJumpTarget(pGreater);
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_GT))); MOV(64, R(RSCRATCH), Imm64(PPCCRToInternal(CR_GT)));
if (fprf) if (fprf)
OR(32, M(&FPSCR), Imm32(CR_GT << FPRF_SHIFT)); OR(32, PPCSTATE(fpscr), Imm32(CR_GT << FPRF_SHIFT));
continue3 = J(); continue3 = J();
SetJumpTarget(pLesser); SetJumpTarget(pLesser);
MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_LT))); MOV(64, R(RSCRATCH), Imm64(PPCCRToInternal(CR_LT)));
if (fprf) if (fprf)
OR(32, M(&FPSCR), Imm32(CR_LT << FPRF_SHIFT)); OR(32, PPCSTATE(fpscr), Imm32(CR_LT << FPRF_SHIFT));
} }
SetJumpTarget(continue1); SetJumpTarget(continue1);
@ -305,7 +305,7 @@ void Jit64::fcmpx(UGeckoInstruction inst)
SetJumpTarget(continue3); SetJumpTarget(continue3);
} }
MOV(64, M(&PowerPC::ppcState.cr_val[crf]), R(RAX)); MOV(64, PPCSTATE(cr_val[crf]), R(RSCRATCH));
fpr.UnlockAll(); fpr.UnlockAll();
} }
@ -375,8 +375,7 @@ void Jit64::frsqrtex(UGeckoInstruction inst)
int b = inst.FB; int b = inst.FB;
int d = inst.FD; int d = inst.FD;
// rsqrtex requires ECX and EDX free gpr.FlushLockX(RSCRATCH_EXTRA);
gpr.FlushLockX(ECX, EDX);
fpr.Lock(b, d); fpr.Lock(b, d);
fpr.BindToRegister(d, d == b); fpr.BindToRegister(d, d == b);
MOVSD(XMM0, fpr.R(b)); MOVSD(XMM0, fpr.R(b));
@ -395,8 +394,7 @@ void Jit64::fresx(UGeckoInstruction inst)
int b = inst.FB; int b = inst.FB;
int d = inst.FD; int d = inst.FD;
// resx requires ECX and EDX free gpr.FlushLockX(RSCRATCH_EXTRA);
gpr.FlushLockX(ECX, EDX);
fpr.Lock(b, d); fpr.Lock(b, d);
fpr.BindToRegister(d, d == b); fpr.BindToRegister(d, d == b);
MOVSD(XMM0, fpr.R(b)); MOVSD(XMM0, fpr.R(b));

View File

@ -21,12 +21,12 @@ void Jit64::GenerateConstantOverflow(bool overflow)
if (overflow) if (overflow)
{ {
//XER[OV/SO] = 1 //XER[OV/SO] = 1
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK)); OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
} }
else else
{ {
//XER[OV] = 0 //XER[OV] = 0
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_OV_MASK)); AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_OV_MASK));
} }
} }
@ -34,11 +34,11 @@ void Jit64::GenerateOverflow()
{ {
FixupBranch jno = J_CC(CC_NO); FixupBranch jno = J_CC(CC_NO);
//XER[OV/SO] = 1 //XER[OV/SO] = 1
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK)); OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
FixupBranch exit = J(); FixupBranch exit = J();
SetJumpTarget(jno); SetJumpTarget(jno);
//XER[OV] = 0 //XER[OV] = 0
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_OV_MASK)); AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_OV_MASK));
SetJumpTarget(exit); SetJumpTarget(exit);
} }
@ -54,7 +54,7 @@ void Jit64::FinalizeCarryOverflow(bool oe, bool inv)
JitSetCA(); JitSetCA();
SetJumpTarget(carry1); SetJumpTarget(carry1);
//XER[OV/SO] = 1 //XER[OV/SO] = 1
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK)); OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
FixupBranch exit = J(); FixupBranch exit = J();
SetJumpTarget(jno); SetJumpTarget(jno);
// Do carry // Do carry
@ -72,14 +72,14 @@ void Jit64::FinalizeCarryOverflow(bool oe, bool inv)
} }
} }
void Jit64::GetCarryEAXAndClear() void Jit64::GetCarryRSCRATCHAndClear()
{ {
MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); MOV(32, R(RSCRATCH), PPCSTATE(spr[SPR_XER]));
BTR(32, R(EAX), Imm8(29)); BTR(32, R(RSCRATCH), Imm8(29));
} }
// Assumes that XER is in EAX and that the CA bit is clear. // Assumes that XER is in RSCRATCH and that the CA bit is clear.
void Jit64::FinalizeCarryGenerateOverflowEAX(bool oe, bool inv) void Jit64::FinalizeCarryGenerateOverflowRSCRATCH(bool oe, bool inv)
{ {
// USES_XER // USES_XER
if (oe) if (oe)
@ -87,29 +87,29 @@ void Jit64::FinalizeCarryGenerateOverflowEAX(bool oe, bool inv)
FixupBranch jno = J_CC(CC_NO); FixupBranch jno = J_CC(CC_NO);
// Do carry // Do carry
FixupBranch carry1 = J_CC(inv ? CC_C : CC_NC); FixupBranch carry1 = J_CC(inv ? CC_C : CC_NC);
OR(32, R(EAX), Imm32(XER_CA_MASK)); OR(32, R(RSCRATCH), Imm32(XER_CA_MASK));
SetJumpTarget(carry1); SetJumpTarget(carry1);
//XER[OV/SO] = 1 //XER[OV/SO] = 1
OR(32, R(EAX), Imm32(XER_SO_MASK | XER_OV_MASK)); OR(32, R(RSCRATCH), Imm32(XER_SO_MASK | XER_OV_MASK));
FixupBranch exit = J(); FixupBranch exit = J();
SetJumpTarget(jno); SetJumpTarget(jno);
// Do carry // Do carry
FixupBranch carry2 = J_CC(inv ? CC_C : CC_NC); FixupBranch carry2 = J_CC(inv ? CC_C : CC_NC);
OR(32, R(EAX), Imm32(XER_CA_MASK)); OR(32, R(RSCRATCH), Imm32(XER_CA_MASK));
SetJumpTarget(carry2); SetJumpTarget(carry2);
//XER[OV] = 0 //XER[OV] = 0
AND(32, R(EAX), Imm32(~XER_OV_MASK)); AND(32, R(RSCRATCH), Imm32(~XER_OV_MASK));
SetJumpTarget(exit); SetJumpTarget(exit);
} }
else else
{ {
// Do carry // Do carry
FixupBranch carry1 = J_CC(inv ? CC_C : CC_NC); FixupBranch carry1 = J_CC(inv ? CC_C : CC_NC);
OR(32, R(EAX), Imm32(XER_CA_MASK)); OR(32, R(RSCRATCH), Imm32(XER_CA_MASK));
SetJumpTarget(carry1); SetJumpTarget(carry1);
} }
// Dump EAX back into XER // Dump RSCRATCH back into XER
MOV(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX)); MOV(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH));
} }
// Assumes that the flags were just set through an addition. // Assumes that the flags were just set through an addition.
@ -117,10 +117,10 @@ void Jit64::GenerateCarry()
{ {
// USES_XER // USES_XER
FixupBranch pNoCarry = J_CC(CC_NC); FixupBranch pNoCarry = J_CC(CC_NC);
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK)); OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_CA_MASK));
FixupBranch pContinue = J(); FixupBranch pContinue = J();
SetJumpTarget(pNoCarry); SetJumpTarget(pNoCarry);
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~(XER_CA_MASK))); AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~(XER_CA_MASK)));
SetJumpTarget(pContinue); SetJumpTarget(pContinue);
} }
@ -128,12 +128,12 @@ void Jit64::ComputeRC(const Gen::OpArg & arg)
{ {
if (arg.IsImm()) if (arg.IsImm())
{ {
MOV(64, M(&PowerPC::ppcState.cr_val[0]), Imm32((s32)arg.offset)); MOV(64, PPCSTATE(cr_val[0]), Imm32((s32)arg.offset));
} }
else else
{ {
MOVSX(64, 32, RAX, arg); MOVSX(64, 32, RSCRATCH, arg);
MOV(64, M(&PowerPC::ppcState.cr_val[0]), R(RAX)); MOV(64, PPCSTATE(cr_val[0]), R(RSCRATCH));
} }
} }
@ -374,8 +374,8 @@ void Jit64::cmpXX(UGeckoInstruction inst)
else else
compareResult = CR_LT; compareResult = CR_LT;
} }
MOV(64, R(RAX), Imm64(PPCCRToInternal(compareResult))); MOV(64, R(RSCRATCH), Imm64(PPCCRToInternal(compareResult)));
MOV(64, M(&PowerPC::ppcState.cr_val[crf]), R(RAX)); MOV(64, PPCSTATE(cr_val[crf]), R(RSCRATCH));
gpr.UnlockAll(); gpr.UnlockAll();
if (merge_branch) if (merge_branch)
@ -393,7 +393,7 @@ void Jit64::cmpXX(UGeckoInstruction inst)
if (js.next_inst.OPCD == 16) // bcx if (js.next_inst.OPCD == 16) // bcx
{ {
if (js.next_inst.LK) if (js.next_inst.LK)
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4));
u32 destination; u32 destination;
if (js.next_inst.AA) if (js.next_inst.AA)
@ -405,17 +405,17 @@ void Jit64::cmpXX(UGeckoInstruction inst)
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx
{ {
if (js.next_inst.LK) if (js.next_inst.LK)
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4));
MOV(32, R(EAX), M(&CTR)); MOV(32, R(RSCRATCH), PPCSTATE_CTR);
AND(32, R(EAX), Imm32(0xFFFFFFFC)); AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
WriteExitDestInEAX(); WriteExitDestInRSCRATCH();
} }
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx
{ {
MOV(32, R(EAX), M(&LR)); MOV(32, R(RSCRATCH), PPCSTATE_LR);
if (js.next_inst.LK) if (js.next_inst.LK)
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4));
WriteExitDestInEAX(); WriteExitDestInRSCRATCH();
} }
else else
{ {
@ -436,32 +436,32 @@ void Jit64::cmpXX(UGeckoInstruction inst)
if (signedCompare) if (signedCompare)
{ {
if (gpr.R(a).IsImm()) if (gpr.R(a).IsImm())
MOV(64, R(RAX), Imm32((s32)gpr.R(a).offset)); MOV(64, R(RSCRATCH), Imm32((s32)gpr.R(a).offset));
else else
MOVSX(64, 32, RAX, gpr.R(a)); MOVSX(64, 32, RSCRATCH, gpr.R(a));
if (!comparand.IsImm()) if (!comparand.IsImm())
{ {
MOVSX(64, 32, ABI_PARAM1, comparand); MOVSX(64, 32, RSCRATCH2, comparand);
comparand = R(ABI_PARAM1); comparand = R(RSCRATCH2);
} }
} }
else else
{ {
if (gpr.R(a).IsImm()) if (gpr.R(a).IsImm())
MOV(32, R(RAX), Imm32((u32)gpr.R(a).offset)); MOV(32, R(RSCRATCH), Imm32((u32)gpr.R(a).offset));
else else
MOVZX(64, 32, RAX, gpr.R(a)); MOVZX(64, 32, RSCRATCH, gpr.R(a));
if (comparand.IsImm()) if (comparand.IsImm())
MOV(32, R(ABI_PARAM1), comparand); MOV(32, R(RSCRATCH2), comparand);
else else
MOVZX(64, 32, ABI_PARAM1, comparand); MOVZX(64, 32, RSCRATCH2, comparand);
comparand = R(ABI_PARAM1); comparand = R(RSCRATCH2);
} }
SUB(64, R(RAX), comparand); SUB(64, R(RSCRATCH), comparand);
MOV(64, M(&PowerPC::ppcState.cr_val[crf]), R(RAX)); MOV(64, PPCSTATE(cr_val[crf]), R(RSCRATCH));
if (merge_branch) if (merge_branch)
{ {
@ -492,7 +492,7 @@ void Jit64::cmpXX(UGeckoInstruction inst)
if (js.next_inst.OPCD == 16) // bcx if (js.next_inst.OPCD == 16) // bcx
{ {
if (js.next_inst.LK) if (js.next_inst.LK)
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4));
u32 destination; u32 destination;
if (js.next_inst.AA) if (js.next_inst.AA)
@ -504,21 +504,21 @@ void Jit64::cmpXX(UGeckoInstruction inst)
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx
{ {
if (js.next_inst.LK) if (js.next_inst.LK)
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4));
MOV(32, R(EAX), M(&CTR)); MOV(32, R(RSCRATCH), PPCSTATE_CTR);
AND(32, R(EAX), Imm32(0xFFFFFFFC)); AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
WriteExitDestInEAX(); WriteExitDestInRSCRATCH();
} }
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx
{ {
MOV(32, R(EAX), M(&LR)); MOV(32, R(RSCRATCH), PPCSTATE_LR);
AND(32, R(EAX), Imm32(0xFFFFFFFC)); AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
if (js.next_inst.LK) if (js.next_inst.LK)
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4));
WriteExitDestInEAX(); WriteExitDestInRSCRATCH();
} }
else else
{ {
@ -636,9 +636,9 @@ void Jit64::boolX(UGeckoInstruction inst)
} }
else else
{ {
MOV(32, R(EAX), operand); MOV(32, R(RSCRATCH), operand);
NOT(32, R(EAX)); NOT(32, R(RSCRATCH));
AND(32, gpr.R(a), R(EAX)); AND(32, gpr.R(a), R(RSCRATCH));
} }
} }
else if (inst.SUBOP10 == 444) // orx else if (inst.SUBOP10 == 444) // orx
@ -659,9 +659,9 @@ void Jit64::boolX(UGeckoInstruction inst)
} }
else else
{ {
MOV(32, R(EAX), operand); MOV(32, R(RSCRATCH), operand);
NOT(32, R(EAX)); NOT(32, R(RSCRATCH));
OR(32, gpr.R(a), R(EAX)); OR(32, gpr.R(a), R(RSCRATCH));
} }
} }
else if (inst.SUBOP10 == 316) // xorx else if (inst.SUBOP10 == 316) // xorx
@ -755,11 +755,7 @@ void Jit64::extsbx(UGeckoInstruction inst)
{ {
gpr.Lock(a, s); gpr.Lock(a, s);
gpr.BindToRegister(a, a == s, true); gpr.BindToRegister(a, a == s, true);
// Always force moving to EAX because it isn't possible MOVSX(32, 8, gpr.RX(a), gpr.R(s));
// to refer to the lowest byte of some registers, at least in
// 32-bit mode.
MOV(32, R(EAX), gpr.R(s));
MOVSX(32, 8, gpr.RX(a), R(AL)); // watch out for ah and friends
gpr.UnlockAll(); gpr.UnlockAll();
} }
@ -863,9 +859,9 @@ void Jit64::subfcx(UGeckoInstruction inst)
} }
else if (d == a) else if (d == a)
{ {
MOV(32, R(EAX), gpr.R(a)); MOV(32, R(RSCRATCH), gpr.R(a));
MOV(32, gpr.R(d), gpr.R(b)); MOV(32, gpr.R(d), gpr.R(b));
SUB(32, gpr.R(d), R(EAX)); SUB(32, gpr.R(d), R(RSCRATCH));
} }
else else
{ {
@ -887,7 +883,7 @@ void Jit64::subfex(UGeckoInstruction inst)
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
gpr.BindToRegister(d, (d == a || d == b), true); gpr.BindToRegister(d, (d == a || d == b), true);
GetCarryEAXAndClear(); GetCarryRSCRATCHAndClear();
bool invertedCarry = false; bool invertedCarry = false;
if (d == b) if (d == b)
@ -908,7 +904,7 @@ void Jit64::subfex(UGeckoInstruction inst)
NOT(32, gpr.R(d)); NOT(32, gpr.R(d));
ADC(32, gpr.R(d), gpr.R(b)); ADC(32, gpr.R(d), gpr.R(b));
} }
FinalizeCarryGenerateOverflowEAX(inst.OE, invertedCarry); FinalizeCarryGenerateOverflowRSCRATCH(inst.OE, invertedCarry);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
@ -924,14 +920,14 @@ void Jit64::subfmex(UGeckoInstruction inst)
gpr.Lock(a, d); gpr.Lock(a, d);
gpr.BindToRegister(d, d == a); gpr.BindToRegister(d, d == a);
GetCarryEAXAndClear(); GetCarryRSCRATCHAndClear();
if (d != a) if (d != a)
{ {
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
} }
NOT(32, gpr.R(d)); NOT(32, gpr.R(d));
ADC(32, gpr.R(d), Imm32(0xFFFFFFFF)); ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
FinalizeCarryGenerateOverflowEAX(inst.OE); FinalizeCarryGenerateOverflowRSCRATCH(inst.OE);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
gpr.UnlockAll(); gpr.UnlockAll();
@ -947,14 +943,14 @@ void Jit64::subfzex(UGeckoInstruction inst)
gpr.Lock(a, d); gpr.Lock(a, d);
gpr.BindToRegister(d, d == a); gpr.BindToRegister(d, d == a);
GetCarryEAXAndClear(); GetCarryRSCRATCHAndClear();
if (d != a) if (d != a)
{ {
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
} }
NOT(32, gpr.R(d)); NOT(32, gpr.R(d));
ADC(32, gpr.R(d), Imm8(0)); ADC(32, gpr.R(d), Imm8(0));
FinalizeCarryGenerateOverflowEAX(inst.OE); FinalizeCarryGenerateOverflowRSCRATCH(inst.OE);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
@ -990,9 +986,9 @@ void Jit64::subfx(UGeckoInstruction inst)
} }
else if (d == a) else if (d == a)
{ {
MOV(32, R(EAX), gpr.R(a)); MOV(32, R(RSCRATCH), gpr.R(a));
MOV(32, gpr.R(d), gpr.R(b)); MOV(32, gpr.R(d), gpr.R(b));
SUB(32, gpr.R(d), R(EAX)); SUB(32, gpr.R(d), R(RSCRATCH));
} }
else else
{ {
@ -1170,11 +1166,10 @@ void Jit64::mulhwXx(UGeckoInstruction inst)
} }
else else
{ {
gpr.FlushLockX(EDX);
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
// no register choice
gpr.FlushLockX(EDX, EAX);
gpr.BindToRegister(d, (d == a || d == b), true); gpr.BindToRegister(d, (d == a || d == b), true);
if (gpr.RX(d) == EDX)
PanicAlert("mulhwux : WTF");
MOV(32, R(EAX), gpr.R(a)); MOV(32, R(EAX), gpr.R(a));
gpr.KillImmediate(b, true, false); gpr.KillImmediate(b, true, false);
if (sign) if (sign)
@ -1253,11 +1248,11 @@ void Jit64::divwux(UGeckoInstruction inst)
// If failed, use slower round-down method // If failed, use slower round-down method
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
gpr.BindToRegister(d, d == a, true); gpr.BindToRegister(d, d == a, true);
MOV(32, R(EAX), Imm32(magic)); MOV(32, R(RSCRATCH), Imm32(magic));
if (d != a) if (d != a)
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
IMUL(64, gpr.RX(d), R(RAX)); IMUL(64, gpr.RX(d), R(RSCRATCH));
ADD(64, gpr.R(d), R(RAX)); ADD(64, gpr.R(d), R(RSCRATCH));
SHR(64, gpr.R(d), Imm8(shift+32)); SHR(64, gpr.R(d), Imm8(shift+32));
} }
else else
@ -1268,8 +1263,8 @@ void Jit64::divwux(UGeckoInstruction inst)
gpr.BindToRegister(d, false, true); gpr.BindToRegister(d, false, true);
if (d == a) if (d == a)
{ {
MOV(32, R(EAX), Imm32(magic+1)); MOV(32, R(RSCRATCH), Imm32(magic+1));
IMUL(64, gpr.RX(d), R(RAX)); IMUL(64, gpr.RX(d), R(RSCRATCH));
} }
else else
{ {
@ -1288,8 +1283,9 @@ void Jit64::divwux(UGeckoInstruction inst)
} }
else else
{ {
gpr.FlushLockX(EDX);
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
// no register choice (do we need to do this?)
gpr.FlushLockX(EAX, EDX);
gpr.BindToRegister(d, (d == a || d == b), true); gpr.BindToRegister(d, (d == a || d == b), true);
MOV(32, R(EAX), gpr.R(a)); MOV(32, R(EAX), gpr.R(a));
XOR(32, R(EDX), R(EDX)); XOR(32, R(EDX), R(EDX));
@ -1301,7 +1297,7 @@ void Jit64::divwux(UGeckoInstruction inst)
{ {
GenerateConstantOverflow(true); GenerateConstantOverflow(true);
} }
//MOV(32, R(EAX), gpr.R(d)); //MOV(32, R(RAX), gpr.R(d));
FixupBranch end = J(); FixupBranch end = J();
SetJumpTarget(not_div_by_zero); SetJumpTarget(not_div_by_zero);
DIV(32, gpr.R(b)); DIV(32, gpr.R(b));
@ -1349,8 +1345,9 @@ void Jit64::divwx(UGeckoInstruction inst)
} }
else else
{ {
gpr.FlushLockX(EDX);
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
// no register choice
gpr.FlushLockX(EAX, EDX);
gpr.BindToRegister(d, (d == a || d == b), true); gpr.BindToRegister(d, (d == a || d == b), true);
MOV(32, R(EAX), gpr.R(a)); MOV(32, R(EAX), gpr.R(a));
CDQ(); CDQ();
@ -1459,9 +1456,9 @@ void Jit64::addex(UGeckoInstruction inst)
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
gpr.BindToRegister(d, true); gpr.BindToRegister(d, true);
GetCarryEAXAndClear(); GetCarryRSCRATCHAndClear();
ADC(32, gpr.R(d), gpr.R((d == a) ? b : a)); ADC(32, gpr.R(d), gpr.R((d == a) ? b : a));
FinalizeCarryGenerateOverflowEAX(inst.OE); FinalizeCarryGenerateOverflowRSCRATCH(inst.OE);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
gpr.UnlockAll(); gpr.UnlockAll();
@ -1471,10 +1468,10 @@ void Jit64::addex(UGeckoInstruction inst)
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
gpr.BindToRegister(d, false); gpr.BindToRegister(d, false);
GetCarryEAXAndClear(); GetCarryRSCRATCHAndClear();
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
ADC(32, gpr.R(d), gpr.R(b)); ADC(32, gpr.R(d), gpr.R(b));
FinalizeCarryGenerateOverflowEAX(inst.OE); FinalizeCarryGenerateOverflowRSCRATCH(inst.OE);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
gpr.UnlockAll(); gpr.UnlockAll();
@ -1525,9 +1522,9 @@ void Jit64::addmex(UGeckoInstruction inst)
gpr.Lock(d); gpr.Lock(d);
gpr.BindToRegister(d, true); gpr.BindToRegister(d, true);
GetCarryEAXAndClear(); GetCarryRSCRATCHAndClear();
ADC(32, gpr.R(d), Imm32(0xFFFFFFFF)); ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
FinalizeCarryGenerateOverflowEAX(inst.OE); FinalizeCarryGenerateOverflowRSCRATCH(inst.OE);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
gpr.UnlockAll(); gpr.UnlockAll();
@ -1537,10 +1534,10 @@ void Jit64::addmex(UGeckoInstruction inst)
gpr.Lock(a, d); gpr.Lock(a, d);
gpr.BindToRegister(d, false); gpr.BindToRegister(d, false);
GetCarryEAXAndClear(); GetCarryRSCRATCHAndClear();
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
ADC(32, gpr.R(d), Imm32(0xFFFFFFFF)); ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
FinalizeCarryGenerateOverflowEAX(inst.OE); FinalizeCarryGenerateOverflowRSCRATCH(inst.OE);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
gpr.UnlockAll(); gpr.UnlockAll();
@ -1559,9 +1556,9 @@ void Jit64::addzex(UGeckoInstruction inst)
gpr.Lock(d); gpr.Lock(d);
gpr.BindToRegister(d, true); gpr.BindToRegister(d, true);
GetCarryEAXAndClear(); GetCarryRSCRATCHAndClear();
ADC(32, gpr.R(d), Imm8(0)); ADC(32, gpr.R(d), Imm8(0));
FinalizeCarryGenerateOverflowEAX(inst.OE); FinalizeCarryGenerateOverflowRSCRATCH(inst.OE);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
gpr.UnlockAll(); gpr.UnlockAll();
@ -1571,10 +1568,10 @@ void Jit64::addzex(UGeckoInstruction inst)
gpr.Lock(a, d); gpr.Lock(a, d);
gpr.BindToRegister(d, false); gpr.BindToRegister(d, false);
GetCarryEAXAndClear(); GetCarryRSCRATCHAndClear();
MOV(32, gpr.R(d), gpr.R(a)); MOV(32, gpr.R(d), gpr.R(a));
ADC(32, gpr.R(d), Imm8(0)); ADC(32, gpr.R(d), Imm8(0));
FinalizeCarryGenerateOverflowEAX(inst.OE); FinalizeCarryGenerateOverflowRSCRATCH(inst.OE);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
gpr.UnlockAll(); gpr.UnlockAll();
@ -1692,25 +1689,25 @@ void Jit64::rlwimix(UGeckoInstruction inst)
{ {
if (mask == 0U - (1U << inst.SH)) if (mask == 0U - (1U << inst.SH))
{ {
MOV(32, R(EAX), gpr.R(s)); MOV(32, R(RSCRATCH), gpr.R(s));
SHL(32, R(EAX), Imm8(inst.SH)); SHL(32, R(RSCRATCH), Imm8(inst.SH));
AND(32, gpr.R(a), Imm32(~mask)); AND(32, gpr.R(a), Imm32(~mask));
OR(32, gpr.R(a), R(EAX)); OR(32, gpr.R(a), R(RSCRATCH));
} }
else if (mask == (1U << inst.SH) - 1) else if (mask == (1U << inst.SH) - 1)
{ {
MOV(32, R(EAX), gpr.R(s)); MOV(32, R(RSCRATCH), gpr.R(s));
SHR(32, R(EAX), Imm8(32-inst.SH)); SHR(32, R(RSCRATCH), Imm8(32-inst.SH));
AND(32, gpr.R(a), Imm32(~mask)); AND(32, gpr.R(a), Imm32(~mask));
OR(32, gpr.R(a), R(EAX)); OR(32, gpr.R(a), R(RSCRATCH));
} }
else else
{ {
MOV(32, R(EAX), gpr.R(s)); MOV(32, R(RSCRATCH), gpr.R(s));
ROL(32, R(EAX), Imm8(inst.SH)); ROL(32, R(RSCRATCH), Imm8(inst.SH));
XOR(32, R(EAX), gpr.R(a)); XOR(32, R(RSCRATCH), gpr.R(a));
AND(32, R(EAX), Imm32(mask)); AND(32, R(RSCRATCH), Imm32(mask));
XOR(32, gpr.R(a), R(EAX)); XOR(32, gpr.R(a), R(RSCRATCH));
} }
if (inst.Rc) if (inst.Rc)
@ -1745,6 +1742,7 @@ void Jit64::rlwnmx(UGeckoInstruction inst)
} }
else else
{ {
// no register choice
gpr.FlushLockX(ECX); gpr.FlushLockX(ECX);
gpr.Lock(a, b, s); gpr.Lock(a, b, s);
gpr.BindToRegister(a, (a == b || a == s), true); gpr.BindToRegister(a, (a == b || a == s), true);
@ -1812,6 +1810,7 @@ void Jit64::srwx(UGeckoInstruction inst)
} }
else else
{ {
// no register choice
gpr.FlushLockX(ECX); gpr.FlushLockX(ECX);
gpr.Lock(a, b, s); gpr.Lock(a, b, s);
gpr.BindToRegister(a, (a == b || a == s), true); gpr.BindToRegister(a, (a == b || a == s), true);
@ -1850,6 +1849,7 @@ void Jit64::slwx(UGeckoInstruction inst)
} }
else else
{ {
// no register choice
gpr.FlushLockX(ECX); gpr.FlushLockX(ECX);
gpr.Lock(a, b, s); gpr.Lock(a, b, s);
gpr.BindToRegister(a, (a == b || a == s), true); gpr.BindToRegister(a, (a == b || a == s), true);
@ -1881,8 +1881,8 @@ void Jit64::srawx(UGeckoInstruction inst)
int a = inst.RA; int a = inst.RA;
int b = inst.RB; int b = inst.RB;
int s = inst.RS; int s = inst.RS;
gpr.Lock(a, s, b);
gpr.FlushLockX(ECX); gpr.FlushLockX(ECX);
gpr.Lock(a, s, b);
gpr.BindToRegister(a, (a == s || a == b), true); gpr.BindToRegister(a, (a == s || a == b), true);
JitClearCA(); JitClearCA();
MOV(32, R(ECX), gpr.R(b)); MOV(32, R(ECX), gpr.R(b));
@ -1890,9 +1890,9 @@ void Jit64::srawx(UGeckoInstruction inst)
MOV(32, gpr.R(a), gpr.R(s)); MOV(32, gpr.R(a), gpr.R(s));
SHL(64, gpr.R(a), Imm8(32)); SHL(64, gpr.R(a), Imm8(32));
SAR(64, gpr.R(a), R(ECX)); SAR(64, gpr.R(a), R(ECX));
MOV(32, R(EAX), gpr.R(a)); MOV(32, R(RSCRATCH), gpr.R(a));
SHR(64, gpr.R(a), Imm8(32)); SHR(64, gpr.R(a), Imm8(32));
TEST(32, gpr.R(a), R(EAX)); TEST(32, gpr.R(a), R(RSCRATCH));
FixupBranch nocarry = J_CC(CC_Z); FixupBranch nocarry = J_CC(CC_Z);
JitSetCA(); JitSetCA();
SetJumpTarget(nocarry); SetJumpTarget(nocarry);
@ -1917,16 +1917,16 @@ void Jit64::srawix(UGeckoInstruction inst)
gpr.Lock(a, s); gpr.Lock(a, s);
gpr.BindToRegister(a, a == s, true); gpr.BindToRegister(a, a == s, true);
JitClearCA(); JitClearCA();
MOV(32, R(EAX), gpr.R(s)); MOV(32, R(RSCRATCH), gpr.R(s));
if (a != s) if (a != s)
{ {
MOV(32, gpr.R(a), R(EAX)); MOV(32, gpr.R(a), R(RSCRATCH));
} }
SAR(32, gpr.R(a), Imm8(amount)); SAR(32, gpr.R(a), Imm8(amount));
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(a)); ComputeRC(gpr.R(a));
SHL(32, R(EAX), Imm8(32-amount)); SHL(32, R(RSCRATCH), Imm8(32-amount));
TEST(32, R(EAX), gpr.R(a)); TEST(32, R(RSCRATCH), gpr.R(a));
FixupBranch nocarry = J_CC(CC_Z); FixupBranch nocarry = J_CC(CC_Z);
JitSetCA(); JitSetCA();
SetJumpTarget(nocarry); SetJumpTarget(nocarry);
@ -2020,7 +2020,7 @@ void Jit64::twx(UGeckoInstruction inst)
SetJumpTarget(fixup); SetJumpTarget(fixup);
} }
LOCK(); LOCK();
OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_PROGRAM)); OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_PROGRAM));
gpr.Flush(FLUSH_MAINTAIN_STATE); gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE); fpr.Flush(FLUSH_MAINTAIN_STATE);

View File

@ -123,7 +123,7 @@ void Jit64::lXXx(UGeckoInstruction inst)
ABI_PopRegistersAndAdjustStack(registersInUse, false); ABI_PopRegistersAndAdjustStack(registersInUse, false);
// ! we must continue executing of the loop after exception handling, maybe there is still 0 in r0 // ! we must continue executing of the loop after exception handling, maybe there is still 0 in r0
//MOV(32, M(&PowerPC::ppcState.pc), Imm32(js.compilerPC)); //MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
WriteExceptionExit(); WriteExceptionExit();
SetJumpTarget(noIdle); SetJumpTarget(noIdle);
@ -197,14 +197,13 @@ void Jit64::lXXx(UGeckoInstruction inst)
else else
{ {
// In this case we need an extra temporary register. // In this case we need an extra temporary register.
gpr.FlushLockX(ABI_PARAM1); opAddress = R(RSCRATCH2);
opAddress = R(ABI_PARAM1);
storeAddress = true; storeAddress = true;
if (use_constant_offset) if (use_constant_offset)
{ {
if (gpr.R(a).IsSimpleReg() && offset != 0) if (gpr.R(a).IsSimpleReg() && offset != 0)
{ {
LEA(32, ABI_PARAM1, MDisp(gpr.RX(a), offset)); LEA(32, RSCRATCH2, MDisp(gpr.RX(a), offset));
} }
else else
{ {
@ -215,7 +214,7 @@ void Jit64::lXXx(UGeckoInstruction inst)
} }
else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
{ {
LEA(32, ABI_PARAM1, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); LEA(32, RSCRATCH2, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
} }
else else
{ {
@ -228,7 +227,13 @@ void Jit64::lXXx(UGeckoInstruction inst)
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
gpr.BindToRegister(d, js.memcheck, true); gpr.BindToRegister(d, js.memcheck, true);
SafeLoadToReg(gpr.RX(d), opAddress, accessSize, loadOffset, CallerSavedRegistersInUse(), signExtend); u32 registersInUse = CallerSavedRegistersInUse();
if (update && storeAddress)
{
// We need to save the (usually scratch) address register for the update.
registersInUse |= (1 << RSCRATCH2);
}
SafeLoadToReg(gpr.RX(d), opAddress, accessSize, loadOffset, registersInUse, signExtend);
if (update && storeAddress) if (update && storeAddress)
{ {
@ -269,11 +274,11 @@ void Jit64::dcbz(UGeckoInstruction inst)
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack) if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack)
mem_mask |= Memory::ADDR_MASK_MEM1; mem_mask |= Memory::ADDR_MASK_MEM1;
MOV(32, R(EAX), gpr.R(b)); MOV(32, R(RSCRATCH), gpr.R(b));
if (a) if (a)
ADD(32, R(EAX), gpr.R(a)); ADD(32, R(RSCRATCH), gpr.R(a));
AND(32, R(EAX), Imm32(~31)); AND(32, R(RSCRATCH), Imm32(~31));
TEST(32, R(EAX), Imm32(mem_mask)); TEST(32, R(RSCRATCH), Imm32(mem_mask));
FixupBranch fast = J_CC(CC_Z, true); FixupBranch fast = J_CC(CC_Z, true);
// Should this code ever run? I can't find any games that use DCBZ on non-physical addresses, but // Should this code ever run? I can't find any games that use DCBZ on non-physical addresses, but
@ -281,14 +286,14 @@ void Jit64::dcbz(UGeckoInstruction inst)
MOV(32, M(&PC), Imm32(jit->js.compilerPC)); MOV(32, M(&PC), Imm32(jit->js.compilerPC));
u32 registersInUse = CallerSavedRegistersInUse(); u32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, false); ABI_PushRegistersAndAdjustStack(registersInUse, false);
ABI_CallFunctionR((void *)&Memory::ClearCacheLine, EAX); ABI_CallFunctionR((void *)&Memory::ClearCacheLine, RSCRATCH);
ABI_PopRegistersAndAdjustStack(registersInUse, false); ABI_PopRegistersAndAdjustStack(registersInUse, false);
FixupBranch exit = J(); FixupBranch exit = J();
SetJumpTarget(fast); SetJumpTarget(fast);
PXOR(XMM0, R(XMM0)); PXOR(XMM0, R(XMM0));
MOVAPS(MComplex(RBX, RAX, SCALE_1, 0), XMM0); MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 0), XMM0);
MOVAPS(MComplex(RBX, RAX, SCALE_1, 16), XMM0); MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 16), XMM0);
SetJumpTarget(exit); SetJumpTarget(exit);
} }
@ -331,10 +336,9 @@ void Jit64::stX(UGeckoInstruction inst)
if ((addr & 0xFFFFF000) == 0xCC008000 && jo.optimizeGatherPipe) if ((addr & 0xFFFFF000) == 0xCC008000 && jo.optimizeGatherPipe)
{ {
// Helps external systems know which instruction triggered the write // Helps external systems know which instruction triggered the write
MOV(32, M(&PC), Imm32(jit->js.compilerPC)); MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC));
gpr.FlushLockX(ABI_PARAM1); MOV(32, R(RSCRATCH2), gpr.R(s));
MOV(32, R(ABI_PARAM1), gpr.R(s));
if (update) if (update)
gpr.SetImmediate32(a, addr); gpr.SetImmediate32(a, addr);
@ -358,8 +362,8 @@ void Jit64::stX(UGeckoInstruction inst)
} }
else if (Memory::IsRAMAddress(addr)) else if (Memory::IsRAMAddress(addr))
{ {
MOV(32, R(EAX), gpr.R(s)); MOV(32, R(RSCRATCH), gpr.R(s));
WriteToConstRamAddress(accessSize, EAX, addr, true); WriteToConstRamAddress(accessSize, RSCRATCH, addr, true);
if (update) if (update)
gpr.SetImmediate32(a, addr); gpr.SetImmediate32(a, addr);
return; return;
@ -367,7 +371,7 @@ void Jit64::stX(UGeckoInstruction inst)
else else
{ {
// Helps external systems know which instruction triggered the write // Helps external systems know which instruction triggered the write
MOV(32, M(&PC), Imm32(jit->js.compilerPC)); MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC));
u32 registersInUse = CallerSavedRegistersInUse(); u32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, false); ABI_PushRegistersAndAdjustStack(registersInUse, false);
@ -390,24 +394,31 @@ void Jit64::stX(UGeckoInstruction inst)
} }
} }
gpr.FlushLockX(ECX, EDX); gpr.Lock(a, s);
gpr.Lock(s, a); gpr.BindToRegister(a, true, false);
MOV(32, R(EDX), gpr.R(a)); X64Reg reg_value;
MOV(32, R(ECX), gpr.R(s)); if (WriteClobbersRegValue(accessSize, /* swap */ true))
SafeWriteRegToReg(ECX, EDX, accessSize, offset, CallerSavedRegistersInUse()); {
MOV(32, R(RSCRATCH2), gpr.R(s));
reg_value = RSCRATCH2;
}
else
{
gpr.BindToRegister(s, true, false);
reg_value = gpr.RX(s);
}
SafeWriteRegToReg(reg_value, gpr.RX(a), accessSize, offset, CallerSavedRegistersInUse(), SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR);
if (update && offset) if (update && offset)
{ {
gpr.KillImmediate(a, true, true);
MEMCHECK_START MEMCHECK_START
gpr.KillImmediate(a, true, true);
ADD(32, gpr.R(a), Imm32((u32)offset)); ADD(32, gpr.R(a), Imm32((u32)offset));
MEMCHECK_END MEMCHECK_END
} }
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX();
} }
else else
{ {
@ -424,24 +435,21 @@ void Jit64::stXx(UGeckoInstruction inst)
FALLBACK_IF(!a || a == s || a == b); FALLBACK_IF(!a || a == s || a == b);
gpr.Lock(a, b, s); gpr.Lock(a, b, s);
gpr.FlushLockX(ECX, EDX);
if (inst.SUBOP10 & 32) if (inst.SUBOP10 & 32)
{ {
MEMCHECK_START
gpr.BindToRegister(a, true, true); gpr.BindToRegister(a, true, true);
ADD(32, gpr.R(a), gpr.R(b)); ADD(32, gpr.R(a), gpr.R(b));
MOV(32, R(EDX), gpr.R(a)); MOV(32, R(RSCRATCH2), gpr.R(a));
MEMCHECK_END
} }
else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
{ {
LEA(32, EDX, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); LEA(32, RSCRATCH2, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
} }
else else
{ {
MOV(32, R(EDX), gpr.R(a)); MOV(32, R(RSCRATCH2), gpr.R(a));
ADD(32, R(EDX), gpr.R(b)); ADD(32, R(RSCRATCH2), gpr.R(b));
} }
int accessSize; int accessSize;
@ -462,8 +470,18 @@ void Jit64::stXx(UGeckoInstruction inst)
break; break;
} }
MOV(32, R(ECX), gpr.R(s)); X64Reg reg_value;
SafeWriteRegToReg(ECX, EDX, accessSize, 0, CallerSavedRegistersInUse()); if (WriteClobbersRegValue(accessSize, /* swap */ true))
{
MOV(32, R(RSCRATCH), gpr.R(s));
reg_value = RSCRATCH;
}
else
{
gpr.BindToRegister(s, true, false);
reg_value = gpr.RX(s);
}
SafeWriteRegToReg(reg_value, RSCRATCH2, accessSize, 0, CallerSavedRegistersInUse());
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
@ -476,15 +494,14 @@ void Jit64::lmw(UGeckoInstruction inst)
JITDISABLE(bJITLoadStoreOff); JITDISABLE(bJITLoadStoreOff);
// TODO: This doesn't handle rollback on DSI correctly // TODO: This doesn't handle rollback on DSI correctly
gpr.FlushLockX(ECX); MOV(32, R(RSCRATCH2), Imm32((u32)(s32)inst.SIMM_16));
MOV(32, R(ECX), Imm32((u32)(s32)inst.SIMM_16));
if (inst.RA) if (inst.RA)
ADD(32, R(ECX), gpr.R(inst.RA)); ADD(32, R(RSCRATCH2), gpr.R(inst.RA));
for (int i = inst.RD; i < 32; i++) for (int i = inst.RD; i < 32; i++)
{ {
SafeLoadToReg(EAX, R(ECX), 32, (i - inst.RD) * 4, CallerSavedRegistersInUse(), false); SafeLoadToReg(RSCRATCH, R(RSCRATCH2), 32, (i - inst.RD) * 4, CallerSavedRegistersInUse() | (1 << RSCRATCH_EXTRA), false);
gpr.BindToRegister(i, false, true); gpr.BindToRegister(i, false, true);
MOV(32, gpr.R(i), R(EAX)); MOV(32, gpr.R(i), R(RSCRATCH));
} }
gpr.UnlockAllX(); gpr.UnlockAllX();
} }
@ -495,15 +512,14 @@ void Jit64::stmw(UGeckoInstruction inst)
JITDISABLE(bJITLoadStoreOff); JITDISABLE(bJITLoadStoreOff);
// TODO: This doesn't handle rollback on DSI correctly // TODO: This doesn't handle rollback on DSI correctly
gpr.FlushLockX(ECX);
for (int i = inst.RD; i < 32; i++) for (int i = inst.RD; i < 32; i++)
{ {
if (inst.RA) if (inst.RA)
MOV(32, R(EAX), gpr.R(inst.RA)); MOV(32, R(RSCRATCH), gpr.R(inst.RA));
else else
XOR(32, R(EAX), R(EAX)); XOR(32, R(RSCRATCH), R(RSCRATCH));
MOV(32, R(ECX), gpr.R(i)); MOV(32, R(RSCRATCH2), gpr.R(i));
SafeWriteRegToReg(ECX, EAX, 32, (i - inst.RD) * 4 + (u32)(s32)inst.SIMM_16, CallerSavedRegistersInUse()); SafeWriteRegToReg(RSCRATCH2, RSCRATCH, 32, (i - inst.RD) * 4 + (u32)(s32)inst.SIMM_16, CallerSavedRegistersInUse());
} }
gpr.UnlockAllX(); gpr.UnlockAllX();
} }

View File

@ -42,9 +42,9 @@ void Jit64::lfXXX(UGeckoInstruction inst)
} }
else else
{ {
addr = R(EAX); addr = R(RSCRATCH);
if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
LEA(32, EAX, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); LEA(32, RSCRATCH, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
else else
{ {
MOV(32, addr, gpr.R(b)); MOV(32, addr, gpr.R(b));
@ -61,18 +61,18 @@ void Jit64::lfXXX(UGeckoInstruction inst)
offset = (s32)(s16)inst.SIMM_16; offset = (s32)(s16)inst.SIMM_16;
} }
SafeLoadToReg(RAX, addr, single ? 32 : 64, offset, CallerSavedRegistersInUse(), false); SafeLoadToReg(RSCRATCH, addr, single ? 32 : 64, offset, CallerSavedRegistersInUse(), false);
fpr.Lock(d); fpr.Lock(d);
fpr.BindToRegister(d, js.memcheck || !single); fpr.BindToRegister(d, js.memcheck || !single);
MEMCHECK_START MEMCHECK_START
if (single) if (single)
{ {
ConvertSingleToDouble(fpr.RX(d), EAX, true); ConvertSingleToDouble(fpr.RX(d), RSCRATCH, true);
} }
else else
{ {
MOVQ_xmm(XMM0, R(RAX)); MOVQ_xmm(XMM0, R(RSCRATCH));
MOVSD(fpr.RX(d), R(XMM0)); MOVSD(fpr.RX(d), R(XMM0));
} }
MEMCHECK_END MEMCHECK_END
@ -96,24 +96,23 @@ void Jit64::stfXXX(UGeckoInstruction inst)
FALLBACK_IF(!indexed && !a); FALLBACK_IF(!indexed && !a);
s32 offset = 0; s32 offset = 0;
gpr.FlushLockX(ABI_PARAM1);
if (indexed) if (indexed)
{ {
if (update) if (update)
{ {
gpr.BindToRegister(a, true, true); gpr.BindToRegister(a, true, true);
ADD(32, gpr.R(a), gpr.R(b)); ADD(32, gpr.R(a), gpr.R(b));
MOV(32, R(ABI_PARAM1), gpr.R(a)); MOV(32, R(RSCRATCH2), gpr.R(a));
} }
else else
{ {
if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
LEA(32, ABI_PARAM1, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); LEA(32, RSCRATCH2, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
else else
{ {
MOV(32, R(ABI_PARAM1), gpr.R(b)); MOV(32, R(RSCRATCH2), gpr.R(b));
if (a) if (a)
ADD(32, R(ABI_PARAM1), gpr.R(a)); ADD(32, R(RSCRATCH2), gpr.R(a));
} }
} }
} }
@ -128,23 +127,23 @@ void Jit64::stfXXX(UGeckoInstruction inst)
{ {
offset = (s32)(s16)inst.SIMM_16; offset = (s32)(s16)inst.SIMM_16;
} }
MOV(32, R(ABI_PARAM1), gpr.R(a)); MOV(32, R(RSCRATCH2), gpr.R(a));
} }
if (single) if (single)
{ {
fpr.BindToRegister(s, true, false); fpr.BindToRegister(s, true, false);
ConvertDoubleToSingle(XMM0, fpr.RX(s)); ConvertDoubleToSingle(XMM0, fpr.RX(s));
SafeWriteF32ToReg(XMM0, ABI_PARAM1, offset, CallerSavedRegistersInUse()); SafeWriteF32ToReg(XMM0, RSCRATCH2, offset, CallerSavedRegistersInUse());
fpr.UnlockAll(); fpr.UnlockAll();
} }
else else
{ {
if (fpr.R(s).IsSimpleReg()) if (fpr.R(s).IsSimpleReg())
MOVQ_xmm(R(RAX), fpr.RX(s)); MOVQ_xmm(R(RSCRATCH), fpr.RX(s));
else else
MOV(64, R(RAX), fpr.R(s)); MOV(64, R(RSCRATCH), fpr.R(s));
SafeWriteRegToReg(RAX, ABI_PARAM1, 64, offset, CallerSavedRegistersInUse()); SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 64, offset, CallerSavedRegistersInUse());
} }
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
@ -160,15 +159,14 @@ void Jit64::stfiwx(UGeckoInstruction inst)
int a = inst.RA; int a = inst.RA;
int b = inst.RB; int b = inst.RB;
gpr.FlushLockX(ABI_PARAM1); MOV(32, R(RSCRATCH2), gpr.R(b));
MOV(32, R(ABI_PARAM1), gpr.R(b));
if (a) if (a)
ADD(32, R(ABI_PARAM1), gpr.R(a)); ADD(32, R(RSCRATCH2), gpr.R(a));
if (fpr.R(s).IsSimpleReg()) if (fpr.R(s).IsSimpleReg())
MOVD_xmm(R(EAX), fpr.RX(s)); MOVD_xmm(R(RSCRATCH), fpr.RX(s));
else else
MOV(32, R(EAX), fpr.R(s)); MOV(32, R(RSCRATCH), fpr.R(s));
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, CallerSavedRegistersInUse()); SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 32, 0, CallerSavedRegistersInUse());
gpr.UnlockAllX(); gpr.UnlockAllX();
} }

View File

@ -28,37 +28,36 @@ void Jit64::psq_st(UGeckoInstruction inst)
int a = inst.RA; int a = inst.RA;
int s = inst.RS; // Fp numbers int s = inst.RS; // Fp numbers
gpr.FlushLockX(EAX, EDX); gpr.FlushLockX(RSCRATCH, RSCRATCH_EXTRA);
gpr.FlushLockX(ECX);
if (update) if (update)
gpr.BindToRegister(inst.RA, true, true); gpr.BindToRegister(inst.RA, true, true);
fpr.BindToRegister(inst.RS, true, false); fpr.BindToRegister(inst.RS, true, false);
MOV(32, R(ECX), gpr.R(inst.RA)); MOV(32, R(RSCRATCH_EXTRA), gpr.R(inst.RA));
if (offset) if (offset)
ADD(32, R(ECX), Imm32((u32)offset)); ADD(32, R(RSCRATCH_EXTRA), Imm32((u32)offset));
if (update && offset) if (update && offset)
MOV(32, gpr.R(a), R(ECX)); MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
// Some games (e.g. Dirt 2) incorrectly set the unused bits which breaks the lookup table code. // Some games (e.g. Dirt 2) incorrectly set the unused bits which breaks the lookup table code.
// Hence, we need to mask out the unused bits. The layout of the GQR register is // Hence, we need to mask out the unused bits. The layout of the GQR register is
// UU[SCALE]UUUUU[TYPE] where SCALE is 6 bits and TYPE is 3 bits, so we have to AND with // UU[SCALE]UUUUU[TYPE] where SCALE is 6 bits and TYPE is 3 bits, so we have to AND with
// 0b0011111100000111, or 0x3F07. // 0b0011111100000111, or 0x3F07.
MOV(32, R(EAX), Imm32(0x3F07)); MOV(32, R(RSCRATCH), Imm32(0x3F07));
AND(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_GQR0 + inst.I])); AND(32, R(RSCRATCH), PPCSTATE(spr[SPR_GQR0 + inst.I]));
MOVZX(32, 8, EDX, R(AL)); MOVZX(32, 8, RSCRATCH2, R(RSCRATCH));
// FIXME: Fix ModR/M encoding to allow [EDX*4+disp32] without a base register! // FIXME: Fix ModR/M encoding to allow [RSCRATCH2*4+disp32] without a base register!
if (inst.W) if (inst.W)
{ {
// One value // One value
PXOR(XMM0, R(XMM0)); // TODO: See if we can get rid of this cheaply by tweaking the code in the singleStore* functions. PXOR(XMM0, R(XMM0)); // TODO: See if we can get rid of this cheaply by tweaking the code in the singleStore* functions.
CVTSD2SS(XMM0, fpr.R(s)); CVTSD2SS(XMM0, fpr.R(s));
CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)asm_routines.singleStoreQuantized)); CALLptr(MScaled(RSCRATCH2, SCALE_8, (u32)(u64)asm_routines.singleStoreQuantized));
} }
else else
{ {
// Pair of values // Pair of values
CVTPD2PS(XMM0, fpr.R(s)); CVTPD2PS(XMM0, fpr.R(s));
CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)asm_routines.pairedStoreQuantized)); CALLptr(MScaled(RSCRATCH2, SCALE_8, (u32)(u64)asm_routines.pairedStoreQuantized));
} }
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
@ -73,24 +72,23 @@ void Jit64::psq_l(UGeckoInstruction inst)
bool update = inst.OPCD == 57; bool update = inst.OPCD == 57;
int offset = inst.SIMM_12; int offset = inst.SIMM_12;
gpr.FlushLockX(EAX, EDX); gpr.FlushLockX(RSCRATCH, RSCRATCH_EXTRA);
gpr.FlushLockX(ECX);
gpr.BindToRegister(inst.RA, true, update && offset); gpr.BindToRegister(inst.RA, true, update && offset);
fpr.BindToRegister(inst.RS, false, true); fpr.BindToRegister(inst.RS, false, true);
if (offset) if (offset)
LEA(32, ECX, MDisp(gpr.RX(inst.RA), offset)); LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(inst.RA), offset));
else else
MOV(32, R(ECX), gpr.R(inst.RA)); MOV(32, R(RSCRATCH_EXTRA), gpr.R(inst.RA));
if (update && offset) if (update && offset)
MOV(32, gpr.R(inst.RA), R(ECX)); MOV(32, gpr.R(inst.RA), R(RSCRATCH_EXTRA));
MOV(32, R(EAX), Imm32(0x3F07)); MOV(32, R(RSCRATCH), Imm32(0x3F07));
AND(32, R(EAX), M(((char *)&GQR(inst.I)) + 2)); AND(32, R(RSCRATCH), M(((char *)&GQR(inst.I)) + 2));
MOVZX(32, 8, EDX, R(AL)); MOVZX(32, 8, RSCRATCH2, R(RSCRATCH));
if (inst.W) if (inst.W)
OR(32, R(EDX), Imm8(8)); OR(32, R(RSCRATCH2), Imm8(8));
ABI_AlignStack(0); ABI_AlignStack(0);
CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)asm_routines.pairedLoadQuantized)); CALLptr(MScaled(RSCRATCH2, SCALE_8, (u32)(u64)asm_routines.pairedLoadQuantized));
ABI_RestoreStack(0); ABI_RestoreStack(0);
// MEMCHECK_START // FIXME: MMU does not work here because of unsafe memory access // MEMCHECK_START // FIXME: MMU does not work here because of unsafe memory access

View File

@ -16,22 +16,22 @@ void Jit64::GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate)
switch (bit) switch (bit)
{ {
case CR_SO_BIT: // check bit 61 set case CR_SO_BIT: // check bit 61 set
BT(64, M(&PowerPC::ppcState.cr_val[field]), Imm8(61)); BT(64, PPCSTATE(cr_val[field]), Imm8(61));
SETcc(negate ? CC_NC : CC_C, R(out)); SETcc(negate ? CC_NC : CC_C, R(out));
break; break;
case CR_EQ_BIT: // check bits 31-0 == 0 case CR_EQ_BIT: // check bits 31-0 == 0
CMP(32, M(&PowerPC::ppcState.cr_val[field]), Imm8(0)); CMP(32, PPCSTATE(cr_val[field]), Imm8(0));
SETcc(negate ? CC_NZ : CC_Z, R(out)); SETcc(negate ? CC_NZ : CC_Z, R(out));
break; break;
case CR_GT_BIT: // check val > 0 case CR_GT_BIT: // check val > 0
CMP(64, M(&PowerPC::ppcState.cr_val[field]), Imm8(0)); CMP(64, PPCSTATE(cr_val[field]), Imm8(0));
SETcc(negate ? CC_NG : CC_G, R(out)); SETcc(negate ? CC_NG : CC_G, R(out));
break; break;
case CR_LT_BIT: // check bit 62 set case CR_LT_BIT: // check bit 62 set
BT(64, M(&PowerPC::ppcState.cr_val[field]), Imm8(62)); BT(64, PPCSTATE(cr_val[field]), Imm8(62));
SETcc(negate ? CC_NC : CC_C, R(out)); SETcc(negate ? CC_NC : CC_C, R(out));
break; break;
@ -42,40 +42,40 @@ void Jit64::GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate)
void Jit64::SetCRFieldBit(int field, int bit, Gen::X64Reg in) void Jit64::SetCRFieldBit(int field, int bit, Gen::X64Reg in)
{ {
MOV(64, R(ABI_PARAM1), M(&PowerPC::ppcState.cr_val[field])); MOV(64, R(RSCRATCH2), PPCSTATE(cr_val[field]));
MOVZX(32, 8, in, R(in)); MOVZX(32, 8, in, R(in));
switch (bit) switch (bit)
{ {
case CR_SO_BIT: // set bit 61 to input case CR_SO_BIT: // set bit 61 to input
BTR(64, R(ABI_PARAM1), Imm8(61)); BTR(64, R(RSCRATCH2), Imm8(61));
SHL(64, R(in), Imm8(61)); SHL(64, R(in), Imm8(61));
OR(64, R(ABI_PARAM1), R(in)); OR(64, R(RSCRATCH2), R(in));
break; break;
case CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input case CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input
SHR(64, R(ABI_PARAM1), Imm8(32)); SHR(64, R(RSCRATCH2), Imm8(32));
SHL(64, R(ABI_PARAM1), Imm8(32)); SHL(64, R(RSCRATCH2), Imm8(32));
XOR(32, R(in), Imm8(1)); XOR(32, R(in), Imm8(1));
OR(64, R(ABI_PARAM1), R(in)); OR(64, R(RSCRATCH2), R(in));
break; break;
case CR_GT_BIT: // set bit 63 to !input case CR_GT_BIT: // set bit 63 to !input
BTR(64, R(ABI_PARAM1), Imm8(63)); BTR(64, R(RSCRATCH2), Imm8(63));
NOT(32, R(in)); NOT(32, R(in));
SHL(64, R(in), Imm8(63)); SHL(64, R(in), Imm8(63));
OR(64, R(ABI_PARAM1), R(in)); OR(64, R(RSCRATCH2), R(in));
break; break;
case CR_LT_BIT: // set bit 62 to input case CR_LT_BIT: // set bit 62 to input
BTR(64, R(ABI_PARAM1), Imm8(62)); BTR(64, R(RSCRATCH2), Imm8(62));
SHL(64, R(in), Imm8(62)); SHL(64, R(in), Imm8(62));
OR(64, R(ABI_PARAM1), R(in)); OR(64, R(RSCRATCH2), R(in));
break; break;
} }
BTS(64, R(ABI_PARAM1), Imm8(32)); BTS(64, R(RSCRATCH2), Imm8(32));
MOV(64, M(&PowerPC::ppcState.cr_val[field]), R(ABI_PARAM1)); MOV(64, PPCSTATE(cr_val[field]), R(RSCRATCH2));
} }
FixupBranch Jit64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) FixupBranch Jit64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set)
@ -83,19 +83,19 @@ FixupBranch Jit64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set)
switch (bit) switch (bit)
{ {
case CR_SO_BIT: // check bit 61 set case CR_SO_BIT: // check bit 61 set
BT(64, M(&PowerPC::ppcState.cr_val[field]), Imm8(61)); BT(64, PPCSTATE(cr_val[field]), Imm8(61));
return J_CC(jump_if_set ? CC_C : CC_NC, true); return J_CC(jump_if_set ? CC_C : CC_NC, true);
case CR_EQ_BIT: // check bits 31-0 == 0 case CR_EQ_BIT: // check bits 31-0 == 0
CMP(32, M(&PowerPC::ppcState.cr_val[field]), Imm8(0)); CMP(32, PPCSTATE(cr_val[field]), Imm8(0));
return J_CC(jump_if_set ? CC_Z : CC_NZ, true); return J_CC(jump_if_set ? CC_Z : CC_NZ, true);
case CR_GT_BIT: // check val > 0 case CR_GT_BIT: // check val > 0
CMP(64, M(&PowerPC::ppcState.cr_val[field]), Imm8(0)); CMP(64, PPCSTATE(cr_val[field]), Imm8(0));
return J_CC(jump_if_set ? CC_G : CC_LE, true); return J_CC(jump_if_set ? CC_G : CC_LE, true);
case CR_LT_BIT: // check bit 62 set case CR_LT_BIT: // check bit 62 set
BT(64, M(&PowerPC::ppcState.cr_val[field]), Imm8(62)); BT(64, PPCSTATE(cr_val[field]), Imm8(62));
return J_CC(jump_if_set ? CC_C : CC_NC, true); return J_CC(jump_if_set ? CC_C : CC_NC, true);
default: default:
@ -154,7 +154,7 @@ void Jit64::mtspr(UGeckoInstruction inst)
gpr.Lock(d); gpr.Lock(d);
gpr.BindToRegister(d, true, false); gpr.BindToRegister(d, true, false);
} }
MOV(32, M(&PowerPC::ppcState.spr[iIndex]), gpr.R(d)); MOV(32, PPCSTATE(spr[iIndex]), gpr.R(d));
gpr.UnlockAll(); gpr.UnlockAll();
} }
@ -173,8 +173,10 @@ void Jit64::mfspr(UGeckoInstruction inst)
// typical use of this instruction is to call it three times, e.g. mftbu/mftbl/mftbu/cmpw/bne // typical use of this instruction is to call it three times, e.g. mftbu/mftbl/mftbu/cmpw/bne
// to deal with possible timer wraparound. This makes the second two (out of three) completely // to deal with possible timer wraparound. This makes the second two (out of three) completely
// redundant for the JIT. // redundant for the JIT.
// no register choice
gpr.FlushLockX(RDX, RAX);
u32 offset = js.downcountAmount / SystemTimers::TIMER_RATIO; u32 offset = js.downcountAmount / SystemTimers::TIMER_RATIO;
gpr.FlushLockX(EDX);
// An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the // An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the
// cost of calling out to C for this is actually significant. // cost of calling out to C for this is actually significant.
@ -190,7 +192,7 @@ void Jit64::mfspr(UGeckoInstruction inst)
LEA(64, RAX, MComplex(RAX, RDX, SCALE_1, offset)); LEA(64, RAX, MComplex(RAX, RDX, SCALE_1, offset));
else else
ADD(64, R(RAX), R(RDX)); ADD(64, R(RAX), R(RDX));
MOV(64, M(&TL), R(RAX)); MOV(64, PPCSTATE(spr[SPR_TL]), R(RAX));
// Two calls of TU/TL next to each other are extremely common in typical usage, so merge them // Two calls of TU/TL next to each other are extremely common in typical usage, so merge them
// if we can. // if we can.
@ -205,14 +207,14 @@ void Jit64::mfspr(UGeckoInstruction inst)
gpr.BindToRegister(d, false); gpr.BindToRegister(d, false);
gpr.BindToRegister(n, false); gpr.BindToRegister(n, false);
if (iIndex == SPR_TL) if (iIndex == SPR_TL)
MOV(32, gpr.R(d), R(EAX)); MOV(32, gpr.R(d), R(RAX));
if (nextIndex == SPR_TL) if (nextIndex == SPR_TL)
MOV(32, gpr.R(n), R(EAX)); MOV(32, gpr.R(n), R(RAX));
SHR(64, R(RAX), Imm8(32)); SHR(64, R(RAX), Imm8(32));
if (iIndex == SPR_TU) if (iIndex == SPR_TU)
MOV(32, gpr.R(d), R(EAX)); MOV(32, gpr.R(d), R(RAX));
if (nextIndex == SPR_TU) if (nextIndex == SPR_TU)
MOV(32, gpr.R(n), R(EAX)); MOV(32, gpr.R(n), R(RAX));
} }
else else
{ {
@ -220,8 +222,9 @@ void Jit64::mfspr(UGeckoInstruction inst)
gpr.BindToRegister(d, false); gpr.BindToRegister(d, false);
if (iIndex == SPR_TU) if (iIndex == SPR_TU)
SHR(64, R(RAX), Imm8(32)); SHR(64, R(RAX), Imm8(32));
MOV(32, gpr.R(d), R(EAX)); MOV(32, gpr.R(d), R(RAX));
} }
gpr.UnlockAllX();
break; break;
} }
case SPR_WPAR: case SPR_WPAR:
@ -234,11 +237,10 @@ void Jit64::mfspr(UGeckoInstruction inst)
default: default:
gpr.Lock(d); gpr.Lock(d);
gpr.BindToRegister(d, false); gpr.BindToRegister(d, false);
MOV(32, gpr.R(d), M(&PowerPC::ppcState.spr[iIndex])); MOV(32, gpr.R(d), PPCSTATE(spr[iIndex]));
break; break;
} }
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX();
} }
void Jit64::mtmsr(UGeckoInstruction inst) void Jit64::mtmsr(UGeckoInstruction inst)
@ -251,7 +253,7 @@ void Jit64::mtmsr(UGeckoInstruction inst)
gpr.Lock(inst.RS); gpr.Lock(inst.RS);
gpr.BindToRegister(inst.RS, true, false); gpr.BindToRegister(inst.RS, true, false);
} }
MOV(32, M(&MSR), gpr.R(inst.RS)); MOV(32, PPCSTATE(msr), gpr.R(inst.RS));
gpr.UnlockAll(); gpr.UnlockAll();
gpr.Flush(); gpr.Flush();
fpr.Flush(); fpr.Flush();
@ -259,17 +261,17 @@ void Jit64::mtmsr(UGeckoInstruction inst)
// If some exceptions are pending and EE are now enabled, force checking // If some exceptions are pending and EE are now enabled, force checking
// external exceptions when going out of mtmsr in order to execute delayed // external exceptions when going out of mtmsr in order to execute delayed
// interrupts as soon as possible. // interrupts as soon as possible.
TEST(32, M(&MSR), Imm32(0x8000)); TEST(32, PPCSTATE(msr), Imm32(0x8000));
FixupBranch eeDisabled = J_CC(CC_Z); FixupBranch eeDisabled = J_CC(CC_Z);
TEST(32, M((void*)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_EXTERNAL_INT | EXCEPTION_PERFORMANCE_MONITOR | EXCEPTION_DECREMENTER)); TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT | EXCEPTION_PERFORMANCE_MONITOR | EXCEPTION_DECREMENTER));
FixupBranch noExceptionsPending = J_CC(CC_Z); FixupBranch noExceptionsPending = J_CC(CC_Z);
// Check if a CP interrupt is waiting and keep the GPU emulation in sync (issue 4336) // Check if a CP interrupt is waiting and keep the GPU emulation in sync (issue 4336)
TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP)); TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP));
FixupBranch cpInt = J_CC(CC_NZ); FixupBranch cpInt = J_CC(CC_NZ);
MOV(32, M(&PC), Imm32(js.compilerPC + 4)); MOV(32, PPCSTATE(pc), Imm32(js.compilerPC + 4));
WriteExternalExceptionExit(); WriteExternalExceptionExit();
SetJumpTarget(cpInt); SetJumpTarget(cpInt);
@ -288,7 +290,7 @@ void Jit64::mfmsr(UGeckoInstruction inst)
//Privileged? //Privileged?
gpr.Lock(inst.RD); gpr.Lock(inst.RD);
gpr.BindToRegister(inst.RD, false, true); gpr.BindToRegister(inst.RD, false, true);
MOV(32, gpr.R(inst.RD), M(&MSR)); MOV(32, gpr.R(inst.RD), PPCSTATE(msr));
gpr.UnlockAll(); gpr.UnlockAll();
} }
@ -308,33 +310,32 @@ void Jit64::mfcr(UGeckoInstruction inst)
gpr.BindToRegister(d, false, true); gpr.BindToRegister(d, false, true);
XOR(32, gpr.R(d), gpr.R(d)); XOR(32, gpr.R(d), gpr.R(d));
gpr.FlushLockX(ABI_PARAM1); X64Reg cr_val = RSCRATCH2;
X64Reg cr_val = ABI_PARAM1; // we only need to zero the high bits of RSCRATCH once
// we only need to zero the high bits of EAX once XOR(32, R(RSCRATCH), R(RSCRATCH));
XOR(32, R(EAX), R(EAX));
for (int i = 0; i < 8; i++) for (int i = 0; i < 8; i++)
{ {
static const u8 m_flagTable[8] = {0x0,0x1,0x8,0x9,0x0,0x1,0x8,0x9}; static const u8 m_flagTable[8] = {0x0,0x1,0x8,0x9,0x0,0x1,0x8,0x9};
if (i != 0) if (i != 0)
SHL(32, gpr.R(d), Imm8(4)); SHL(32, gpr.R(d), Imm8(4));
MOV(64, R(cr_val), M(&PowerPC::ppcState.cr_val[i])); MOV(64, R(cr_val), PPCSTATE(cr_val[i]));
// EQ: Bits 31-0 == 0; set flag bit 1 // EQ: Bits 31-0 == 0; set flag bit 1
TEST(32, R(cr_val), R(cr_val)); TEST(32, R(cr_val), R(cr_val));
SETcc(CC_Z, R(EAX)); SETcc(CC_Z, R(RSCRATCH));
LEA(32, gpr.RX(d), MComplex(gpr.RX(d), EAX, SCALE_2, 0)); LEA(32, gpr.RX(d), MComplex(gpr.RX(d), RSCRATCH, SCALE_2, 0));
// GT: Value > 0; set flag bit 2 // GT: Value > 0; set flag bit 2
TEST(64, R(cr_val), R(cr_val)); TEST(64, R(cr_val), R(cr_val));
SETcc(CC_G, R(EAX)); SETcc(CC_G, R(RSCRATCH));
LEA(32, gpr.RX(d), MComplex(gpr.RX(d), EAX, SCALE_4, 0)); LEA(32, gpr.RX(d), MComplex(gpr.RX(d), RSCRATCH, SCALE_4, 0));
// SO: Bit 61 set; set flag bit 0 // SO: Bit 61 set; set flag bit 0
// LT: Bit 62 set; set flag bit 3 // LT: Bit 62 set; set flag bit 3
SHR(64, R(cr_val), Imm8(61)); SHR(64, R(cr_val), Imm8(61));
MOVZX(32, 8, EAX, MDisp(cr_val, (u32)(u64)m_flagTable)); MOVZX(32, 8, RSCRATCH, MDisp(cr_val, (u32)(u64)m_flagTable));
OR(32, gpr.R(d), R(EAX)); OR(32, gpr.R(d), R(RSCRATCH));
} }
gpr.UnlockAll(); gpr.UnlockAll();
@ -360,12 +361,12 @@ void Jit64::mtcrf(UGeckoInstruction inst)
u64 newcrval = PPCCRToInternal(newcr); u64 newcrval = PPCCRToInternal(newcr);
if ((s64)newcrval == (s32)newcrval) if ((s64)newcrval == (s32)newcrval)
{ {
MOV(64, M(&PowerPC::ppcState.cr_val[i]), Imm32((s32)newcrval)); MOV(64, PPCSTATE(cr_val[i]), Imm32((s32)newcrval));
} }
else else
{ {
MOV(64, R(RAX), Imm64(newcrval)); MOV(64, R(RSCRATCH), Imm64(newcrval));
MOV(64, M(&PowerPC::ppcState.cr_val[i]), R(RAX)); MOV(64, PPCSTATE(cr_val[i]), R(RSCRATCH));
} }
} }
} }
@ -378,13 +379,13 @@ void Jit64::mtcrf(UGeckoInstruction inst)
{ {
if ((crm & (0x80 >> i)) != 0) if ((crm & (0x80 >> i)) != 0)
{ {
MOV(32, R(EAX), gpr.R(inst.RS)); MOV(32, R(RSCRATCH), gpr.R(inst.RS));
if (i != 7) if (i != 7)
SHR(32, R(EAX), Imm8(28 - (i * 4))); SHR(32, R(RSCRATCH), Imm8(28 - (i * 4)));
if (i != 0) if (i != 0)
AND(32, R(EAX), Imm8(0xF)); AND(32, R(RSCRATCH), Imm8(0xF));
MOV(64, R(EAX), MScaled(EAX, SCALE_8, (u32)(u64)m_crTable)); MOV(64, R(RSCRATCH), MScaled(RSCRATCH, SCALE_8, (u32)(u64)m_crTable));
MOV(64, M(&PowerPC::ppcState.cr_val[i]), R(EAX)); MOV(64, PPCSTATE(cr_val[i]), R(RSCRATCH));
} }
} }
gpr.UnlockAll(); gpr.UnlockAll();
@ -400,8 +401,8 @@ void Jit64::mcrf(UGeckoInstruction inst)
// USES_CR // USES_CR
if (inst.CRFS != inst.CRFD) if (inst.CRFS != inst.CRFD)
{ {
MOV(64, R(EAX), M(&PowerPC::ppcState.cr_val[inst.CRFS])); MOV(64, R(RSCRATCH), PPCSTATE(cr_val[inst.CRFS]));
MOV(64, M(&PowerPC::ppcState.cr_val[inst.CRFD]), R(EAX)); MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(RSCRATCH));
} }
} }
@ -413,14 +414,14 @@ void Jit64::mcrxr(UGeckoInstruction inst)
// USES_CR // USES_CR
// Copy XER[0-3] into CR[inst.CRFD] // Copy XER[0-3] into CR[inst.CRFD]
MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); MOV(32, R(RSCRATCH), PPCSTATE(spr[SPR_XER]));
SHR(32, R(EAX), Imm8(28)); SHR(32, R(RSCRATCH), Imm8(28));
MOV(64, R(EAX), MScaled(EAX, SCALE_8, (u32)(u64)m_crTable)); MOV(64, R(RSCRATCH), MScaled(RSCRATCH, SCALE_8, (u32)(u64)m_crTable));
MOV(64, M(&PowerPC::ppcState.cr_val[inst.CRFD]), R(EAX)); MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(RSCRATCH));
// Clear XER[0-3] // Clear XER[0-3]
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(0x0FFFFFFF)); AND(32, PPCSTATE(spr[SPR_XER]), Imm32(0x0FFFFFFF));
} }
void Jit64::crXXX(UGeckoInstruction inst) void Jit64::crXXX(UGeckoInstruction inst)
@ -439,9 +440,8 @@ void Jit64::crXXX(UGeckoInstruction inst)
// crnand or crnor // crnand or crnor
bool negateB = inst.SUBOP10 == 225 || inst.SUBOP10 == 33; bool negateB = inst.SUBOP10 == 225 || inst.SUBOP10 == 33;
gpr.FlushLockX(ABI_PARAM1); GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), RSCRATCH2, negateA);
GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), ABI_PARAM1, negateA); GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), RSCRATCH, negateB);
GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), EAX, negateB);
// Compute combined bit // Compute combined bit
switch (inst.SUBOP10) switch (inst.SUBOP10)
@ -449,23 +449,23 @@ void Jit64::crXXX(UGeckoInstruction inst)
case 33: // crnor: ~(A || B) == (~A && ~B) case 33: // crnor: ~(A || B) == (~A && ~B)
case 129: // crandc case 129: // crandc
case 257: // crand case 257: // crand
AND(8, R(EAX), R(ABI_PARAM1)); AND(8, R(RSCRATCH), R(RSCRATCH2));
break; break;
case 193: // crxor case 193: // crxor
case 289: // creqv case 289: // creqv
XOR(8, R(EAX), R(ABI_PARAM1)); XOR(8, R(RSCRATCH), R(RSCRATCH2));
break; break;
case 225: // crnand: ~(A && B) == (~A || ~B) case 225: // crnand: ~(A && B) == (~A || ~B)
case 417: // crorc case 417: // crorc
case 449: // cror case 449: // cror
OR(8, R(EAX), R(ABI_PARAM1)); OR(8, R(RSCRATCH), R(RSCRATCH2));
break; break;
} }
// Store result bit in CRBD // Store result bit in CRBD
SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), EAX); SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), RSCRATCH);
gpr.UnlockAllX(); gpr.UnlockAllX();
} }

View File

@ -157,13 +157,15 @@ static void fregSpill(RegInfo& RI, X64Reg reg)
RI.fregs[reg] = nullptr; RI.fregs[reg] = nullptr;
} }
// ECX is scratch, so we don't allocate it // RAX and RDX are scratch, so we don't allocate them
// (TODO: if we could lock RCX here too then we could allocate it - needed for
// shifts)
// 64-bit - calling conventions differ between linux & windows, so... // 64-bit - calling conventions differ between linux & windows, so...
#ifdef _WIN32 #ifdef _WIN32
static const X64Reg RegAllocOrder[] = {RSI, RDI, R12, R13, R14, R8, R9, R10, R11}; static const X64Reg RegAllocOrder[] = {RSI, RDI, R12, R13, R14, R8, R9, R10, R11};
#else #else
static const X64Reg RegAllocOrder[] = {RBP, R12, R13, R14, R8, R9, R10, R11}; static const X64Reg RegAllocOrder[] = {R12, R13, R14, R8, R9, R10, R11};
#endif #endif
static const int RegAllocSize = sizeof(RegAllocOrder) / sizeof(X64Reg); static const int RegAllocSize = sizeof(RegAllocOrder) / sizeof(X64Reg);
static const X64Reg FRegAllocOrder[] = {XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, XMM2, XMM3, XMM4, XMM5}; static const X64Reg FRegAllocOrder[] = {XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, XMM2, XMM3, XMM4, XMM5};
@ -602,22 +604,22 @@ static void regEmitMemStore(RegInfo& RI, InstLoc I, unsigned Size)
{ {
auto info = regBuildMemAddress(RI, I, getOp2(I), 2, Size, nullptr); auto info = regBuildMemAddress(RI, I, getOp2(I), 2, Size, nullptr);
if (info.first.IsImm()) if (info.first.IsImm())
RI.Jit->MOV(32, R(ECX), info.first); RI.Jit->MOV(32, R(RSCRATCH2), info.first);
else else
RI.Jit->LEA(32, ECX, MDisp(info.first.GetSimpleReg(), info.second)); RI.Jit->LEA(32, RSCRATCH2, MDisp(info.first.GetSimpleReg(), info.second));
regSpill(RI, EAX); regSpill(RI, RSCRATCH);
if (isImm(*getOp1(I))) if (isImm(*getOp1(I)))
{ {
RI.Jit->MOV(Size, R(EAX), regImmForConst(RI, getOp1(I), Size)); RI.Jit->MOV(Size, R(RSCRATCH), regImmForConst(RI, getOp1(I), Size));
} }
else else
{ {
RI.Jit->MOV(32, R(EAX), regLocForInst(RI, getOp1(I))); RI.Jit->MOV(32, R(RSCRATCH), regLocForInst(RI, getOp1(I)));
} }
RI.Jit->SafeWriteRegToReg(EAX, ECX, Size, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); RI.Jit->SafeWriteRegToReg(RSCRATCH, RSCRATCH2, Size, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM);
if (RI.IInfo[I - RI.FirstI] & 4) if (RI.IInfo[I - RI.FirstI] & 4)
regClearInst(RI, getOp1(I)); regClearInst(RI, getOp1(I));
} }
@ -675,9 +677,9 @@ static void regEmitCmp(RegInfo& RI, InstLoc I)
static void regEmitICmpInst(RegInfo& RI, InstLoc I, CCFlags flag) static void regEmitICmpInst(RegInfo& RI, InstLoc I, CCFlags flag)
{ {
regEmitCmp(RI, I); regEmitCmp(RI, I);
RI.Jit->SETcc(flag, R(ECX)); // Caution: SETCC uses 8-bit regs! RI.Jit->SETcc(flag, R(RSCRATCH2)); // Caution: SETCC uses 8-bit regs!
X64Reg reg = regBinReg(RI, I); X64Reg reg = regBinReg(RI, I);
RI.Jit->MOVZX(32, 8, reg, R(ECX)); RI.Jit->MOVZX(32, 8, reg, R(RSCRATCH2));
RI.regs[reg] = I; RI.regs[reg] = I;
regNormalRegClear(RI, I); regNormalRegClear(RI, I);
} }
@ -707,8 +709,8 @@ static void regEmitICmpCRInst(RegInfo& RI, InstLoc I)
unsigned RHS = RI.Build->GetImmValue(getOp2(I)); unsigned RHS = RI.Build->GetImmValue(getOp2(I));
if (!signed_compare && (RHS & 0x80000000U)) if (!signed_compare && (RHS & 0x80000000U))
{ {
RI.Jit->MOV(32, R(EAX), Imm32(RHS)); RI.Jit->MOV(32, R(RSCRATCH), Imm32(RHS));
RI.Jit->SUB(64, R(reg), R(RAX)); RI.Jit->SUB(64, R(reg), R(RSCRATCH));
} }
else if (RHS) else if (RHS)
{ {
@ -718,10 +720,10 @@ static void regEmitICmpCRInst(RegInfo& RI, InstLoc I)
else else
{ {
if (signed_compare) if (signed_compare)
RI.Jit->MOVSX(64, 32, RAX, regLocForInst(RI, getOp2(I))); RI.Jit->MOVSX(64, 32, RSCRATCH, regLocForInst(RI, getOp2(I)));
else else
RI.Jit->MOV(32, R(EAX), regLocForInst(RI, getOp2(I))); RI.Jit->MOV(32, R(RSCRATCH), regLocForInst(RI, getOp2(I)));
RI.Jit->SUB(64, R(reg), R(RAX)); RI.Jit->SUB(64, R(reg), R(RSCRATCH));
} }
RI.regs[reg] = I; RI.regs[reg] = I;
@ -949,8 +951,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
// interpreter call at the moment, but optimizing interpreter // interpreter call at the moment, but optimizing interpreter
// calls isn't completely out of the question... // calls isn't completely out of the question...
regSpillCallerSaved(RI); regSpillCallerSaved(RI);
Jit->MOV(32, M(&PC), Imm32(InstLoc)); Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc));
Jit->MOV(32, M(&NPC), Imm32(InstLoc+4)); Jit->MOV(32, PPCSTATE(npc), Imm32(InstLoc+4));
Jit->ABI_CallFunctionC((void*)GetInterpreterOp(InstCode), Jit->ABI_CallFunctionC((void*)GetInterpreterOp(InstCode),
InstCode); InstCode);
break; break;
@ -962,7 +964,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
X64Reg reg = regFindFreeReg(RI); X64Reg reg = regFindFreeReg(RI);
unsigned ppcreg = *I >> 8; unsigned ppcreg = *I >> 8;
Jit->MOV(32, R(reg), M(&PowerPC::ppcState.gpr[ppcreg])); Jit->MOV(32, R(reg), PPCSTATE(gpr[ppcreg]));
RI.regs[reg] = I; RI.regs[reg] = I;
break; break;
} }
@ -973,7 +975,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
X64Reg reg = regFindFreeReg(RI); X64Reg reg = regFindFreeReg(RI);
unsigned ppcreg = *I >> 8; unsigned ppcreg = *I >> 8;
Jit->MOV(64, R(reg), M(&PowerPC::ppcState.cr_val[ppcreg])); Jit->MOV(64, R(reg), PPCSTATE(cr_val[ppcreg]));
RI.regs[reg] = I; RI.regs[reg] = I;
break; break;
} }
@ -983,7 +985,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
break; break;
X64Reg reg = regFindFreeReg(RI); X64Reg reg = regFindFreeReg(RI);
Jit->MOV(32, R(reg), M(&CTR)); Jit->MOV(32, R(reg), PPCSTATE_CTR);
RI.regs[reg] = I; RI.regs[reg] = I;
break; break;
} }
@ -993,7 +995,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
break; break;
X64Reg reg = regFindFreeReg(RI); X64Reg reg = regFindFreeReg(RI);
Jit->MOV(32, R(reg), M(&LR)); Jit->MOV(32, R(reg), PPCSTATE_LR);
RI.regs[reg] = I; RI.regs[reg] = I;
break; break;
} }
@ -1003,7 +1005,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
break; break;
X64Reg reg = regFindFreeReg(RI); X64Reg reg = regFindFreeReg(RI);
Jit->MOV(32, R(reg), M(&MSR)); Jit->MOV(32, R(reg), PPCSTATE(msr));
RI.regs[reg] = I; RI.regs[reg] = I;
break; break;
} }
@ -1014,7 +1016,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
X64Reg reg = regFindFreeReg(RI); X64Reg reg = regFindFreeReg(RI);
unsigned gqr = *I >> 8; unsigned gqr = *I >> 8;
Jit->MOV(32, R(reg), M(&GQR(gqr))); Jit->MOV(32, R(reg), PPCSTATE(spr[SPR_GQR0 + gqr]));
RI.regs[reg] = I; RI.regs[reg] = I;
break; break;
} }
@ -1024,7 +1026,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
break; break;
X64Reg reg = regFindFreeReg(RI); X64Reg reg = regFindFreeReg(RI);
Jit->MOV(32, R(reg), M(&PowerPC::ppcState.spr[SPR_XER])); Jit->MOV(32, R(reg), PPCSTATE(spr[SPR_XER]));
Jit->SHR(32, R(reg), Imm8(29)); Jit->SHR(32, R(reg), Imm8(29));
Jit->AND(32, R(reg), Imm8(1)); Jit->AND(32, R(reg), Imm8(1));
RI.regs[reg] = I; RI.regs[reg] = I;
@ -1042,7 +1044,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
{ {
X64Reg reg = regEnsureInReg(RI, getOp1(I)); X64Reg reg = regEnsureInReg(RI, getOp1(I));
unsigned ppcreg = *I >> 16; unsigned ppcreg = *I >> 16;
Jit->MOV(64, M(&PowerPC::ppcState.cr_val[ppcreg]), R(reg)); Jit->MOV(64, PPCSTATE(cr_val[ppcreg]), R(reg));
regNormalRegClear(RI, I); regNormalRegClear(RI, I);
break; break;
} }
@ -1067,15 +1069,15 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
// If some exceptions are pending and EE are now enabled, force checking // If some exceptions are pending and EE are now enabled, force checking
// external exceptions when going out of mtmsr in order to execute delayed // external exceptions when going out of mtmsr in order to execute delayed
// interrupts as soon as possible. // interrupts as soon as possible.
Jit->MOV(32, R(EAX), M(&MSR)); Jit->MOV(32, R(RSCRATCH), PPCSTATE(msr));
Jit->TEST(32, R(EAX), Imm32(0x8000)); Jit->TEST(32, R(RSCRATCH), Imm32(0x8000));
FixupBranch eeDisabled = Jit->J_CC(CC_Z); FixupBranch eeDisabled = Jit->J_CC(CC_Z);
Jit->MOV(32, R(EAX), M((void*)&PowerPC::ppcState.Exceptions)); Jit->MOV(32, R(RSCRATCH), PPCSTATE(Exceptions));
Jit->TEST(32, R(EAX), R(EAX)); Jit->TEST(32, R(RSCRATCH), R(RSCRATCH));
FixupBranch noExceptionsPending = Jit->J_CC(CC_Z); FixupBranch noExceptionsPending = Jit->J_CC(CC_Z);
Jit->MOV(32, M(&PC), Imm32(InstLoc + 4)); Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc + 4));
Jit->WriteExceptionExit(); // TODO: Implement WriteExternalExceptionExit for JitIL Jit->WriteExceptionExit(); // TODO: Implement WriteExternalExceptionExit for JitIL
Jit->SetJumpTarget(eeDisabled); Jit->SetJumpTarget(eeDisabled);
@ -1111,11 +1113,11 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
} }
case StoreFPRF: case StoreFPRF:
{ {
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); Jit->MOV(32, R(RSCRATCH2), regLocForInst(RI, getOp1(I)));
Jit->AND(32, R(ECX), Imm8(0x1F)); Jit->AND(32, R(RSCRATCH2), Imm8(0x1F));
Jit->SHL(32, R(ECX), Imm8(12)); Jit->SHL(32, R(RSCRATCH2), Imm8(12));
Jit->AND(32, M(&FPSCR), Imm32(~(0x1F << 12))); Jit->AND(32, PPCSTATE(fpscr), Imm32(~(0x1F << 12)));
Jit->OR(32, M(&FPSCR), R(ECX)); Jit->OR(32, PPCSTATE(fpscr), R(RSCRATCH2));
regNormalRegClear(RI, I); regNormalRegClear(RI, I);
break; break;
} }
@ -1155,8 +1157,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
break; break;
X64Reg reg = regUReg(RI, I); X64Reg reg = regUReg(RI, I);
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); Jit->MOV(32, R(RSCRATCH2), regLocForInst(RI, getOp1(I)));
Jit->MOVSX(32, 8, reg, R(ECX)); Jit->MOVSX(32, 8, reg, R(RSCRATCH2));
RI.regs[reg] = I; RI.regs[reg] = I;
regNormalRegClear(RI, I); regNormalRegClear(RI, I);
break; break;
@ -1178,9 +1180,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
break; break;
X64Reg reg = regUReg(RI, I); X64Reg reg = regUReg(RI, I);
Jit->MOV(32, R(ECX), Imm32(63)); Jit->MOV(32, R(RSCRATCH2), Imm32(63));
Jit->BSR(32, reg, regLocForInst(RI, getOp1(I))); Jit->BSR(32, reg, regLocForInst(RI, getOp1(I)));
Jit->CMOVcc(32, reg, R(ECX), CC_Z); Jit->CMOVcc(32, reg, R(RSCRATCH2), CC_Z);
Jit->XOR(32, R(reg), Imm8(31)); Jit->XOR(32, R(reg), Imm8(31));
RI.regs[reg] = I; RI.regs[reg] = I;
regNormalRegClear(RI, I); regNormalRegClear(RI, I);
@ -1265,6 +1267,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
if (!thisUsed) if (!thisUsed)
break; break;
// no register choice
regSpill(RI, EAX); regSpill(RI, EAX);
regSpill(RI, EDX); regSpill(RI, EDX);
X64Reg reg = regBinReg(RI, I); X64Reg reg = regBinReg(RI, I);
@ -1419,35 +1422,35 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
X64Reg cr_val = regUReg(RI, I); X64Reg cr_val = regUReg(RI, I);
Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I))); Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I)));
Jit->XOR(32, R(EAX), R(EAX)); Jit->XOR(32, R(RSCRATCH), R(RSCRATCH));
// SO: Bit 61 set. // SO: Bit 61 set.
Jit->MOV(64, R(RCX), R(cr_val)); Jit->MOV(64, R(RSCRATCH2), R(cr_val));
Jit->SHR(64, R(RCX), Imm8(61)); Jit->SHR(64, R(RSCRATCH2), Imm8(61));
Jit->AND(32, R(ECX), Imm8(1)); Jit->AND(32, R(RSCRATCH2), Imm8(1));
Jit->OR(32, R(EAX), R(ECX)); Jit->OR(32, R(RSCRATCH), R(RSCRATCH2));
// EQ: Bits 31-0 == 0. // EQ: Bits 31-0 == 0.
Jit->XOR(32, R(ECX), R(ECX)); Jit->XOR(32, R(RSCRATCH2), R(RSCRATCH2));
Jit->TEST(32, R(cr_val), R(cr_val)); Jit->TEST(32, R(cr_val), R(cr_val));
Jit->SETcc(CC_Z, R(ECX)); Jit->SETcc(CC_Z, R(RSCRATCH2));
Jit->SHL(32, R(ECX), Imm8(1)); Jit->SHL(32, R(RSCRATCH2), Imm8(1));
Jit->OR(32, R(EAX), R(ECX)); Jit->OR(32, R(RSCRATCH), R(RSCRATCH2));
// GT: Value > 0. // GT: Value > 0.
Jit->XOR(32, R(ECX), R(ECX)); Jit->XOR(32, R(RSCRATCH2), R(RSCRATCH2));
Jit->TEST(64, R(cr_val), R(cr_val)); Jit->TEST(64, R(cr_val), R(cr_val));
Jit->SETcc(CC_G, R(ECX)); Jit->SETcc(CC_G, R(RSCRATCH2));
Jit->SHL(32, R(ECX), Imm8(2)); Jit->SHL(32, R(RSCRATCH2), Imm8(2));
Jit->OR(32, R(EAX), R(ECX)); Jit->OR(32, R(RSCRATCH), R(RSCRATCH2));
// LT: Bit 62 set. // LT: Bit 62 set.
Jit->MOV(64, R(ECX), R(cr_val)); Jit->MOV(64, R(RSCRATCH2), R(cr_val));
Jit->SHR(64, R(ECX), Imm8(62 - 3)); Jit->SHR(64, R(RSCRATCH2), Imm8(62 - 3));
Jit->AND(32, R(ECX), Imm8(0x8)); Jit->AND(32, R(RSCRATCH2), Imm8(0x8));
Jit->OR(32, R(EAX), R(ECX)); Jit->OR(32, R(RSCRATCH), R(RSCRATCH2));
Jit->MOV(32, R(cr_val), R(EAX)); Jit->MOV(32, R(cr_val), R(RSCRATCH));
RI.regs[cr_val] = I; RI.regs[cr_val] = I;
regNormalRegClear(RI, I); regNormalRegClear(RI, I);
break; break;
@ -1460,34 +1463,34 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
X64Reg cr_val = regUReg(RI, I); X64Reg cr_val = regUReg(RI, I);
Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I))); Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I)));
Jit->MOV(64, R(RCX), Imm64(1ull << 32)); Jit->MOV(64, R(RSCRATCH2), Imm64(1ull << 32));
// SO // SO
Jit->MOV(64, R(RAX), R(cr_val)); Jit->MOV(64, R(RSCRATCH), R(cr_val));
Jit->SHL(64, R(RAX), Imm8(63)); Jit->SHL(64, R(RSCRATCH), Imm8(63));
Jit->SHR(64, R(RAX), Imm8(63 - 61)); Jit->SHR(64, R(RSCRATCH), Imm8(63 - 61));
Jit->OR(64, R(RCX), R(RAX)); Jit->OR(64, R(RSCRATCH2), R(RSCRATCH));
// EQ // EQ
Jit->MOV(64, R(RAX), R(cr_val)); Jit->MOV(64, R(RSCRATCH), R(cr_val));
Jit->NOT(64, R(RAX)); Jit->NOT(64, R(RSCRATCH));
Jit->AND(64, R(RAX), Imm8(CR_EQ)); Jit->AND(64, R(RSCRATCH), Imm8(CR_EQ));
Jit->OR(64, R(RCX), R(RAX)); Jit->OR(64, R(RSCRATCH2), R(RSCRATCH));
// GT // GT
Jit->MOV(64, R(RAX), R(cr_val)); Jit->MOV(64, R(RSCRATCH), R(cr_val));
Jit->NOT(64, R(RAX)); Jit->NOT(64, R(RSCRATCH));
Jit->AND(64, R(RAX), Imm8(CR_GT)); Jit->AND(64, R(RSCRATCH), Imm8(CR_GT));
Jit->SHL(64, R(RAX), Imm8(63 - 2)); Jit->SHL(64, R(RSCRATCH), Imm8(63 - 2));
Jit->OR(64, R(RCX), R(RAX)); Jit->OR(64, R(RSCRATCH2), R(RSCRATCH));
// LT // LT
Jit->MOV(64, R(RAX), R(cr_val)); Jit->MOV(64, R(RSCRATCH), R(cr_val));
Jit->AND(64, R(RAX), Imm8(CR_LT)); Jit->AND(64, R(RSCRATCH), Imm8(CR_LT));
Jit->SHL(64, R(RAX), Imm8(62 - 3)); Jit->SHL(64, R(RSCRATCH), Imm8(62 - 3));
Jit->OR(64, R(RCX), R(RAX)); Jit->OR(64, R(RSCRATCH2), R(RSCRATCH));
Jit->MOV(64, R(cr_val), R(RCX)); Jit->MOV(64, R(cr_val), R(RSCRATCH2));
RI.regs[cr_val] = I; RI.regs[cr_val] = I;
regNormalRegClear(RI, I); regNormalRegClear(RI, I);
@ -1499,10 +1502,10 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
break; break;
X64Reg reg = regUReg(RI, I); X64Reg reg = regUReg(RI, I);
Jit->MOV(64, R(RAX), Imm64(1ull << 61)); Jit->MOV(64, R(RSCRATCH), Imm64(1ull << 61));
Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RAX)); Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RSCRATCH));
Jit->SETcc(CC_NZ, R(AL)); Jit->SETcc(CC_NZ, R(RSCRATCH));
Jit->MOVZX(32, 8, reg, R(AL)); Jit->MOVZX(32, 8, reg, R(RSCRATCH));
RI.regs[reg] = I; RI.regs[reg] = I;
regNormalRegClear(RI, I); regNormalRegClear(RI, I);
break; break;
@ -1514,8 +1517,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
X64Reg reg = regUReg(RI, I); X64Reg reg = regUReg(RI, I);
Jit->CMP(32, regLocForInst(RI, getOp1(I)), Imm32(0)); Jit->CMP(32, regLocForInst(RI, getOp1(I)), Imm32(0));
Jit->SETcc(CC_Z, R(AL)); Jit->SETcc(CC_Z, R(RSCRATCH));
Jit->MOVZX(32, 8, reg, R(AL)); Jit->MOVZX(32, 8, reg, R(RSCRATCH));
RI.regs[reg] = I; RI.regs[reg] = I;
regNormalRegClear(RI, I); regNormalRegClear(RI, I);
break; break;
@ -1527,8 +1530,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
X64Reg reg = regUReg(RI, I); X64Reg reg = regUReg(RI, I);
Jit->CMP(64, regLocForInst(RI, getOp1(I)), Imm8(0)); Jit->CMP(64, regLocForInst(RI, getOp1(I)), Imm8(0));
Jit->SETcc(CC_G, R(AL)); Jit->SETcc(CC_G, R(RSCRATCH));
Jit->MOVZX(32, 8, reg, R(AL)); Jit->MOVZX(32, 8, reg, R(RSCRATCH));
RI.regs[reg] = I; RI.regs[reg] = I;
regNormalRegClear(RI, I); regNormalRegClear(RI, I);
break; break;
@ -1539,10 +1542,10 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
break; break;
X64Reg reg = regUReg(RI, I); X64Reg reg = regUReg(RI, I);
Jit->MOV(64, R(RAX), Imm64(1ull << 62)); Jit->MOV(64, R(RSCRATCH), Imm64(1ull << 62));
Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RAX)); Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RSCRATCH));
Jit->SETcc(CC_NZ, R(AL)); Jit->SETcc(CC_NZ, R(RSCRATCH));
Jit->MOVZX(32, 8, reg, R(AL)); Jit->MOVZX(32, 8, reg, R(RSCRATCH));
RI.regs[reg] = I; RI.regs[reg] = I;
regNormalRegClear(RI, I); regNormalRegClear(RI, I);
break; break;
@ -1553,9 +1556,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
break; break;
X64Reg reg = fregFindFreeReg(RI); X64Reg reg = fregFindFreeReg(RI);
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); Jit->MOV(32, R(RSCRATCH2), regLocForInst(RI, getOp1(I)));
RI.Jit->SafeLoadToReg(ECX, R(ECX), 32, 0, regsInUse(RI), false, EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); RI.Jit->SafeLoadToReg(RSCRATCH2, R(RSCRATCH2), 32, 0, regsInUse(RI), false, EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM);
Jit->MOVD_xmm(reg, R(ECX)); Jit->MOVD_xmm(reg, R(RSCRATCH2));
RI.fregs[reg] = I; RI.fregs[reg] = I;
regNormalRegClear(RI, I); regNormalRegClear(RI, I);
break; break;
@ -1567,9 +1570,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
X64Reg reg = fregFindFreeReg(RI); X64Reg reg = fregFindFreeReg(RI);
const OpArg loc = regLocForInst(RI, getOp1(I)); const OpArg loc = regLocForInst(RI, getOp1(I));
Jit->MOV(32, R(ECX), loc); Jit->MOV(32, R(RSCRATCH2), loc);
RI.Jit->SafeLoadToReg(RCX, R(ECX), 64, 0, regsInUse(RI), false, EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); RI.Jit->SafeLoadToReg(RSCRATCH2, R(RSCRATCH2), 64, 0, regsInUse(RI), false, EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM);
Jit->MOVQ_xmm(reg, R(RCX)); Jit->MOVQ_xmm(reg, R(RSCRATCH2));
RI.fregs[reg] = I; RI.fregs[reg] = I;
regNormalRegClear(RI, I); regNormalRegClear(RI, I);
break; break;
@ -1579,8 +1582,6 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
if (!thisUsed) if (!thisUsed)
break; break;
regSpill(RI, EAX);
regSpill(RI, EDX);
X64Reg reg = fregFindFreeReg(RI); X64Reg reg = fregFindFreeReg(RI);
// The lower 3 bits is for GQR index. The next 1 bit is for inst.W // The lower 3 bits is for GQR index. The next 1 bit is for inst.W
unsigned int quantreg = (*I >> 16) & 0x7; unsigned int quantreg = (*I >> 16) & 0x7;
@ -1589,13 +1590,12 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
// Hence, we need to mask out the unused bits. The layout of the GQR register is // Hence, we need to mask out the unused bits. The layout of the GQR register is
// UU[SCALE]UUUUU[TYPE] where SCALE is 6 bits and TYPE is 3 bits, so we have to AND with // UU[SCALE]UUUUU[TYPE] where SCALE is 6 bits and TYPE is 3 bits, so we have to AND with
// 0b0011111100000111, or 0x3F07. // 0b0011111100000111, or 0x3F07.
Jit->MOV(32, R(EAX), Imm32(0x3F07)); Jit->MOV(32, R(RSCRATCH), Imm32(0x3F07));
Jit->AND(32, R(EAX), M(((char *)&GQR(quantreg)) + 2)); Jit->AND(32, R(RSCRATCH), M(((char *)&GQR(quantreg)) + 2));
Jit->MOVZX(32, 8, EDX, R(AL)); Jit->OR(32, R(RSCRATCH), Imm8(w << 3));
Jit->OR(32, R(EDX), Imm8(w << 3));
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); Jit->MOV(32, R(RSCRATCH2), regLocForInst(RI, getOp1(I)));
Jit->CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedLoadQuantized))); Jit->CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedLoadQuantized)));
Jit->MOVAPD(reg, R(XMM0)); Jit->MOVAPD(reg, R(XMM0));
RI.fregs[reg] = I; RI.fregs[reg] = I;
regNormalRegClear(RI, I); regNormalRegClear(RI, I);
@ -1603,15 +1603,15 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
} }
case StoreSingle: case StoreSingle:
{ {
regSpill(RI, EAX); regSpill(RI, RSCRATCH);
const OpArg loc1 = fregLocForInst(RI, getOp1(I)); const OpArg loc1 = fregLocForInst(RI, getOp1(I));
if (loc1.IsSimpleReg()) if (loc1.IsSimpleReg())
Jit->MOVD_xmm(R(EAX), loc1.GetSimpleReg()); Jit->MOVD_xmm(R(RSCRATCH), loc1.GetSimpleReg());
else else
Jit->MOV(32, R(EAX), loc1); Jit->MOV(32, R(RSCRATCH), loc1);
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I))); Jit->MOV(32, R(RSCRATCH2), regLocForInst(RI, getOp2(I)));
RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); RI.Jit->SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 32, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM);
if (RI.IInfo[I - RI.FirstI] & 4) if (RI.IInfo[I - RI.FirstI] & 4)
fregClearInst(RI, getOp1(I)); fregClearInst(RI, getOp1(I));
if (RI.IInfo[I - RI.FirstI] & 8) if (RI.IInfo[I - RI.FirstI] & 8)
@ -1620,14 +1620,14 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
} }
case StoreDouble: case StoreDouble:
{ {
regSpill(RI, EAX); regSpill(RI, RSCRATCH);
OpArg value = fregLocForInst(RI, getOp1(I)); OpArg value = fregLocForInst(RI, getOp1(I));
OpArg address = regLocForInst(RI, getOp2(I)); OpArg address = regLocForInst(RI, getOp2(I));
Jit->MOVAPD(XMM0, value); Jit->MOVAPD(XMM0, value);
Jit->MOVQ_xmm(R(RAX), XMM0); Jit->MOVQ_xmm(R(RSCRATCH), XMM0);
Jit->MOV(32, R(ECX), address); Jit->MOV(32, R(RSCRATCH2), address);
RI.Jit->SafeWriteRegToReg(RAX, ECX, 64, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); RI.Jit->SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 64, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM);
if (RI.IInfo[I - RI.FirstI] & 4) if (RI.IInfo[I - RI.FirstI] & 4)
fregClearInst(RI, getOp1(I)); fregClearInst(RI, getOp1(I));
@ -1637,16 +1637,16 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
} }
case StorePaired: case StorePaired:
{ {
regSpill(RI, EAX); regSpill(RI, RSCRATCH);
regSpill(RI, EDX); regSpill(RI, RSCRATCH2);
u32 quantreg = *I >> 24; u32 quantreg = *I >> 24;
Jit->MOV(32, R(EAX), Imm32(0x3F07)); Jit->MOV(32, R(RSCRATCH), Imm32(0x3F07));
Jit->AND(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_GQR0 + quantreg])); Jit->AND(32, R(RSCRATCH), PPCSTATE(spr[SPR_GQR0 + quantreg]));
Jit->MOVZX(32, 8, EDX, R(AL)); Jit->MOVZX(32, 8, RSCRATCH2, R(RSCRATCH));
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I))); Jit->MOV(32, R(RSCRATCH2), regLocForInst(RI, getOp2(I)));
Jit->MOVAPD(XMM0, fregLocForInst(RI, getOp1(I))); Jit->MOVAPD(XMM0, fregLocForInst(RI, getOp1(I)));
Jit->CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedStoreQuantized))); Jit->CALLptr(MScaled(RSCRATCH2, SCALE_8, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedStoreQuantized)));
if (RI.IInfo[I - RI.FirstI] & 4) if (RI.IInfo[I - RI.FirstI] & 4)
fregClearInst(RI, getOp1(I)); fregClearInst(RI, getOp1(I));
if (RI.IInfo[I - RI.FirstI] & 8) if (RI.IInfo[I - RI.FirstI] & 8)
@ -1778,7 +1778,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
X64Reg reg = fregFindFreeReg(RI); X64Reg reg = fregFindFreeReg(RI);
unsigned ppcreg = *I >> 8; unsigned ppcreg = *I >> 8;
Jit->MOVAPD(reg, M(&PowerPC::ppcState.ps[ppcreg])); Jit->MOVAPD(reg, PPCSTATE(ps[ppcreg]));
RI.fregs[reg] = I; RI.fregs[reg] = I;
break; break;
} }
@ -1790,21 +1790,21 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
X64Reg reg = fregFindFreeReg(RI); X64Reg reg = fregFindFreeReg(RI);
unsigned ppcreg = *I >> 8; unsigned ppcreg = *I >> 8;
char *p = (char*)&(PowerPC::ppcState.ps[ppcreg][0]); char *p = (char*)&(PowerPC::ppcState.ps[ppcreg][0]);
Jit->MOV(32, R(ECX), M(p+4)); Jit->MOV(32, R(RSCRATCH2), M(p+4));
Jit->AND(32, R(ECX), Imm32(0x7ff00000)); Jit->AND(32, R(RSCRATCH2), Imm32(0x7ff00000));
Jit->CMP(32, R(ECX), Imm32(0x38000000)); Jit->CMP(32, R(RSCRATCH2), Imm32(0x38000000));
FixupBranch ok = Jit->J_CC(CC_AE); FixupBranch ok = Jit->J_CC(CC_AE);
Jit->AND(32, M(p+4), Imm32(0x80000000)); Jit->AND(32, M(p+4), Imm32(0x80000000));
Jit->MOV(32, M(p), Imm32(0)); Jit->MOV(32, M(p), Imm32(0));
Jit->SetJumpTarget(ok); Jit->SetJumpTarget(ok);
Jit->MOVAPD(reg, M(&PowerPC::ppcState.ps[ppcreg])); Jit->MOVAPD(reg, PPCSTATE(ps[ppcreg]));
RI.fregs[reg] = I; RI.fregs[reg] = I;
break; break;
} }
case StoreFReg: case StoreFReg:
{ {
unsigned ppcreg = *I >> 16; unsigned ppcreg = *I >> 16;
Jit->MOVAPD(M(&PowerPC::ppcState.ps[ppcreg]), Jit->MOVAPD(PPCSTATE(ps[ppcreg]),
fregEnsureInReg(RI, getOp1(I))); fregEnsureInReg(RI, getOp1(I)));
fregNormalRegClear(RI, I); fregNormalRegClear(RI, I);
break; break;
@ -1911,17 +1911,17 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
Jit->MOVSD(M(isSNANTemp[1]), XMM0); Jit->MOVSD(M(isSNANTemp[1]), XMM0);
} }
Jit->ABI_CallFunction((void*)checkIsSNAN); Jit->ABI_CallFunction((void*)checkIsSNAN);
Jit->TEST(8, R(EAX), R(EAX)); Jit->TEST(8, R(ABI_RETURN), R(ABI_RETURN));
FixupBranch ok = Jit->J_CC(CC_Z); FixupBranch ok = Jit->J_CC(CC_Z);
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_FX)); // FPSCR.FX = 1; Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); // FPSCR.FX = 1;
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXSNAN)); // FPSCR.Hex |= mask; Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXSNAN)); // FPSCR.Hex |= mask;
Jit->TEST(32, M(&FPSCR), Imm32(FPSCR_VE)); Jit->TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_VE));
FixupBranch finish0 = Jit->J_CC(CC_NZ); FixupBranch finish0 = Jit->J_CC(CC_NZ);
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask; Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask;
FixupBranch finish1 = Jit->J(); FixupBranch finish1 = Jit->J();
Jit->SetJumpTarget(ok); Jit->SetJumpTarget(ok);
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_FX)); // FPSCR.FX = 1; Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); // FPSCR.FX = 1;
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask; Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask;
Jit->SetJumpTarget(finish0); Jit->SetJumpTarget(finish0);
Jit->SetJumpTarget(finish1); Jit->SetJumpTarget(finish1);
} }
@ -1940,10 +1940,10 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
Jit->MOVSD(M(isSNANTemp[1]), XMM0); Jit->MOVSD(M(isSNANTemp[1]), XMM0);
} }
Jit->ABI_CallFunction((void*)checkIsSNAN); Jit->ABI_CallFunction((void*)checkIsSNAN);
Jit->TEST(8, R(EAX), R(EAX)); Jit->TEST(8, R(ABI_RETURN), R(ABI_RETURN));
FixupBranch finish = Jit->J_CC(CC_Z); FixupBranch finish = Jit->J_CC(CC_Z);
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_FX)); // FPSCR.FX = 1; Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); // FPSCR.FX = 1;
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask; Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask;
Jit->SetJumpTarget(finish); Jit->SetJumpTarget(finish);
} }
@ -2094,7 +2094,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
RI.Jit->Cleanup(); // is it needed? RI.Jit->Cleanup(); // is it needed?
Jit->ABI_CallFunction((void *)&PowerPC::OnIdleIL); Jit->ABI_CallFunction((void *)&PowerPC::OnIdleIL);
Jit->MOV(32, M(&PC), Imm32(ibuild->GetImmValue( getOp2(I) ))); Jit->MOV(32, PPCSTATE(pc), Imm32(ibuild->GetImmValue( getOp2(I) )));
Jit->WriteExceptionExit(); Jit->WriteExceptionExit();
Jit->SetJumpTarget(cont); Jit->SetJumpTarget(cont);
@ -2179,7 +2179,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
{ {
unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); unsigned InstLoc = ibuild->GetImmValue(getOp1(I));
Jit->ABI_CallFunction((void *)&CoreTiming::Idle); Jit->ABI_CallFunction((void *)&CoreTiming::Idle);
Jit->MOV(32, M(&PC), Imm32(InstLoc)); Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc));
Jit->WriteExceptionExit(); Jit->WriteExceptionExit();
break; break;
} }
@ -2187,15 +2187,15 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
{ {
unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); unsigned InstLoc = ibuild->GetImmValue(getOp1(I));
Jit->LOCK(); Jit->LOCK();
Jit->OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_SYSCALL)); Jit->OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_SYSCALL));
Jit->MOV(32, M(&PC), Imm32(InstLoc + 4)); Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc + 4));
Jit->WriteExceptionExit(); Jit->WriteExceptionExit();
break; break;
} }
case InterpreterBranch: case InterpreterBranch:
{ {
Jit->MOV(32, R(EAX), M(&NPC)); Jit->MOV(32, R(RSCRATCH), PPCSTATE(npc));
Jit->WriteExitDestInOpArg(R(EAX)); Jit->WriteExitDestInOpArg(R(RSCRATCH));
break; break;
} }
case RFIExit: case RFIExit:
@ -2203,31 +2203,31 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
// See Interpreter rfi for details // See Interpreter rfi for details
const u32 mask = 0x87C0FFFF; const u32 mask = 0x87C0FFFF;
// MSR = (MSR & ~mask) | (SRR1 & mask); // MSR = (MSR & ~mask) | (SRR1 & mask);
Jit->MOV(32, R(EAX), M(&MSR)); Jit->MOV(32, R(RSCRATCH), PPCSTATE(msr));
Jit->MOV(32, R(ECX), M(&SRR1)); Jit->MOV(32, R(RSCRATCH2), PPCSTATE_SRR1);
Jit->AND(32, R(EAX), Imm32(~mask)); Jit->AND(32, R(RSCRATCH), Imm32(~mask));
Jit->AND(32, R(ECX), Imm32(mask)); Jit->AND(32, R(RSCRATCH2), Imm32(mask));
Jit->OR(32, R(EAX), R(ECX)); Jit->OR(32, R(RSCRATCH), R(RSCRATCH2));
// MSR &= 0xFFFBFFFF; // Mask used to clear the bit MSR[13] // MSR &= 0xFFFBFFFF; // Mask used to clear the bit MSR[13]
Jit->AND(32, R(EAX), Imm32(0xFFFBFFFF)); Jit->AND(32, R(RSCRATCH), Imm32(0xFFFBFFFF));
Jit->MOV(32, M(&MSR), R(EAX)); Jit->MOV(32, PPCSTATE(msr), R(RSCRATCH));
// NPC = SRR0; // NPC = SRR0;
Jit->MOV(32, R(EAX), M(&SRR0)); Jit->MOV(32, R(RSCRATCH), PPCSTATE_SRR0);
Jit->WriteRfiExitDestInOpArg(R(EAX)); Jit->WriteRfiExitDestInOpArg(R(RSCRATCH));
break; break;
} }
case FPExceptionCheck: case FPExceptionCheck:
{ {
unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); unsigned InstLoc = ibuild->GetImmValue(getOp1(I));
//This instruction uses FPU - needs to add FP exception bailout //This instruction uses FPU - needs to add FP exception bailout
Jit->TEST(32, M(&PowerPC::ppcState.msr), Imm32(1 << 13)); // Test FP enabled bit Jit->TEST(32, PPCSTATE(msr), Imm32(1 << 13)); // Test FP enabled bit
FixupBranch b1 = Jit->J_CC(CC_NZ); FixupBranch b1 = Jit->J_CC(CC_NZ);
// If a FPU exception occurs, the exception handler will read // If a FPU exception occurs, the exception handler will read
// from PC. Update PC with the latest value in case that happens. // from PC. Update PC with the latest value in case that happens.
Jit->MOV(32, M(&PC), Imm32(InstLoc)); Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc));
Jit->SUB(32, M(&PowerPC::ppcState.downcount), Imm32(Jit->js.downcountAmount)); Jit->SUB(32, PPCSTATE(downcount), Imm32(Jit->js.downcountAmount));
Jit->OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE)); Jit->OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE));
Jit->WriteExceptionExit(); Jit->WriteExceptionExit();
Jit->SetJumpTarget(b1); Jit->SetJumpTarget(b1);
break; break;
@ -2235,12 +2235,12 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
case DSIExceptionCheck: case DSIExceptionCheck:
{ {
unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); unsigned InstLoc = ibuild->GetImmValue(getOp1(I));
Jit->TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_DSI)); Jit->TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
FixupBranch noMemException = Jit->J_CC(CC_Z); FixupBranch noMemException = Jit->J_CC(CC_Z);
// If a memory exception occurs, the exception handler will read // If a memory exception occurs, the exception handler will read
// from PC. Update PC with the latest value in case that happens. // from PC. Update PC with the latest value in case that happens.
Jit->MOV(32, M(&PC), Imm32(InstLoc)); Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc));
Jit->WriteExceptionExit(); Jit->WriteExceptionExit();
Jit->SetJumpTarget(noMemException); Jit->SetJumpTarget(noMemException);
break; break;
@ -2250,12 +2250,12 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); unsigned InstLoc = ibuild->GetImmValue(getOp1(I));
// Address of instruction could not be translated // Address of instruction could not be translated
Jit->MOV(32, M(&NPC), Imm32(InstLoc)); Jit->MOV(32, PPCSTATE(npc), Imm32(InstLoc));
Jit->OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_ISI)); Jit->OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_ISI));
// Remove the invalid instruction from the icache, forcing a recompile // Remove the invalid instruction from the icache, forcing a recompile
Jit->MOV(64, R(RAX), ImmPtr(jit->GetBlockCache()->GetICachePtr(InstLoc))); Jit->MOV(64, R(RSCRATCH), ImmPtr(jit->GetBlockCache()->GetICachePtr(InstLoc)));
Jit->MOV(32, MatR(RAX), Imm32(JIT_ICACHE_INVALID_WORD)); Jit->MOV(32, MatR(RSCRATCH), Imm32(JIT_ICACHE_INVALID_WORD));
Jit->WriteExceptionExit(); Jit->WriteExceptionExit();
break; break;
} }
@ -2263,16 +2263,16 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
{ {
unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); unsigned InstLoc = ibuild->GetImmValue(getOp1(I));
Jit->TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_ISI | EXCEPTION_PROGRAM | EXCEPTION_SYSCALL | EXCEPTION_FPU_UNAVAILABLE | EXCEPTION_DSI | EXCEPTION_ALIGNMENT)); Jit->TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_ISI | EXCEPTION_PROGRAM | EXCEPTION_SYSCALL | EXCEPTION_FPU_UNAVAILABLE | EXCEPTION_DSI | EXCEPTION_ALIGNMENT));
FixupBranch clearInt = Jit->J_CC(CC_NZ); FixupBranch clearInt = Jit->J_CC(CC_NZ);
Jit->TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_EXTERNAL_INT)); Jit->TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT));
FixupBranch noExtException = Jit->J_CC(CC_Z); FixupBranch noExtException = Jit->J_CC(CC_Z);
Jit->TEST(32, M((void *)&PowerPC::ppcState.msr), Imm32(0x0008000)); Jit->TEST(32, PPCSTATE(msr), Imm32(0x0008000));
FixupBranch noExtIntEnable = Jit->J_CC(CC_Z); FixupBranch noExtIntEnable = Jit->J_CC(CC_Z);
Jit->TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_FINISH)); Jit->TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_FINISH));
FixupBranch noCPInt = Jit->J_CC(CC_Z); FixupBranch noCPInt = Jit->J_CC(CC_Z);
Jit->MOV(32, M(&PC), Imm32(InstLoc)); Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc));
Jit->WriteExceptionExit(); Jit->WriteExceptionExit();
Jit->SetJumpTarget(noCPInt); Jit->SetJumpTarget(noCPInt);
@ -2285,7 +2285,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
{ {
unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); unsigned InstLoc = ibuild->GetImmValue(getOp1(I));
Jit->MOV(32, M(&PC), Imm32(InstLoc)); Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc));
Jit->ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints)); Jit->ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints));
Jit->TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); Jit->TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
FixupBranch noBreakpoint = Jit->J_CC(CC_Z); FixupBranch noBreakpoint = Jit->J_CC(CC_Z);

View File

@ -313,15 +313,15 @@ void JitIL::WriteCallInterpreter(UGeckoInstruction inst)
{ {
if (js.isLastInstruction) if (js.isLastInstruction)
{ {
MOV(32, M(&PC), Imm32(js.compilerPC)); MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
MOV(32, M(&NPC), Imm32(js.compilerPC + 4)); MOV(32, PPCSTATE(npc), Imm32(js.compilerPC + 4));
} }
Interpreter::_interpreterInstruction instr = GetInterpreterOp(inst); Interpreter::_interpreterInstruction instr = GetInterpreterOp(inst);
ABI_CallFunctionC((void*)instr, inst.hex); ABI_CallFunctionC((void*)instr, inst.hex);
if (js.isLastInstruction) if (js.isLastInstruction)
{ {
MOV(32, R(EAX), M(&NPC)); MOV(32, R(RSCRATCH), PPCSTATE(npc));
WriteRfiExitDestInOpArg(R(EAX)); WriteRfiExitDestInOpArg(R(RSCRATCH));
} }
} }
@ -341,8 +341,8 @@ void JitIL::FallBackToInterpreter(UGeckoInstruction _inst)
void JitIL::HLEFunction(UGeckoInstruction _inst) void JitIL::HLEFunction(UGeckoInstruction _inst)
{ {
ABI_CallFunctionCC((void*)&HLE::Execute, js.compilerPC, _inst.hex); ABI_CallFunctionCC((void*)&HLE::Execute, js.compilerPC, _inst.hex);
MOV(32, R(EAX), M(&NPC)); MOV(32, R(RSCRATCH), PPCSTATE(npc));
WriteExitDestInOpArg(R(EAX)); WriteExitDestInOpArg(R(RSCRATCH));
} }
void JitIL::DoNothing(UGeckoInstruction _inst) void JitIL::DoNothing(UGeckoInstruction _inst)
@ -398,7 +398,7 @@ void JitIL::WriteExit(u32 destination)
{ {
ABI_CallFunction((void *)JitILProfiler::End); ABI_CallFunction((void *)JitILProfiler::End);
} }
SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
//If nobody has taken care of this yet (this can be removed when all branches are done) //If nobody has taken care of this yet (this can be removed when all branches are done)
JitBlock *b = js.curBlock; JitBlock *b = js.curBlock;
@ -417,7 +417,7 @@ void JitIL::WriteExit(u32 destination)
} }
else else
{ {
MOV(32, M(&PC), Imm32(destination)); MOV(32, PPCSTATE(pc), Imm32(destination));
JMP(asm_routines.dispatcher, true); JMP(asm_routines.dispatcher, true);
} }
b->linkData.push_back(linkData); b->linkData.push_back(linkData);
@ -425,27 +425,27 @@ void JitIL::WriteExit(u32 destination)
void JitIL::WriteExitDestInOpArg(const Gen::OpArg& arg) void JitIL::WriteExitDestInOpArg(const Gen::OpArg& arg)
{ {
MOV(32, M(&PC), arg); MOV(32, PPCSTATE(pc), arg);
Cleanup(); Cleanup();
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling)
{ {
ABI_CallFunction((void *)JitILProfiler::End); ABI_CallFunction((void *)JitILProfiler::End);
} }
SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
JMP(asm_routines.dispatcher, true); JMP(asm_routines.dispatcher, true);
} }
void JitIL::WriteRfiExitDestInOpArg(const Gen::OpArg& arg) void JitIL::WriteRfiExitDestInOpArg(const Gen::OpArg& arg)
{ {
MOV(32, M(&PC), arg); MOV(32, PPCSTATE(pc), arg);
MOV(32, M(&NPC), arg); MOV(32, PPCSTATE(npc), arg);
Cleanup(); Cleanup();
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling)
{ {
ABI_CallFunction((void *)JitILProfiler::End); ABI_CallFunction((void *)JitILProfiler::End);
} }
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions)); ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
JMP(asm_routines.dispatcher, true); JMP(asm_routines.dispatcher, true);
} }
@ -456,10 +456,10 @@ void JitIL::WriteExceptionExit()
{ {
ABI_CallFunction((void *)JitILProfiler::End); ABI_CallFunction((void *)JitILProfiler::End);
} }
MOV(32, R(EAX), M(&PC)); MOV(32, R(EAX), PPCSTATE(pc));
MOV(32, M(&NPC), R(EAX)); MOV(32, PPCSTATE(npc), R(EAX));
ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions)); ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
JMP(asm_routines.dispatcher, true); JMP(asm_routines.dispatcher, true);
} }
@ -548,7 +548,7 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
// Downcount flag check. The last block decremented downcounter, and the flag should still be available. // Downcount flag check. The last block decremented downcounter, and the flag should still be available.
FixupBranch skip = J_CC(CC_NBE); FixupBranch skip = J_CC(CC_NBE);
MOV(32, M(&PC), Imm32(js.blockStart)); MOV(32, PPCSTATE(pc), Imm32(js.blockStart));
JMP(asm_routines.doTiming, true); // downcount hit zero - go doTiming. JMP(asm_routines.doTiming, true); // downcount hit zero - go doTiming.
SetJumpTarget(skip); SetJumpTarget(skip);
@ -561,13 +561,13 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
if (js.fpa.any) if (js.fpa.any)
{ {
// This block uses FPU - needs to add FP exception bailout // This block uses FPU - needs to add FP exception bailout
TEST(32, M(&PowerPC::ppcState.msr), Imm32(1 << 13)); //Test FP enabled bit TEST(32, PPCSTATE(msr), Imm32(1 << 13)); //Test FP enabled bit
FixupBranch b1 = J_CC(CC_NZ); FixupBranch b1 = J_CC(CC_NZ);
// If a FPU exception occurs, the exception handler will read // If a FPU exception occurs, the exception handler will read
// from PC. Update PC with the latest value in case that happens. // from PC. Update PC with the latest value in case that happens.
MOV(32, M(&PC), Imm32(js.blockStart)); MOV(32, PPCSTATE(pc), Imm32(js.blockStart));
OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE)); OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE));
WriteExceptionExit(); WriteExceptionExit();
SetJumpTarget(b1); SetJumpTarget(b1);
@ -635,7 +635,7 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
HLEFunction(function); HLEFunction(function);
if (type == HLE::HLE_HOOK_REPLACE) if (type == HLE::HLE_HOOK_REPLACE)
{ {
MOV(32, R(EAX), M(&NPC)); MOV(32, R(EAX), PPCSTATE(npc));
jit->js.downcountAmount += jit->js.st.numCycles; jit->js.downcountAmount += jit->js.st.numCycles;
WriteExitDestInOpArg(R(EAX)); WriteExitDestInOpArg(R(EAX));
break; break;

View File

@ -9,8 +9,13 @@
#include "Core/PowerPC/JitCommon/JitAsmCommon.h" #include "Core/PowerPC/JitCommon/JitAsmCommon.h"
#include "Core/PowerPC/JitCommon/JitBase.h" #include "Core/PowerPC/JitCommon/JitBase.h"
#define QUANTIZED_REGS_TO_SAVE (ABI_ALL_CALLER_SAVED & ~((1 << RAX) | (1 << RCX) | (1 << RDX) | \ #define QUANTIZED_REGS_TO_SAVE \
(1 << (XMM0+16)) | (1 << (XMM1+16)))) (ABI_ALL_CALLER_SAVED & ~(\
(1 << RSCRATCH) | \
(1 << RSCRATCH2) | \
(1 << RSCRATCH_EXTRA)| \
(1 << (XMM0+16)) | \
(1 << (XMM1+16))))
using namespace Gen; using namespace Gen;
@ -18,19 +23,15 @@ static int temp32;
void CommonAsmRoutines::GenFifoWrite(int size) void CommonAsmRoutines::GenFifoWrite(int size)
{ {
// Assume value in ABI_PARAM1 // Assume value in RSCRATCH2
PUSH(ESI); PUSH(ESI);
if (size != 32) MOV(32, R(RSCRATCH), Imm32((u32)(u64)GPFifo::m_gatherPipe));
PUSH(EDX);
MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount)); MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
SwapAndStore(size, MComplex(RAX, RSI, 1, 0), ABI_PARAM1); SwapAndStore(size, MComplex(RSCRATCH, ESI, 1, 0), RSCRATCH2);
ADD(32, R(ESI), Imm8(size >> 3)); ADD(32, R(ESI), Imm8(size >> 3));
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI)); MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
if (size != 32)
POP(EDX);
POP(ESI); POP(ESI);
RET(); RET();
} }
@ -39,15 +40,13 @@ void CommonAsmRoutines::GenFifoFloatWrite()
{ {
// Assume value in XMM0 // Assume value in XMM0
PUSH(ESI); PUSH(ESI);
PUSH(EDX);
MOVSS(M(&temp32), XMM0); MOVSS(M(&temp32), XMM0);
MOV(32, R(EDX), M(&temp32)); MOV(32, R(RSCRATCH2), M(&temp32));
MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe)); MOV(32, R(RSCRATCH), Imm32((u32)(u64)GPFifo::m_gatherPipe));
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount)); MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
SwapAndStore(32, MComplex(RAX, RSI, 1, 0), EDX); SwapAndStore(32, MComplex(RSCRATCH, RSI, 1, 0), RSCRATCH2);
ADD(32, R(ESI), Imm8(4)); ADD(32, R(ESI), Imm8(4));
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI)); MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
POP(EDX);
POP(ESI); POP(ESI);
RET(); RET();
} }
@ -55,58 +54,58 @@ void CommonAsmRoutines::GenFifoFloatWrite()
void CommonAsmRoutines::GenFrsqrte() void CommonAsmRoutines::GenFrsqrte()
{ {
// Assume input in XMM0. // Assume input in XMM0.
// This function clobbers EAX, ECX, and EDX. // This function clobbers all three RSCRATCH.
MOVQ_xmm(R(RAX), XMM0); MOVQ_xmm(R(RSCRATCH), XMM0);
// Negative and zero inputs set an exception and take the complex path. // Negative and zero inputs set an exception and take the complex path.
TEST(64, R(RAX), R(RAX)); TEST(64, R(RSCRATCH), R(RSCRATCH));
FixupBranch zero = J_CC(CC_Z, true); FixupBranch zero = J_CC(CC_Z, true);
FixupBranch negative = J_CC(CC_S, true); FixupBranch negative = J_CC(CC_S, true);
MOV(64, R(RCX), R(RAX)); MOV(64, R(RSCRATCH_EXTRA), R(RSCRATCH));
SHR(64, R(RCX), Imm8(52)); SHR(64, R(RSCRATCH_EXTRA), Imm8(52));
// Zero and max exponents (non-normal floats) take the complex path. // Zero and max exponents (non-normal floats) take the complex path.
FixupBranch complex1 = J_CC(CC_Z, true); FixupBranch complex1 = J_CC(CC_Z, true);
CMP(32, R(ECX), Imm32(0x7FF)); CMP(32, R(RSCRATCH_EXTRA), Imm32(0x7FF));
FixupBranch complex2 = J_CC(CC_E, true); FixupBranch complex2 = J_CC(CC_E, true);
SUB(32, R(ECX), Imm32(0x3FD)); SUB(32, R(RSCRATCH_EXTRA), Imm32(0x3FD));
SAR(32, R(ECX), Imm8(1)); SAR(32, R(RSCRATCH_EXTRA), Imm8(1));
MOV(32, R(EDX), Imm32(0x3FF)); MOV(32, R(RSCRATCH2), Imm32(0x3FF));
SUB(32, R(EDX), R(ECX)); SUB(32, R(RSCRATCH2), R(RSCRATCH_EXTRA));
SHL(64, R(RDX), Imm8(52)); // exponent = ((0x3FFLL << 52) - ((exponent - (0x3FELL << 52)) / 2)) & (0x7FFLL << 52); SHL(64, R(RSCRATCH2), Imm8(52)); // exponent = ((0x3FFLL << 52) - ((exponent - (0x3FELL << 52)) / 2)) & (0x7FFLL << 52);
MOV(64, R(RCX), R(RAX)); MOV(64, R(RSCRATCH_EXTRA), R(RSCRATCH));
SHR(64, R(RCX), Imm8(48)); SHR(64, R(RSCRATCH_EXTRA), Imm8(48));
AND(32, R(ECX), Imm8(0x1F)); AND(32, R(RSCRATCH_EXTRA), Imm8(0x1F));
XOR(32, R(ECX), Imm8(0x10)); // int index = i / 2048 + (odd_exponent ? 16 : 0); XOR(32, R(RSCRATCH_EXTRA), Imm8(0x10)); // int index = i / 2048 + (odd_exponent ? 16 : 0);
SHR(64, R(RAX), Imm8(37)); SHR(64, R(RSCRATCH), Imm8(37));
AND(32, R(EAX), Imm32(0x7FF)); AND(32, R(RSCRATCH), Imm32(0x7FF));
IMUL(32, EAX, MScaled(RCX, SCALE_4, (u32)(u64)MathUtil::frsqrte_expected_dec)); IMUL(32, RSCRATCH, MScaled(RSCRATCH_EXTRA, SCALE_4, (u32)(u64)MathUtil::frsqrte_expected_dec));
MOV(32, R(ECX), MScaled(RCX, SCALE_4, (u32)(u64)MathUtil::frsqrte_expected_base)); MOV(32, R(RSCRATCH_EXTRA), MScaled(RSCRATCH_EXTRA, SCALE_4, (u32)(u64)MathUtil::frsqrte_expected_base));
SUB(32, R(ECX), R(EAX)); SUB(32, R(RSCRATCH_EXTRA), R(RSCRATCH));
SHL(64, R(RCX), Imm8(26)); SHL(64, R(RSCRATCH_EXTRA), Imm8(26));
OR(64, R(RDX), R(RCX)); // vali |= (s64)(frsqrte_expected_base[index] - frsqrte_expected_dec[index] * (i % 2048)) << 26; OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(frsqrte_expected_base[index] - frsqrte_expected_dec[index] * (i % 2048)) << 26;
MOVQ_xmm(XMM0, R(RDX)); MOVQ_xmm(XMM0, R(RSCRATCH2));
RET(); RET();
// Exception flags for zero input. // Exception flags for zero input.
SetJumpTarget(zero); SetJumpTarget(zero);
TEST(32, M(&FPSCR), Imm32(FPSCR_ZX)); TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_ZX));
FixupBranch skip_set_fx1 = J_CC(CC_NZ); FixupBranch skip_set_fx1 = J_CC(CC_NZ);
OR(32, M(&FPSCR), Imm32(FPSCR_FX)); OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX));
SetJumpTarget(skip_set_fx1); SetJumpTarget(skip_set_fx1);
OR(32, M(&FPSCR), Imm32(FPSCR_ZX)); OR(32, PPCSTATE(fpscr), Imm32(FPSCR_ZX));
FixupBranch complex3 = J(); FixupBranch complex3 = J();
// Exception flags for negative input. // Exception flags for negative input.
SetJumpTarget(negative); SetJumpTarget(negative);
TEST(32, M(&FPSCR), Imm32(FPSCR_VXSQRT)); TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_VXSQRT));
FixupBranch skip_set_fx2 = J_CC(CC_NZ); FixupBranch skip_set_fx2 = J_CC(CC_NZ);
OR(32, M(&FPSCR), Imm32(FPSCR_FX)); OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX));
SetJumpTarget(skip_set_fx2); SetJumpTarget(skip_set_fx2);
OR(32, M(&FPSCR), Imm32(FPSCR_VXSQRT)); OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXSQRT));
SetJumpTarget(complex1); SetJumpTarget(complex1);
SetJumpTarget(complex2); SetJumpTarget(complex2);
@ -120,53 +119,53 @@ void CommonAsmRoutines::GenFrsqrte()
void CommonAsmRoutines::GenFres() void CommonAsmRoutines::GenFres()
{ {
// Assume input in XMM0. // Assume input in XMM0.
// This function clobbers EAX, ECX, and EDX. // This function clobbers all three RSCRATCH.
MOVQ_xmm(R(RAX), XMM0); MOVQ_xmm(R(RSCRATCH), XMM0);
// Zero inputs set an exception and take the complex path. // Zero inputs set an exception and take the complex path.
TEST(64, R(RAX), R(RAX)); TEST(64, R(RSCRATCH), R(RSCRATCH));
FixupBranch zero = J_CC(CC_Z); FixupBranch zero = J_CC(CC_Z);
MOV(64, R(RCX), R(RAX)); MOV(64, R(RSCRATCH_EXTRA), R(RSCRATCH));
SHR(64, R(RCX), Imm8(52)); SHR(64, R(RSCRATCH_EXTRA), Imm8(52));
MOV(32, R(EDX), R(ECX)); MOV(32, R(RSCRATCH2), R(RSCRATCH_EXTRA));
AND(32, R(ECX), Imm32(0x7FF)); // exp AND(32, R(RSCRATCH_EXTRA), Imm32(0x7FF)); // exp
AND(32, R(EDX), Imm32(0x800)); // sign AND(32, R(RSCRATCH2), Imm32(0x800)); // sign
CMP(32, R(ECX), Imm32(895)); CMP(32, R(RSCRATCH_EXTRA), Imm32(895));
// Take the complex path for very large/small exponents. // Take the complex path for very large/small exponents.
FixupBranch complex1 = J_CC(CC_L); FixupBranch complex1 = J_CC(CC_L);
CMP(32, R(ECX), Imm32(1149)); CMP(32, R(RSCRATCH_EXTRA), Imm32(1149));
FixupBranch complex2 = J_CC(CC_GE); FixupBranch complex2 = J_CC(CC_GE);
SUB(32, R(ECX), Imm32(0x7FD)); SUB(32, R(RSCRATCH_EXTRA), Imm32(0x7FD));
NEG(32, R(ECX)); NEG(32, R(RSCRATCH_EXTRA));
OR(32, R(ECX), R(EDX)); OR(32, R(RSCRATCH_EXTRA), R(RSCRATCH2));
SHL(64, R(RCX), Imm8(52)); // vali = sign | exponent SHL(64, R(RSCRATCH_EXTRA), Imm8(52)); // vali = sign | exponent
MOV(64, R(RDX), R(RAX)); MOV(64, R(RSCRATCH2), R(RSCRATCH));
SHR(64, R(RAX), Imm8(37)); SHR(64, R(RSCRATCH), Imm8(37));
SHR(64, R(RDX), Imm8(47)); SHR(64, R(RSCRATCH2), Imm8(47));
AND(32, R(EAX), Imm32(0x3FF)); // i % 1024 AND(32, R(RSCRATCH), Imm32(0x3FF)); // i % 1024
AND(32, R(RDX), Imm8(0x1F)); // i / 1024 AND(32, R(RSCRATCH2), Imm8(0x1F)); // i / 1024
IMUL(32, EAX, MScaled(RDX, SCALE_4, (u32)(u64)MathUtil::fres_expected_dec)); IMUL(32, RSCRATCH, MScaled(RSCRATCH2, SCALE_4, (u32)(u64)MathUtil::fres_expected_dec));
ADD(32, R(EAX), Imm8(1)); ADD(32, R(RSCRATCH), Imm8(1));
SHR(32, R(EAX), Imm8(1)); SHR(32, R(RSCRATCH), Imm8(1));
MOV(32, R(EDX), MScaled(RDX, SCALE_4, (u32)(u64)MathUtil::fres_expected_base)); MOV(32, R(RSCRATCH2), MScaled(RSCRATCH2, SCALE_4, (u32)(u64)MathUtil::fres_expected_base));
SUB(32, R(EDX), R(EAX)); SUB(32, R(RSCRATCH2), R(RSCRATCH));
SHL(64, R(RDX), Imm8(29)); SHL(64, R(RSCRATCH2), Imm8(29));
OR(64, R(RDX), R(RCX)); // vali |= (s64)(fres_expected_base[i / 1024] - (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2) << 29 OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(fres_expected_base[i / 1024] - (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2) << 29
MOVQ_xmm(XMM0, R(RDX)); MOVQ_xmm(XMM0, R(RSCRATCH2));
RET(); RET();
// Exception flags for zero input. // Exception flags for zero input.
SetJumpTarget(zero); SetJumpTarget(zero);
TEST(32, M(&FPSCR), Imm32(FPSCR_ZX)); TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_ZX));
FixupBranch skip_set_fx1 = J_CC(CC_NZ); FixupBranch skip_set_fx1 = J_CC(CC_NZ);
OR(32, M(&FPSCR), Imm32(FPSCR_FX)); OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX));
SetJumpTarget(skip_set_fx1); SetJumpTarget(skip_set_fx1);
OR(32, M(&FPSCR), Imm32(FPSCR_ZX)); OR(32, PPCSTATE(fpscr), Imm32(FPSCR_ZX));
SetJumpTarget(complex1); SetJumpTarget(complex1);
SetJumpTarget(complex2); SetJumpTarget(complex2);
@ -253,21 +252,21 @@ void CommonAsmRoutines::GenQuantizedStores()
SHUFPS(XMM0, R(XMM0), 1); SHUFPS(XMM0, R(XMM0), 1);
MOVQ_xmm(M(&psTemp[0]), XMM0); MOVQ_xmm(M(&psTemp[0]), XMM0);
TEST(32, R(ECX), Imm32(0x0C000000)); TEST(32, R(RSCRATCH_EXTRA), Imm32(0x0C000000));
FixupBranch too_complex = J_CC(CC_NZ, true); FixupBranch too_complex = J_CC(CC_NZ, true);
MOV(64, R(RAX), M(&psTemp[0])); MOV(64, R(RSCRATCH), M(&psTemp[0]));
SwapAndStore(64, MComplex(RBX, RCX, SCALE_1, 0), RAX); SwapAndStore(64, MComplex(RMEM, RSCRATCH_EXTRA, SCALE_1, 0), RSCRATCH);
FixupBranch skip_complex = J(true); FixupBranch skip_complex = J(true);
SetJumpTarget(too_complex); SetJumpTarget(too_complex);
ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true); ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true);
ABI_CallFunctionR((void *)&WriteDual32, RCX); ABI_CallFunctionR((void *)&WriteDual32, RSCRATCH_EXTRA);
ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true); ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true);
SetJumpTarget(skip_complex); SetJumpTarget(skip_complex);
RET(); RET();
const u8* storePairedU8 = AlignCode4(); const u8* storePairedU8 = AlignCode4();
SHR(32, R(EAX), Imm8(6)); SHR(32, R(RSCRATCH), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS));
PUNPCKLDQ(XMM1, R(XMM1)); PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1)); MULPS(XMM0, R(XMM1));
#ifdef QUANTIZE_OVERFLOW_SAFE #ifdef QUANTIZE_OVERFLOW_SAFE
@ -278,14 +277,14 @@ void CommonAsmRoutines::GenQuantizedStores()
CVTTPS2DQ(XMM0, R(XMM0)); CVTTPS2DQ(XMM0, R(XMM0));
PACKSSDW(XMM0, R(XMM0)); PACKSSDW(XMM0, R(XMM0));
PACKUSWB(XMM0, R(XMM0)); PACKUSWB(XMM0, R(XMM0));
MOVD_xmm(R(EAX), XMM0); MOVD_xmm(R(RSCRATCH), XMM0);
SafeWriteRegToReg(AX, ECX, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
RET(); RET();
const u8* storePairedS8 = AlignCode4(); const u8* storePairedS8 = AlignCode4();
SHR(32, R(EAX), Imm8(6)); SHR(32, R(RSCRATCH), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS));
PUNPCKLDQ(XMM1, R(XMM1)); PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1)); MULPS(XMM0, R(XMM1));
#ifdef QUANTIZE_OVERFLOW_SAFE #ifdef QUANTIZE_OVERFLOW_SAFE
@ -296,15 +295,15 @@ void CommonAsmRoutines::GenQuantizedStores()
CVTTPS2DQ(XMM0, R(XMM0)); CVTTPS2DQ(XMM0, R(XMM0));
PACKSSDW(XMM0, R(XMM0)); PACKSSDW(XMM0, R(XMM0));
PACKSSWB(XMM0, R(XMM0)); PACKSSWB(XMM0, R(XMM0));
MOVD_xmm(R(EAX), XMM0); MOVD_xmm(R(RSCRATCH), XMM0);
SafeWriteRegToReg(AX, ECX, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
RET(); RET();
const u8* storePairedU16 = AlignCode4(); const u8* storePairedU16 = AlignCode4();
SHR(32, R(EAX), Imm8(6)); SHR(32, R(RSCRATCH), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS));
PUNPCKLDQ(XMM1, R(XMM1)); PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1)); MULPS(XMM0, R(XMM1));
@ -319,18 +318,18 @@ void CommonAsmRoutines::GenQuantizedStores()
MOVQ_xmm(M(psTemp), XMM0); MOVQ_xmm(M(psTemp), XMM0);
// place ps[0] into the higher word, ps[1] into the lower // place ps[0] into the higher word, ps[1] into the lower
// so no need in ROL after BSWAP // so no need in ROL after BSWAP
MOVZX(32, 16, EAX, M((char*)psTemp + 0)); MOVZX(32, 16, RSCRATCH, M((char*)psTemp + 0));
SHL(32, R(EAX), Imm8(16)); SHL(32, R(RSCRATCH), Imm8(16));
MOV(16, R(AX), M((char*)psTemp + 4)); MOV(16, R(RSCRATCH), M((char*)psTemp + 4));
BSWAP(32, EAX); BSWAP(32, RSCRATCH);
SafeWriteRegToReg(EAX, ECX, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
RET(); RET();
const u8* storePairedS16 = AlignCode4(); const u8* storePairedS16 = AlignCode4();
SHR(32, R(EAX), Imm8(6)); SHR(32, R(RSCRATCH), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS));
// SHUFPS or UNPCKLPS might be a better choice here. The last one might just be an alias though. // SHUFPS or UNPCKLPS might be a better choice here. The last one might just be an alias though.
PUNPCKLDQ(XMM1, R(XMM1)); PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1)); MULPS(XMM0, R(XMM1));
@ -341,10 +340,10 @@ void CommonAsmRoutines::GenQuantizedStores()
#endif #endif
CVTTPS2DQ(XMM0, R(XMM0)); CVTTPS2DQ(XMM0, R(XMM0));
PACKSSDW(XMM0, R(XMM0)); PACKSSDW(XMM0, R(XMM0));
MOVD_xmm(R(EAX), XMM0); MOVD_xmm(R(RSCRATCH), XMM0);
BSWAP(32, EAX); BSWAP(32, RSCRATCH);
ROL(32, R(EAX), Imm8(16)); ROL(32, R(RSCRATCH), Imm8(16));
SafeWriteRegToReg(EAX, ECX, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
RET(); RET();
@ -369,7 +368,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
// Easy! // Easy!
const u8* storeSingleFloat = AlignCode4(); const u8* storeSingleFloat = AlignCode4();
SafeWriteF32ToReg(XMM0, ECX, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); SafeWriteF32ToReg(XMM0, RSCRATCH_EXTRA, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
RET(); RET();
/* /*
if (cpu_info.bSSSE3) if (cpu_info.bSSSE3)
@ -377,56 +376,56 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
PSHUFB(XMM0, M((void *)pbswapShuffle2x4)); PSHUFB(XMM0, M((void *)pbswapShuffle2x4));
// TODO: SafeWriteFloat // TODO: SafeWriteFloat
MOVSS(M(&psTemp[0]), XMM0); MOVSS(M(&psTemp[0]), XMM0);
MOV(32, R(EAX), M(&psTemp[0])); MOV(32, R(RSCRATCH), M(&psTemp[0]));
SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
} }
else else
{ {
MOVSS(M(&psTemp[0]), XMM0); MOVSS(M(&psTemp[0]), XMM0);
MOV(32, R(EAX), M(&psTemp[0])); MOV(32, R(RSCRATCH), M(&psTemp[0]));
SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
}*/ }*/
const u8* storeSingleU8 = AlignCode4(); // Used by MKWii const u8* storeSingleU8 = AlignCode4(); // Used by MKWii
SHR(32, R(EAX), Imm8(6)); SHR(32, R(RSCRATCH), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS));
MULSS(XMM0, R(XMM1)); MULSS(XMM0, R(XMM1));
PXOR(XMM1, R(XMM1)); PXOR(XMM1, R(XMM1));
MAXSS(XMM0, R(XMM1)); MAXSS(XMM0, R(XMM1));
MINSS(XMM0, M((void *)&m_255)); MINSS(XMM0, M((void *)&m_255));
CVTTSS2SI(EAX, R(XMM0)); CVTTSS2SI(RSCRATCH, R(XMM0));
SafeWriteRegToReg(AL, ECX, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
RET(); RET();
const u8* storeSingleS8 = AlignCode4(); const u8* storeSingleS8 = AlignCode4();
SHR(32, R(EAX), Imm8(6)); SHR(32, R(RSCRATCH), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS));
MULSS(XMM0, R(XMM1)); MULSS(XMM0, R(XMM1));
MAXSS(XMM0, M((void *)&m_m128)); MAXSS(XMM0, M((void *)&m_m128));
MINSS(XMM0, M((void *)&m_127)); MINSS(XMM0, M((void *)&m_127));
CVTTSS2SI(EAX, R(XMM0)); CVTTSS2SI(RSCRATCH, R(XMM0));
SafeWriteRegToReg(AL, ECX, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
RET(); RET();
const u8* storeSingleU16 = AlignCode4(); // Used by MKWii const u8* storeSingleU16 = AlignCode4(); // Used by MKWii
SHR(32, R(EAX), Imm8(6)); SHR(32, R(RSCRATCH), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS));
MULSS(XMM0, R(XMM1)); MULSS(XMM0, R(XMM1));
PXOR(XMM1, R(XMM1)); PXOR(XMM1, R(XMM1));
MAXSS(XMM0, R(XMM1)); MAXSS(XMM0, R(XMM1));
MINSS(XMM0, M((void *)&m_65535)); MINSS(XMM0, M((void *)&m_65535));
CVTTSS2SI(EAX, R(XMM0)); CVTTSS2SI(RSCRATCH, R(XMM0));
SafeWriteRegToReg(EAX, ECX, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
RET(); RET();
const u8* storeSingleS16 = AlignCode4(); const u8* storeSingleS16 = AlignCode4();
SHR(32, R(EAX), Imm8(6)); SHR(32, R(RSCRATCH), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS));
MULSS(XMM0, R(XMM1)); MULSS(XMM0, R(XMM1));
MAXSS(XMM0, M((void *)&m_m32768)); MAXSS(XMM0, M((void *)&m_m32768));
MINSS(XMM0, M((void *)&m_32767)); MINSS(XMM0, M((void *)&m_32767));
CVTTSS2SI(EAX, R(XMM0)); CVTTSS2SI(RSCRATCH, R(XMM0));
SafeWriteRegToReg(EAX, ECX, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
RET(); RET();
singleStoreQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16())); singleStoreQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));
@ -450,126 +449,126 @@ void CommonAsmRoutines::GenQuantizedLoads()
const u8* loadPairedFloatTwo = AlignCode4(); const u8* loadPairedFloatTwo = AlignCode4();
if (cpu_info.bSSSE3) if (cpu_info.bSSSE3)
{ {
MOVQ_xmm(XMM0, MComplex(RBX, RCX, 1, 0)); MOVQ_xmm(XMM0, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0));
PSHUFB(XMM0, M((void *)pbswapShuffle2x4)); PSHUFB(XMM0, M((void *)pbswapShuffle2x4));
} }
else else
{ {
LoadAndSwap(64, RCX, MComplex(RBX, RCX, 1, 0)); LoadAndSwap(64, RSCRATCH_EXTRA, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0));
ROL(64, R(RCX), Imm8(32)); ROL(64, R(RSCRATCH_EXTRA), Imm8(32));
MOVQ_xmm(XMM0, R(RCX)); MOVQ_xmm(XMM0, R(RSCRATCH_EXTRA));
} }
RET(); RET();
const u8* loadPairedFloatOne = AlignCode4(); const u8* loadPairedFloatOne = AlignCode4();
if (cpu_info.bSSSE3) if (cpu_info.bSSSE3)
{ {
MOVD_xmm(XMM0, MComplex(RBX, RCX, 1, 0)); MOVD_xmm(XMM0, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0));
PSHUFB(XMM0, M((void *)pbswapShuffle1x4)); PSHUFB(XMM0, M((void *)pbswapShuffle1x4));
UNPCKLPS(XMM0, M((void*)m_one)); UNPCKLPS(XMM0, M((void*)m_one));
} }
else else
{ {
LoadAndSwap(32, RCX, MComplex(RBX, RCX, 1, 0)); LoadAndSwap(32, RSCRATCH_EXTRA, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0));
MOVD_xmm(XMM0, R(RCX)); MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
UNPCKLPS(XMM0, M((void*)m_one)); UNPCKLPS(XMM0, M((void*)m_one));
} }
RET(); RET();
const u8* loadPairedU8Two = AlignCode4(); const u8* loadPairedU8Two = AlignCode4();
UnsafeLoadRegToRegNoSwap(ECX, ECX, 16, 0); UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0);
MOVD_xmm(XMM0, R(ECX)); MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
PXOR(XMM1, R(XMM1)); PXOR(XMM1, R(XMM1));
PUNPCKLBW(XMM0, R(XMM1)); PUNPCKLBW(XMM0, R(XMM1));
PUNPCKLWD(XMM0, R(XMM1)); PUNPCKLWD(XMM0, R(XMM1));
CVTDQ2PS(XMM0, R(XMM0)); CVTDQ2PS(XMM0, R(XMM0));
SHR(32, R(EAX), Imm8(6)); SHR(32, R(RSCRATCH), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS));
PUNPCKLDQ(XMM1, R(XMM1)); PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1)); MULPS(XMM0, R(XMM1));
RET(); RET();
const u8* loadPairedU8One = AlignCode4(); const u8* loadPairedU8One = AlignCode4();
UnsafeLoadRegToRegNoSwap(ECX, ECX, 8, 0); // ECX = 0x000000xx UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 8, 0); // RSCRATCH_EXTRA = 0x000000xx
MOVD_xmm(XMM0, R(ECX)); MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
CVTDQ2PS(XMM0, R(XMM0)); // Is CVTSI2SS better? CVTDQ2PS(XMM0, R(XMM0)); // Is CVTSI2SS better?
SHR(32, R(EAX), Imm8(6)); SHR(32, R(RSCRATCH), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS));
MULSS(XMM0, R(XMM1)); MULSS(XMM0, R(XMM1));
UNPCKLPS(XMM0, M((void*)m_one)); UNPCKLPS(XMM0, M((void*)m_one));
RET(); RET();
const u8* loadPairedS8Two = AlignCode4(); const u8* loadPairedS8Two = AlignCode4();
UnsafeLoadRegToRegNoSwap(ECX, ECX, 16, 0); UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0);
MOVD_xmm(XMM0, R(ECX)); MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
PUNPCKLBW(XMM0, R(XMM0)); PUNPCKLBW(XMM0, R(XMM0));
PUNPCKLWD(XMM0, R(XMM0)); PUNPCKLWD(XMM0, R(XMM0));
PSRAD(XMM0, 24); PSRAD(XMM0, 24);
CVTDQ2PS(XMM0, R(XMM0)); CVTDQ2PS(XMM0, R(XMM0));
SHR(32, R(EAX), Imm8(6)); SHR(32, R(RSCRATCH), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS));
PUNPCKLDQ(XMM1, R(XMM1)); PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1)); MULPS(XMM0, R(XMM1));
RET(); RET();
const u8* loadPairedS8One = AlignCode4(); const u8* loadPairedS8One = AlignCode4();
UnsafeLoadRegToRegNoSwap(ECX, ECX, 8, 0); UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 8, 0);
SHL(32, R(ECX), Imm8(24)); SHL(32, R(RSCRATCH_EXTRA), Imm8(24));
SAR(32, R(ECX), Imm8(24)); SAR(32, R(RSCRATCH_EXTRA), Imm8(24));
MOVD_xmm(XMM0, R(ECX)); MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
CVTDQ2PS(XMM0, R(XMM0)); CVTDQ2PS(XMM0, R(XMM0));
SHR(32, R(EAX), Imm8(6)); SHR(32, R(RSCRATCH), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS));
MULSS(XMM0, R(XMM1)); MULSS(XMM0, R(XMM1));
UNPCKLPS(XMM0, M((void*)m_one)); UNPCKLPS(XMM0, M((void*)m_one));
RET(); RET();
const u8* loadPairedU16Two = AlignCode4(); const u8* loadPairedU16Two = AlignCode4();
UnsafeLoadRegToReg(ECX, ECX, 32, 0, false); UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 32, 0, false);
ROL(32, R(ECX), Imm8(16)); ROL(32, R(RSCRATCH_EXTRA), Imm8(16));
MOVD_xmm(XMM0, R(ECX)); MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
PXOR(XMM1, R(XMM1)); PXOR(XMM1, R(XMM1));
PUNPCKLWD(XMM0, R(XMM1)); PUNPCKLWD(XMM0, R(XMM1));
CVTDQ2PS(XMM0, R(XMM0)); CVTDQ2PS(XMM0, R(XMM0));
SHR(32, R(EAX), Imm8(6)); SHR(32, R(RSCRATCH), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS));
PUNPCKLDQ(XMM1, R(XMM1)); PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1)); MULPS(XMM0, R(XMM1));
RET(); RET();
const u8* loadPairedU16One = AlignCode4(); const u8* loadPairedU16One = AlignCode4();
UnsafeLoadRegToReg(ECX, ECX, 32, 0, false); UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 32, 0, false);
SHR(32, R(ECX), Imm8(16)); SHR(32, R(RSCRATCH_EXTRA), Imm8(16));
MOVD_xmm(XMM0, R(ECX)); MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
CVTDQ2PS(XMM0, R(XMM0)); CVTDQ2PS(XMM0, R(XMM0));
SHR(32, R(EAX), Imm8(6)); SHR(32, R(RSCRATCH), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS));
MULSS(XMM0, R(XMM1)); MULSS(XMM0, R(XMM1));
UNPCKLPS(XMM0, M((void*)m_one)); UNPCKLPS(XMM0, M((void*)m_one));
RET(); RET();
const u8* loadPairedS16Two = AlignCode4(); const u8* loadPairedS16Two = AlignCode4();
UnsafeLoadRegToReg(ECX, ECX, 32, 0, false); UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 32, 0, false);
ROL(32, R(ECX), Imm8(16)); ROL(32, R(RSCRATCH_EXTRA), Imm8(16));
MOVD_xmm(XMM0, R(ECX)); MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
PUNPCKLWD(XMM0, R(XMM0)); PUNPCKLWD(XMM0, R(XMM0));
PSRAD(XMM0, 16); PSRAD(XMM0, 16);
CVTDQ2PS(XMM0, R(XMM0)); CVTDQ2PS(XMM0, R(XMM0));
SHR(32, R(EAX), Imm8(6)); SHR(32, R(RSCRATCH), Imm8(6));
AND(32, R(EAX), Imm32(0xFC)); AND(32, R(RSCRATCH), Imm32(0xFC));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS));
PUNPCKLDQ(XMM1, R(XMM1)); PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1)); MULPS(XMM0, R(XMM1));
RET(); RET();
const u8* loadPairedS16One = AlignCode4(); const u8* loadPairedS16One = AlignCode4();
UnsafeLoadRegToReg(ECX, ECX, 32, 0, false); UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 32, 0, false);
SAR(32, R(ECX), Imm8(16)); SAR(32, R(RSCRATCH_EXTRA), Imm8(16));
MOVD_xmm(XMM0, R(ECX)); MOVD_xmm(XMM0, R(RSCRATCH_EXTRA));
CVTDQ2PS(XMM0, R(XMM0)); CVTDQ2PS(XMM0, R(XMM0));
SHR(32, R(EAX), Imm8(6)); SHR(32, R(RSCRATCH), Imm8(6));
AND(32, R(EAX), Imm32(0xFC)); AND(32, R(RSCRATCH), Imm32(0xFC));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS));
MULSS(XMM0, R(XMM1)); MULSS(XMM0, R(XMM1));
UNPCKLPS(XMM0, M((void*)m_one)); UNPCKLPS(XMM0, M((void*)m_one));
RET(); RET();

View File

@ -19,9 +19,9 @@ public:
const u8 *dispatcher; const u8 *dispatcher;
const u8 *dispatcherNoCheck; const u8 *dispatcherNoCheck;
const u8 *dispatcherPcInEAX; const u8 *dispatcherPcInRSCRATCH;
const u8 *dispatchPcInEAX; const u8 *dispatchPcInRSCRATCH;
const u8 *doTiming; const u8 *doTiming;
const u8 *frsqrte; const u8 *frsqrte;
@ -31,14 +31,14 @@ public:
// In: ECX: Address to read from. // In: ECX: Address to read from.
// Out: XMM0: Bottom two 32-bit slots hold the read value, // Out: XMM0: Bottom two 32-bit slots hold the read value,
// converted to a pair of floats. // converted to a pair of floats.
// Trashes: EAX ECX EDX // Trashes: all three RSCRATCH
const u8 **pairedLoadQuantized; const u8 **pairedLoadQuantized;
// In: array index: GQR to use. // In: array index: GQR to use.
// In: ECX: Address to write to. // In: ECX: Address to write to.
// In: XMM0: Bottom two 32-bit slots hold the pair of floats to be written. // In: XMM0: Bottom two 32-bit slots hold the pair of floats to be written.
// Out: Nothing. // Out: Nothing.
// Trashes: EAX ECX EDX // Trashes: all three RSCRATCH
const u8 **pairedStoreQuantized; const u8 **pairedStoreQuantized;
// In: array index: GQR to use. // In: array index: GQR to use.

View File

@ -59,6 +59,7 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re
// It ought to be necessary to align the stack here. Since it seems to not // It ought to be necessary to align the stack here. Since it seems to not
// affect anybody, I'm not going to add it just to be completely safe about // affect anybody, I'm not going to add it just to be completely safe about
// performance. // performance.
ABI_PushRegistersAndAdjustStack(registersInUse, true);
if (addrReg != ABI_PARAM1) if (addrReg != ABI_PARAM1)
MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg)); MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg));
@ -66,7 +67,6 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re
if (info.displacement) if (info.displacement)
ADD(32, R(ABI_PARAM1), Imm32(info.displacement)); ADD(32, R(ABI_PARAM1), Imm32(info.displacement));
ABI_PushRegistersAndAdjustStack(registersInUse, true);
switch (info.operandSize) switch (info.operandSize)
{ {
case 4: case 4:
@ -74,7 +74,7 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re
break; break;
case 2: case 2:
CALL((void *)&Memory::Read_U16); CALL((void *)&Memory::Read_U16);
SHL(32, R(EAX), Imm8(16)); SHL(32, R(ABI_RETURN), Imm8(16));
break; break;
case 1: case 1:
CALL((void *)&Memory::Read_U8); CALL((void *)&Memory::Read_U8);
@ -84,11 +84,11 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re
if (info.signExtend && info.operandSize == 1) if (info.signExtend && info.operandSize == 1)
{ {
// Need to sign extend value from Read_U8. // Need to sign extend value from Read_U8.
MOVSX(32, 8, dataReg, R(EAX)); MOVSX(32, 8, dataReg, R(ABI_RETURN));
} }
else if (dataReg != EAX) else if (dataReg != EAX)
{ {
MOV(32, R(dataReg), R(EAX)); MOV(32, R(dataReg), R(ABI_RETURN));
} }
ABI_PopRegistersAndAdjustStack(registersInUse, true); ABI_PopRegistersAndAdjustStack(registersInUse, true);
@ -113,31 +113,17 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r
// check anyway. // check anyway.
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
MOV(32, M(&PC), Imm32(pc)); MOV(32, PPCSTATE(pc), Imm32(pc));
if (dataReg == ABI_PARAM2) ABI_PushRegistersAndAdjustStack(registersInUse, true);
PanicAlert("Incorrect use of SafeWriteRegToReg");
if (addrReg != ABI_PARAM1) MOVTwo(64, ABI_PARAM1, dataReg, ABI_PARAM2, addrReg, ABI_PARAM3);
{
if (ABI_PARAM1 != dataReg)
MOV(64, R(ABI_PARAM1), R((X64Reg)dataReg));
if (ABI_PARAM2 != addrReg)
MOV(64, R(ABI_PARAM2), R((X64Reg)addrReg));
}
else
{
if (ABI_PARAM2 != addrReg)
MOV(64, R(ABI_PARAM2), R((X64Reg)addrReg));
if (ABI_PARAM1 != dataReg)
MOV(64, R(ABI_PARAM1), R((X64Reg)dataReg));
}
if (info.displacement) if (info.displacement)
{ {
ADD(32, R(ABI_PARAM2), Imm32(info.displacement)); ADD(32, R(ABI_PARAM2), Imm32(info.displacement));
} }
ABI_PushRegistersAndAdjustStack(registersInUse, true);
switch (info.operandSize) switch (info.operandSize)
{ {
case 8: case 8:
@ -180,9 +166,9 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
return nullptr; return nullptr;
} }
if (info.otherReg != RBX) if (info.otherReg != RMEM)
{ {
PanicAlert("BackPatch : Base reg not RBX." PanicAlert("BackPatch : Base reg not RMEM."
"\n\nAttempted to access %08x.", emAddress); "\n\nAttempted to access %08x.", emAddress);
return nullptr; return nullptr;
} }

View File

@ -27,6 +27,23 @@
#include "Core/PowerPC/JitCommon/JitBackpatch.h" #include "Core/PowerPC/JitCommon/JitBackpatch.h"
#include "Core/PowerPC/JitCommon/JitCache.h" #include "Core/PowerPC/JitCommon/JitCache.h"
// TODO: find a better place for x86-specific stuff
// Fixed host-register roles for the x86-64 JITs. Each macro simply renames a
// host register so that emitter code states the role rather than the register.
// The following register assignments are common to Jit64 and Jit64IL:
// RSCRATCH and RSCRATCH2 are always scratch registers and can be used without
// limitation.
// NOTE(review): RSCRATCH is RAX, which this change also refers to as
// ABI_RETURN after calls into C++ helpers, so a value held in RSCRATCH
// presumably does not survive such a call - confirm against the ABI helpers.
#define RSCRATCH RAX
#define RSCRATCH2 RDX
// RSCRATCH_EXTRA may be in the allocation order, so it has to be flushed
// before use.
#define RSCRATCH_EXTRA RCX
// RMEM points to the start of emulated memory.
// It is the base register of the MComplex/MDisp operands built by the
// load/store helpers.
#define RMEM RBX
// RCODE_POINTERS does what it says.
// NOTE(review): presumably the base of a table of compiled-code pointers used
// by the dispatcher; no use of it is visible here - TODO confirm.
#define RCODE_POINTERS R15
// RPPCSTATE points to ppcState + 0x80. It's offset because we want to be able
// to address as much as possible in a one-byte offset form.
// (A one-byte displacement covers -0x80..0x7f, i.e. ppcState offsets 0..0xff.)
#define RPPCSTATE RBP
// Use these to control the instruction selection // Use these to control the instruction selection
// #define INSTRUCTION_START FallBackToInterpreter(inst); return; // #define INSTRUCTION_START FallBackToInterpreter(inst); return;
// #define INSTRUCTION_START PPCTables::CountInstruction(inst); // #define INSTRUCTION_START PPCTables::CountInstruction(inst);

View File

@ -370,6 +370,6 @@ using namespace Gen;
// Overwrites the code at `location` with a two-instruction stub: store
// `address` into the emulated PC, then jump to the dispatcher routine.
//   location - where the stub is emitted (const is cast away for the emitter)
//   address  - 32-bit value written to the emulated PC
// NOTE(review): presumably used to invalidate a compiled block so execution
// re-enters the dispatcher at `address` - confirm against callers.
void JitBlockCache::WriteDestroyBlock(const u8* location, u32 address) void JitBlockCache::WriteDestroyBlock(const u8* location, u32 address)
{ {
// Temporary emitter that writes directly at `location`.
XEmitter emit((u8 *)location); XEmitter emit((u8 *)location);
emit.MOV(32, M(&PC), Imm32(address)); emit.MOV(32, PPCSTATE(pc), Imm32(address));
emit.JMP(jit->GetAsmRoutines()->dispatcher, true); emit.JMP(jit->GetAsmRoutines()->dispatcher, true);
} }

View File

@ -5,7 +5,6 @@
#include <emmintrin.h> #include <emmintrin.h>
#include "Common/Common.h" #include "Common/Common.h"
#include "Common/CPUDetect.h"
#include "Common/MathUtil.h" #include "Common/MathUtil.h"
#include "Core/HW/MMIO.h" #include "Core/HW/MMIO.h"
@ -42,7 +41,7 @@ void EmuCodeBlock::SwapAndStore(int size, const Gen::OpArg& dst, Gen::X64Reg src
void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend) void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend)
{ {
MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset)); MOVZX(32, accessSize, reg_value, MComplex(RMEM, reg_addr, SCALE_1, offset));
if (accessSize == 32) if (accessSize == 32)
{ {
BSWAP(32, reg_value); BSWAP(32, reg_value);
@ -64,7 +63,7 @@ void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int acc
// Emits a single zero-extending load (MOVZX to 32 bits) of `accessSize` bits
// from [memory base + reg_addr + offset] into reg_value. No byte swap and no
// address check are emitted here.
//   reg_addr   - register holding the emulated address (used as index)
//   reg_value  - destination register
//   accessSize - width of the memory operand, in bits
//   offset     - constant displacement added to the address
void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset) void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset)
{ {
MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset)); MOVZX(32, accessSize, reg_value, MComplex(RMEM, reg_addr, SCALE_1, offset));
} }
u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessSize, s32 offset, bool signExtend) u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessSize, s32 offset, bool signExtend)
@ -86,16 +85,16 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessS
offset = 0; offset = 0;
} }
memOperand = MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset); memOperand = MComplex(RMEM, opAddress.GetSimpleReg(), SCALE_1, offset);
} }
else if (opAddress.IsImm()) else if (opAddress.IsImm())
{ {
memOperand = MDisp(RBX, (opAddress.offset + offset) & 0x3FFFFFFF); memOperand = MDisp(RMEM, (opAddress.offset + offset) & 0x3FFFFFFF);
} }
else else
{ {
MOV(32, R(reg_value), opAddress); MOV(32, R(reg_value), opAddress);
memOperand = MComplex(RBX, reg_value, SCALE_1, offset); memOperand = MComplex(RMEM, reg_value, SCALE_1, offset);
} }
result = GetWritableCodePtr(); result = GetWritableCodePtr();
@ -130,7 +129,7 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessS
return result; return result;
} }
// Visitor that generates code to read a MMIO value to EAX. // Visitor that generates code to read a MMIO value.
template <typename T> template <typename T>
class MMIOReadCodeGenerator : public MMIO::ReadHandlingMethodVisitor<T> class MMIOReadCodeGenerator : public MMIO::ReadHandlingMethodVisitor<T>
{ {
@ -182,9 +181,9 @@ private:
void LoadAddrMaskToReg(int sbits, const void* ptr, u32 mask) void LoadAddrMaskToReg(int sbits, const void* ptr, u32 mask)
{ {
#ifdef _ARCH_64 #ifdef _ARCH_64
m_code->MOV(64, R(EAX), ImmPtr(ptr)); m_code->MOV(64, R(RSCRATCH), ImmPtr(ptr));
#else #else
m_code->MOV(32, R(EAX), ImmPtr(ptr)); m_code->MOV(32, R(RSCRATCH), ImmPtr(ptr));
#endif #endif
// If we do not need to mask, we can do the sign extend while loading // If we do not need to mask, we can do the sign extend while loading
// from memory. If masking is required, we have to first zero extend, // from memory. If masking is required, we have to first zero extend,
@ -192,11 +191,11 @@ private:
u32 all_ones = (1ULL << sbits) - 1; u32 all_ones = (1ULL << sbits) - 1;
if ((all_ones & mask) == all_ones) if ((all_ones & mask) == all_ones)
{ {
MoveOpArgToReg(sbits, MDisp(EAX, 0)); MoveOpArgToReg(sbits, MDisp(RSCRATCH, 0));
} }
else else
{ {
m_code->MOVZX(32, sbits, m_dst_reg, MDisp(EAX, 0)); m_code->MOVZX(32, sbits, m_dst_reg, MDisp(RSCRATCH, 0));
m_code->AND(32, R(m_dst_reg), Imm32(mask)); m_code->AND(32, R(m_dst_reg), Imm32(mask));
if (m_sign_extend) if (m_sign_extend)
m_code->MOVSX(32, sbits, m_dst_reg, R(m_dst_reg)); m_code->MOVSX(32, sbits, m_dst_reg, R(m_dst_reg));
@ -208,7 +207,7 @@ private:
m_code->ABI_PushRegistersAndAdjustStack(m_registers_in_use, false); m_code->ABI_PushRegistersAndAdjustStack(m_registers_in_use, false);
m_code->ABI_CallLambdaC(lambda, m_address); m_code->ABI_CallLambdaC(lambda, m_address);
m_code->ABI_PopRegistersAndAdjustStack(m_registers_in_use, false); m_code->ABI_PopRegistersAndAdjustStack(m_registers_in_use, false);
MoveOpArgToReg(sbits, R(EAX)); MoveOpArgToReg(sbits, R(ABI_RETURN));
} }
Gen::X64CodeBlock* m_code; Gen::X64CodeBlock* m_code;
@ -248,13 +247,11 @@ void EmuCodeBlock::MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value,
} }
} }
// Always clobbers EAX. Preserves the address.
// Preserves the value if the load fails and js.memcheck is enabled.
void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags) void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags)
{ {
if (!jit->js.memcheck) if (!jit->js.memcheck)
{ {
registersInUse &= ~(1 << RAX | 1 << reg_value); registersInUse &= ~(1 << reg_value);
} }
if (!Core::g_CoreStartupParameter.bMMU && if (!Core::g_CoreStartupParameter.bMMU &&
Core::g_CoreStartupParameter.bFastmem && Core::g_CoreStartupParameter.bFastmem &&
@ -323,11 +320,11 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
if (signExtend && accessSize < 32) if (signExtend && accessSize < 32)
{ {
// Need to sign extend values coming from the Read_U* functions. // Need to sign extend values coming from the Read_U* functions.
MOVSX(32, accessSize, reg_value, R(EAX)); MOVSX(32, accessSize, reg_value, R(ABI_RETURN));
} }
else if (reg_value != EAX) else if (reg_value != ABI_RETURN)
{ {
MOVZX(64, accessSize, reg_value, R(EAX)); MOVZX(64, accessSize, reg_value, R(ABI_RETURN));
} }
MEMCHECK_END MEMCHECK_END
@ -338,15 +335,15 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
OpArg addr_loc = opAddress; OpArg addr_loc = opAddress;
if (offset) if (offset)
{ {
addr_loc = R(EAX); addr_loc = R(RSCRATCH);
if (opAddress.IsSimpleReg()) if (opAddress.IsSimpleReg())
{ {
LEA(32, EAX, MDisp(opAddress.GetSimpleReg(), offset)); LEA(32, RSCRATCH, MDisp(opAddress.GetSimpleReg(), offset));
} }
else else
{ {
MOV(32, R(EAX), opAddress); MOV(32, R(RSCRATCH), opAddress);
ADD(32, R(EAX), Imm32(offset)); ADD(32, R(RSCRATCH), Imm32(offset));
} }
} }
TEST(32, addr_loc, Imm32(mem_mask)); TEST(32, addr_loc, Imm32(mem_mask));
@ -376,11 +373,11 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
if (signExtend && accessSize < 32) if (signExtend && accessSize < 32)
{ {
// Need to sign extend values coming from the Read_U* functions. // Need to sign extend values coming from the Read_U* functions.
MOVSX(32, accessSize, reg_value, R(EAX)); MOVSX(32, accessSize, reg_value, R(ABI_RETURN));
} }
else if (reg_value != EAX) else if (reg_value != ABI_RETURN)
{ {
MOVZX(64, accessSize, reg_value, R(EAX)); MOVZX(64, accessSize, reg_value, R(ABI_RETURN));
} }
MEMCHECK_END MEMCHECK_END
@ -395,13 +392,8 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap) u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap)
{ {
if (accessSize == 8 && reg_value >= 4)
{
PanicAlert("WARNING: likely incorrect use of UnsafeWriteRegToReg!");
}
u8* result = GetWritableCodePtr(); u8* result = GetWritableCodePtr();
OpArg dest = MComplex(RBX, reg_addr, SCALE_1, offset); OpArg dest = MComplex(RMEM, reg_addr, SCALE_1, offset);
if (swap) if (swap)
{ {
if (cpu_info.bMOVBE) if (cpu_info.bMOVBE)
@ -410,6 +402,7 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acc
} }
else else
{ {
if (accessSize > 8)
BSWAP(accessSize, reg_value); BSWAP(accessSize, reg_value);
result = GetWritableCodePtr(); result = GetWritableCodePtr();
MOV(accessSize, dest, R(reg_value)); MOV(accessSize, dest, R(reg_value));
@ -423,10 +416,8 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acc
return result; return result;
} }
// Destroys both arg registers
void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags) void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags)
{ {
registersInUse &= ~(1 << RAX);
if (!Core::g_CoreStartupParameter.bMMU && if (!Core::g_CoreStartupParameter.bMMU &&
Core::g_CoreStartupParameter.bFastmem && Core::g_CoreStartupParameter.bFastmem &&
!(flags & (SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_FASTMEM)) !(flags & (SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_FASTMEM))
@ -449,7 +440,17 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
} }
if (offset) if (offset)
{
if (flags & SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR)
{
LEA(32, RSCRATCH, MDisp(reg_addr, (u32)offset));
reg_addr = RSCRATCH;
}
else
{
ADD(32, R(reg_addr), Imm32((u32)offset)); ADD(32, R(reg_addr), Imm32((u32)offset));
}
}
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS; u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
@ -468,7 +469,7 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
TEST(32, R(reg_addr), Imm32(mem_mask)); TEST(32, R(reg_addr), Imm32(mem_mask));
FixupBranch fast = J_CC(CC_Z, true); FixupBranch fast = J_CC(CC_Z, true);
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
MOV(32, M(&PC), Imm32(jit->js.compilerPC)); MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC));
bool noProlog = (0 != (flags & SAFE_LOADSTORE_NO_PROLOG)); bool noProlog = (0 != (flags & SAFE_LOADSTORE_NO_PROLOG));
bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP); bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP);
ABI_PushRegistersAndAdjustStack(registersInUse, noProlog); ABI_PushRegistersAndAdjustStack(registersInUse, noProlog);
@ -494,20 +495,20 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
SetJumpTarget(exit); SetJumpTarget(exit);
} }
// Destroys both arg registers and EAX // Destroys the same as SafeWrite plus RSCRATCH. TODO: see if we can avoid temporaries here
// Stores the low 32 bits of xmm_value to emulated memory: the value is first
// copied into a general-purpose scratch register, then handed to
// SafeWriteRegToReg as a 32-bit store. reg_addr, offset, registersInUse and
// flags are forwarded unchanged.
void EmuCodeBlock::SafeWriteF32ToReg(X64Reg xmm_value, X64Reg reg_addr, s32 offset, u32 registersInUse, int flags) void EmuCodeBlock::SafeWriteF32ToReg(X64Reg xmm_value, X64Reg reg_addr, s32 offset, u32 registersInUse, int flags)
{ {
// TODO: PSHUFB might be faster if fastmem supported MOVSS. // TODO: PSHUFB might be faster if fastmem supported MOVSS.
MOVD_xmm(R(EAX), xmm_value); MOVD_xmm(R(RSCRATCH), xmm_value);
SafeWriteRegToReg(EAX, reg_addr, 32, offset, registersInUse, flags); SafeWriteRegToReg(RSCRATCH, reg_addr, 32, offset, registersInUse, flags);
} }
// Emits a store of `arg` to a compile-time-known emulated address. The
// address is masked with 0x3FFFFFFF and used as a displacement from the
// emulated-memory base register.
//   accessSize - store width in bits
//   arg        - register holding the value
//   address    - emulated address (only the low 30 bits are used)
//   swap       - when true, the store goes through SwapAndStore; otherwise a
//                plain MOV is emitted
void EmuCodeBlock::WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap) void EmuCodeBlock::WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap)
{ {
if (swap) if (swap)
SwapAndStore(accessSize, MDisp(RBX, address & 0x3FFFFFFF), arg); SwapAndStore(accessSize, MDisp(RMEM, address & 0x3FFFFFFF), arg);
else else
MOV(accessSize, MDisp(RBX, address & 0x3FFFFFFF), R(arg)); MOV(accessSize, MDisp(RMEM, address & 0x3FFFFFFF), R(arg));
} }
void EmuCodeBlock::ForceSinglePrecisionS(X64Reg xmm) void EmuCodeBlock::ForceSinglePrecisionS(X64Reg xmm)
@ -584,20 +585,20 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
// Grab Exponent // Grab Exponent
PAND(XMM1, M((void *)&double_exponent)); PAND(XMM1, M((void *)&double_exponent));
PSRLQ(XMM1, 52); PSRLQ(XMM1, 52);
MOVD_xmm(R(EAX), XMM1); MOVD_xmm(R(RSCRATCH), XMM1);
// Check if the double is in the range of valid single subnormal // Check if the double is in the range of valid single subnormal
CMP(16, R(EAX), Imm16(896)); CMP(16, R(RSCRATCH), Imm16(896));
FixupBranch NoDenormalize = J_CC(CC_G); FixupBranch NoDenormalize = J_CC(CC_G);
CMP(16, R(EAX), Imm16(874)); CMP(16, R(RSCRATCH), Imm16(874));
FixupBranch NoDenormalize2 = J_CC(CC_L); FixupBranch NoDenormalize2 = J_CC(CC_L);
// Denormalise // Denormalise
// shift = (905 - Exponent) plus the 21 bit double to single shift // shift = (905 - Exponent) plus the 21 bit double to single shift
MOV(16, R(EAX), Imm16(905 + 21)); MOV(16, R(RSCRATCH), Imm16(905 + 21));
MOVD_xmm(XMM0, R(EAX)); MOVD_xmm(XMM0, R(RSCRATCH));
PSUBQ(XMM0, R(XMM1)); PSUBQ(XMM0, R(XMM1));
// xmm1 = fraction | 0x0010000000000000 // xmm1 = fraction | 0x0010000000000000
@ -648,12 +649,12 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
// Changing the FPU mode is very expensive, so we can't do that. // Changing the FPU mode is very expensive, so we can't do that.
// Here, check to see if the exponent is small enough that it will result in a denormal, and pass it to the x87 unit // Here, check to see if the exponent is small enough that it will result in a denormal, and pass it to the x87 unit
// if it is. // if it is.
MOVQ_xmm(R(RAX), src); MOVQ_xmm(R(RSCRATCH), src);
SHR(64, R(RAX), Imm8(55)); SHR(64, R(RSCRATCH), Imm8(55));
// Exponents 0x369 <= x <= 0x380 are denormal. This code accepts the range 0x368 <= x <= 0x387 // Exponents 0x369 <= x <= 0x380 are denormal. This code accepts the range 0x368 <= x <= 0x387
// to save an instruction, since diverting a few more floats to the slow path can't hurt much. // to save an instruction, since diverting a few more floats to the slow path can't hurt much.
SUB(8, R(AL), Imm8(0x6D)); SUB(8, R(RSCRATCH), Imm8(0x6D));
CMP(8, R(AL), Imm8(0x3)); CMP(8, R(RSCRATCH), Imm8(0x3));
FixupBranch x87Conversion = J_CC(CC_BE); FixupBranch x87Conversion = J_CC(CC_BE);
CVTSD2SS(dst, R(src)); CVTSD2SS(dst, R(src));
FixupBranch continue1 = J(); FixupBranch continue1 = J();
@ -674,7 +675,7 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr
{ {
// If the input isn't denormal, just do things the simple way -- otherwise, go through the x87 unit, which has // If the input isn't denormal, just do things the simple way -- otherwise, go through the x87 unit, which has
// flush-to-zero off. // flush-to-zero off.
X64Reg gprsrc = src_is_gpr ? src : EAX; X64Reg gprsrc = src_is_gpr ? src : RSCRATCH;
if (src_is_gpr) if (src_is_gpr)
{ {
MOVD_xmm(dst, R(src)); MOVD_xmm(dst, R(src));
@ -683,7 +684,7 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr
{ {
if (dst != src) if (dst != src)
MOVAPD(dst, R(src)); MOVAPD(dst, R(src));
MOVD_xmm(EAX, R(src)); MOVD_xmm(RSCRATCH, R(src));
} }
// A sneaky hack: floating-point zero is rather common and we don't want to confuse it for denormals and // A sneaky hack: floating-point zero is rather common and we don't want to confuse it for denormals and
// needlessly send it through the slow path. If we subtract 1 before doing the comparison, it turns // needlessly send it through the slow path. If we subtract 1 before doing the comparison, it turns
@ -718,19 +719,19 @@ static const u64 GC_ALIGNED16(psDoubleNoSign[2]) = {0x7FFFFFFFFFFFFFFFULL, 0};
// quite that necessary. // quite that necessary.
void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm) void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
{ {
AND(32, M(&FPSCR), Imm32(~FPRF_MASK)); AND(32, PPCSTATE(fpscr), Imm32(~FPRF_MASK));
FixupBranch continue1, continue2, continue3, continue4; FixupBranch continue1, continue2, continue3, continue4;
if (cpu_info.bSSE4_1) if (cpu_info.bSSE4_1)
{ {
MOVQ_xmm(R(RAX), xmm); MOVQ_xmm(R(RSCRATCH), xmm);
SHR(64, R(RAX), Imm8(63)); // Get the sign bit; almost all the branches need it. SHR(64, R(RSCRATCH), Imm8(63)); // Get the sign bit; almost all the branches need it.
PTEST(xmm, M((void*)psDoubleExp)); PTEST(xmm, M((void*)psDoubleExp));
FixupBranch maxExponent = J_CC(CC_C); FixupBranch maxExponent = J_CC(CC_C);
FixupBranch zeroExponent = J_CC(CC_Z); FixupBranch zeroExponent = J_CC(CC_Z);
// Nice normalized number: sign ? PPC_FPCLASS_NN : PPC_FPCLASS_PN; // Nice normalized number: sign ? PPC_FPCLASS_NN : PPC_FPCLASS_PN;
LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_NN - MathUtil::PPC_FPCLASS_PN, MathUtil::PPC_FPCLASS_PN)); LEA(32, RSCRATCH, MScaled(RSCRATCH, MathUtil::PPC_FPCLASS_NN - MathUtil::PPC_FPCLASS_PN, MathUtil::PPC_FPCLASS_PN));
continue1 = J(); continue1 = J();
SetJumpTarget(maxExponent); SetJumpTarget(maxExponent);
@ -738,12 +739,12 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
FixupBranch notNAN = J_CC(CC_Z); FixupBranch notNAN = J_CC(CC_Z);
// Max exponent + mantissa: PPC_FPCLASS_QNAN // Max exponent + mantissa: PPC_FPCLASS_QNAN
MOV(32, R(EAX), Imm32(MathUtil::PPC_FPCLASS_QNAN)); MOV(32, R(RSCRATCH), Imm32(MathUtil::PPC_FPCLASS_QNAN));
continue2 = J(); continue2 = J();
// Max exponent + no mantissa: sign ? PPC_FPCLASS_NINF : PPC_FPCLASS_PINF; // Max exponent + no mantissa: sign ? PPC_FPCLASS_NINF : PPC_FPCLASS_PINF;
SetJumpTarget(notNAN); SetJumpTarget(notNAN);
LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_NINF - MathUtil::PPC_FPCLASS_PINF, MathUtil::PPC_FPCLASS_NINF)); LEA(32, RSCRATCH, MScaled(RSCRATCH, MathUtil::PPC_FPCLASS_NINF - MathUtil::PPC_FPCLASS_PINF, MathUtil::PPC_FPCLASS_NINF));
continue3 = J(); continue3 = J();
SetJumpTarget(zeroExponent); SetJumpTarget(zeroExponent);
@ -751,72 +752,72 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
FixupBranch zero = J_CC(CC_Z); FixupBranch zero = J_CC(CC_Z);
// No exponent + mantissa: sign ? PPC_FPCLASS_ND : PPC_FPCLASS_PD; // No exponent + mantissa: sign ? PPC_FPCLASS_ND : PPC_FPCLASS_PD;
LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_ND - MathUtil::PPC_FPCLASS_PD, MathUtil::PPC_FPCLASS_ND)); LEA(32, RSCRATCH, MScaled(RSCRATCH, MathUtil::PPC_FPCLASS_ND - MathUtil::PPC_FPCLASS_PD, MathUtil::PPC_FPCLASS_ND));
continue4 = J(); continue4 = J();
// Zero: sign ? PPC_FPCLASS_NZ : PPC_FPCLASS_PZ; // Zero: sign ? PPC_FPCLASS_NZ : PPC_FPCLASS_PZ;
SetJumpTarget(zero); SetJumpTarget(zero);
SHL(32, R(EAX), Imm8(4)); SHL(32, R(RSCRATCH), Imm8(4));
ADD(32, R(EAX), Imm8(MathUtil::PPC_FPCLASS_PZ)); ADD(32, R(RSCRATCH), Imm8(MathUtil::PPC_FPCLASS_PZ));
} }
else else
{ {
MOVQ_xmm(R(RAX), xmm); MOVQ_xmm(R(RSCRATCH), xmm);
TEST(64, R(RAX), M((void*)psDoubleExp)); TEST(64, R(RSCRATCH), M((void*)psDoubleExp));
FixupBranch zeroExponent = J_CC(CC_Z); FixupBranch zeroExponent = J_CC(CC_Z);
AND(64, R(RAX), M((void*)psDoubleNoSign)); AND(64, R(RSCRATCH), M((void*)psDoubleNoSign));
CMP(64, R(RAX), M((void*)psDoubleExp)); CMP(64, R(RSCRATCH), M((void*)psDoubleExp));
FixupBranch nan = J_CC(CC_G); // This works because if the sign bit is set, RAX is negative FixupBranch nan = J_CC(CC_G); // This works because if the sign bit is set, RSCRATCH is negative
FixupBranch infinity = J_CC(CC_E); FixupBranch infinity = J_CC(CC_E);
MOVQ_xmm(R(RAX), xmm); MOVQ_xmm(R(RSCRATCH), xmm);
SHR(64, R(RAX), Imm8(63)); SHR(64, R(RSCRATCH), Imm8(63));
LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_NN - MathUtil::PPC_FPCLASS_PN, MathUtil::PPC_FPCLASS_PN)); LEA(32, RSCRATCH, MScaled(RSCRATCH, MathUtil::PPC_FPCLASS_NN - MathUtil::PPC_FPCLASS_PN, MathUtil::PPC_FPCLASS_PN));
continue1 = J(); continue1 = J();
SetJumpTarget(nan); SetJumpTarget(nan);
MOVQ_xmm(R(RAX), xmm); MOVQ_xmm(R(RSCRATCH), xmm);
SHR(64, R(RAX), Imm8(63)); SHR(64, R(RSCRATCH), Imm8(63));
MOV(32, R(EAX), Imm32(MathUtil::PPC_FPCLASS_QNAN)); MOV(32, R(RSCRATCH), Imm32(MathUtil::PPC_FPCLASS_QNAN));
continue2 = J(); continue2 = J();
SetJumpTarget(infinity); SetJumpTarget(infinity);
MOVQ_xmm(R(RAX), xmm); MOVQ_xmm(R(RSCRATCH), xmm);
SHR(64, R(RAX), Imm8(63)); SHR(64, R(RSCRATCH), Imm8(63));
LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_NINF - MathUtil::PPC_FPCLASS_PINF, MathUtil::PPC_FPCLASS_NINF)); LEA(32, RSCRATCH, MScaled(RSCRATCH, MathUtil::PPC_FPCLASS_NINF - MathUtil::PPC_FPCLASS_PINF, MathUtil::PPC_FPCLASS_NINF));
continue3 = J(); continue3 = J();
SetJumpTarget(zeroExponent); SetJumpTarget(zeroExponent);
TEST(64, R(RAX), R(RAX)); TEST(64, R(RSCRATCH), R(RSCRATCH));
FixupBranch zero = J_CC(CC_Z); FixupBranch zero = J_CC(CC_Z);
SHR(64, R(RAX), Imm8(63)); SHR(64, R(RSCRATCH), Imm8(63));
LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_ND - MathUtil::PPC_FPCLASS_PD, MathUtil::PPC_FPCLASS_ND)); LEA(32, RSCRATCH, MScaled(RSCRATCH, MathUtil::PPC_FPCLASS_ND - MathUtil::PPC_FPCLASS_PD, MathUtil::PPC_FPCLASS_ND));
continue4 = J(); continue4 = J();
SetJumpTarget(zero); SetJumpTarget(zero);
SHR(64, R(RAX), Imm8(63)); SHR(64, R(RSCRATCH), Imm8(63));
SHL(32, R(EAX), Imm8(4)); SHL(32, R(RSCRATCH), Imm8(4));
ADD(32, R(EAX), Imm8(MathUtil::PPC_FPCLASS_PZ)); ADD(32, R(RSCRATCH), Imm8(MathUtil::PPC_FPCLASS_PZ));
} }
SetJumpTarget(continue1); SetJumpTarget(continue1);
SetJumpTarget(continue2); SetJumpTarget(continue2);
SetJumpTarget(continue3); SetJumpTarget(continue3);
SetJumpTarget(continue4); SetJumpTarget(continue4);
SHL(32, R(EAX), Imm8(FPRF_SHIFT)); SHL(32, R(RSCRATCH), Imm8(FPRF_SHIFT));
OR(32, M(&FPSCR), R(EAX)); OR(32, PPCSTATE(fpscr), R(RSCRATCH));
} }
// Emits a 32-bit AND on the emulated XER register (spr[SPR_XER]) that clears
// the bits in XER_CA_MASK and leaves all other bits untouched.
void EmuCodeBlock::JitClearCA() void EmuCodeBlock::JitClearCA()
{ {
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0 AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0
} }
// Emits a 32-bit OR on the emulated XER register (spr[SPR_XER]) that sets the
// bits in XER_CA_MASK and leaves all other bits untouched.
void EmuCodeBlock::JitSetCA() void EmuCodeBlock::JitSetCA()
{ {
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1 OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1
} }
// Emits a single 32-bit AND on the emulated XER register that always clears
// the XER_CA_MASK bits and, when `oe` is true, the XER_OV_MASK bits as well.
//   oe - true to clear the overflow bits together with the carry bits
void EmuCodeBlock::JitClearCAOV(bool oe) void EmuCodeBlock::JitClearCAOV(bool oe)
{ {
if (oe) if (oe)
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK & ~XER_OV_MASK)); //XER.CA, XER.OV = 0 AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK & ~XER_OV_MASK)); //XER.CA, XER.OV = 0
else else
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0 AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0
} }

View File

@ -6,6 +6,7 @@
#include <unordered_map> #include <unordered_map>
#include "Common/CPUDetect.h"
#include "Common/x64Emitter.h" #include "Common/x64Emitter.h"
namespace MMIO { class Mapping; } namespace MMIO { class Mapping; }
@ -13,13 +14,23 @@ namespace MMIO { class Mapping; }
// MEMCHECK_START / MEMCHECK_END bracket code that must be skipped when the
// preceding memory access raised a DSI exception.
// MEMCHECK_START declares a FixupBranch named memException in the current
// scope. Only when jit->js.memcheck is set does it emit a TEST of the
// emulated Exceptions field against EXCEPTION_DSI followed by a conditional
// jump (taken when the bit is set) whose target is filled in by MEMCHECK_END.
// Both macros must be used in the same scope, and at most once per scope,
// because of the fixed variable name.
#define MEMCHECK_START \ #define MEMCHECK_START \
Gen::FixupBranch memException; \ Gen::FixupBranch memException; \
if (jit->js.memcheck) \ if (jit->js.memcheck) \
{ TEST(32, Gen::M((void *)&PowerPC::ppcState.Exceptions), Gen::Imm32(EXCEPTION_DSI)); \ { TEST(32, PPCSTATE(Exceptions), Gen::Imm32(EXCEPTION_DSI)); \
memException = J_CC(Gen::CC_NZ, true); } memException = J_CC(Gen::CC_NZ, true); }
// MEMCHECK_END binds the jump emitted by MEMCHECK_START to the current code
// position; it emits nothing when jit->js.memcheck is not set.
#define MEMCHECK_END \ #define MEMCHECK_END \
if (jit->js.memcheck) \ if (jit->js.memcheck) \
SetJumpTarget(memException); SetJumpTarget(memException);
// PPCSTATE(x) builds a memory operand for the field PowerPC::ppcState.x,
// expressed as a displacement from the RPPCSTATE register: the byte offset of
// x inside ppcState, minus 0x80.
// NOTE(review): this relies on RPPCSTATE holding &PowerPC::ppcState + 0x80
// whenever generated code runs - confirm where the register is loaded.
// We offset by 0x80 because the range of one byte memory offsets is
// -0x80..0x7f.
#define PPCSTATE(x) MDisp(RPPCSTATE, \
(int) ((char *) &PowerPC::ppcState.x - (char *) &PowerPC::ppcState) - 0x80)
// In case you want to disable the ppcstate register:
// #define PPCSTATE(x) M((void*) &PowerPC::ppcState.x)
// Shorthands for individual special-purpose register slots.
#define PPCSTATE_LR PPCSTATE(spr[SPR_LR])
#define PPCSTATE_CTR PPCSTATE(spr[SPR_CTR])
#define PPCSTATE_SRR0 PPCSTATE(spr[SPR_SRR0])
#define PPCSTATE_SRR1 PPCSTATE(spr[SPR_SRR1])
// Like XCodeBlock but has some utilities for memory access. // Like XCodeBlock but has some utilities for memory access.
class EmuCodeBlock : public Gen::X64CodeBlock class EmuCodeBlock : public Gen::X64CodeBlock
@ -42,11 +53,21 @@ public:
{ {
SAFE_LOADSTORE_NO_SWAP = 1, SAFE_LOADSTORE_NO_SWAP = 1,
SAFE_LOADSTORE_NO_PROLOG = 2, SAFE_LOADSTORE_NO_PROLOG = 2,
SAFE_LOADSTORE_NO_FASTMEM = 4 SAFE_LOADSTORE_NO_FASTMEM = 4,
SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR = 8
}; };
void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags = 0); void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags = 0);
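// SafeWriteRegToReg clobbers RSCRATCH or reg_addr, depending on whether
// SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR is set in flags.
// Preserves reg_value if the load fails and js.memcheck is enabled.
// NOTE(review): "load" suggests the last sentence describes SafeLoadToReg
// rather than the store declared below - confirm and move it if so.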
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags = 0); void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags = 0);
// Tells callers whether emitting a store will modify the value register
// itself. Applies to both the safe and the unsafe WriteRegToReg helpers.
//   accessSize - store width in bits
//   swap       - whether the store byte-swaps the value
// Returns true only for a swapped store wider than 8 bits on a CPU without
// MOVBE, i.e. the case where the value is byte-swapped in place before the MOV.
bool WriteClobbersRegValue(int accessSize, bool swap)
{
	// Nothing touches the value register without a swap; with MOVBE the
	// in-place BSWAP path is not taken.
	if (!swap || cpu_info.bMOVBE)
		return false;

	// A single byte has no byte order, so no in-place swap is emitted for it.
	return accessSize > 8;
}
void SafeWriteF32ToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, s32 offset, u32 registersInUse, int flags = 0); void SafeWriteF32ToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, s32 offset, u32 registersInUse, int flags = 0);
void WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap = false); void WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap = false);
@ -58,9 +79,8 @@ public:
void ForceSinglePrecisionP(Gen::X64Reg xmm); void ForceSinglePrecisionP(Gen::X64Reg xmm);
void Force25BitPrecision(Gen::X64Reg xmm, Gen::X64Reg tmp); void Force25BitPrecision(Gen::X64Reg xmm, Gen::X64Reg tmp);
// EAX might get trashed // RSCRATCH might get trashed
void ConvertSingleToDouble(Gen::X64Reg dst, Gen::X64Reg src, bool src_is_gpr = false); void ConvertSingleToDouble(Gen::X64Reg dst, Gen::X64Reg src, bool src_is_gpr = false);
// EAX might get trashed
void ConvertDoubleToSingle(Gen::X64Reg dst, Gen::X64Reg src); void ConvertDoubleToSingle(Gen::X64Reg dst, Gen::X64Reg src);
void SetFPRF(Gen::X64Reg xmm); void SetFPRF(Gen::X64Reg xmm);
protected: protected:

View File

@ -40,7 +40,7 @@ instruction and generates code. Dead code elimination works in this step,
by simply skipping unused instructions. The register allocator is a dumb, by simply skipping unused instructions. The register allocator is a dumb,
greedy allocator: at the moment, it's really a bit too dumb, but it's greedy allocator: at the moment, it's really a bit too dumb, but it's
actually not as bad as it looks: unless a block is relatively long, spills actually not as bad as it looks: unless a block is relatively long, spills
are rarely needed. ECX is used as a scratch register: requiring a scratch are rarely needed. EDX is used as a scratch register: requiring a scratch
register isn't ideal, but the register allocator is too dumb to handle register isn't ideal, but the register allocator is too dumb to handle
instructions that need a specific register at the moment. instructions that need a specific register at the moment.

View File

@ -321,7 +321,7 @@ void JitILBase::divwux(UGeckoInstruction inst)
#if 0 #if 0
int a = inst.RA, b = inst.RB, d = inst.RD; int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.FlushLockX(EDX); gpr.FlushLockX(RSCRATCH2);
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
if (d != a && d != b) if (d != a && d != b)
@ -333,11 +333,11 @@ void JitILBase::divwux(UGeckoInstruction inst)
gpr.LoadToX64(d, true, true); gpr.LoadToX64(d, true, true);
} }
MOV(32, R(EAX), gpr.R(a)); MOV(32, R(RSCRATCH), gpr.R(a));
XOR(32, R(EDX), R(EDX)); XOR(32, R(RSCRATCH2), R(RSCRATCH2));
gpr.KillImmediate(b); gpr.KillImmediate(b);
DIV(32, gpr.R(b)); DIV(32, gpr.R(b));
MOV(32, gpr.R(d), R(EAX)); MOV(32, gpr.R(d), R(RSCRATCH));
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();

View File

@ -137,19 +137,13 @@ void JitILBase::dcbz(UGeckoInstruction inst)
return; return;
} }
INSTRUCTION_START; INSTRUCTION_START;
MOV(32, R(EAX), gpr.R(inst.RB)); MOV(32, R(RSCRATCH), gpr.R(inst.RB));
if (inst.RA) if (inst.RA)
ADD(32, R(EAX), gpr.R(inst.RA)); ADD(32, R(RSCRATCH), gpr.R(inst.RA));
AND(32, R(EAX), Imm32(~31)); AND(32, R(RSCRATCH), Imm32(~31));
PXOR(XMM0, R(XMM0)); PXOR(XMM0, R(XMM0));
#if _M_X86_64 MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 0), XMM0);
MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0); MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 16), XMM0);
MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0);
#else
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
MOVAPS(MDisp(EAX, (u32)Memory::base), XMM0);
MOVAPS(MDisp(EAX, (u32)Memory::base + 16), XMM0);
#endif
#endif #endif
} }

View File

@ -4,6 +4,8 @@
#pragma once #pragma once
#include <tuple>
#include "Common/BreakPoints.h" #include "Common/BreakPoints.h"
#include "Common/Common.h" #include "Common/Common.h"
@ -30,11 +32,6 @@ struct GC_ALIGNED64(PowerPCState)
{ {
u32 gpr[32]; // General purpose registers. r1 = stack pointer. u32 gpr[32]; // General purpose registers. r1 = stack pointer.
// The paired singles are strange : PS0 is stored in the full 64 bits of each FPR
// but ps calculations are only done in 32-bit precision, and PS1 is only 32 bits.
// Since we want to use SIMD, SSE2 is the only viable alternative - 2x double.
u64 ps[32][2];
u32 pc; // program counter u32 pc; // program counter
u32 npc; u32 npc;
@ -64,6 +61,20 @@ struct GC_ALIGNED64(PowerPCState)
// This variable should be inside of the CoreTiming namespace if we wanted to be correct. // This variable should be inside of the CoreTiming namespace if we wanted to be correct.
int downcount; int downcount;
#if _M_X86_64
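// This member exists for the purpose of the static_assert that follows this
// struct, which checks that its offset <= 0x100. (NOTE(review): this comment
// originally pointed at x86 JitBase.cpp; confirm whether another assertion
// lives there.) To minimize code size on x86, we want as much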
// useful stuff in the one-byte offset range as possible - which is why ps
// is sitting down here. It currently doesn't make a difference on other
// supported architectures.
std::tuple<> above_fits_in_first_0x100;
#endif
// The paired singles are strange : PS0 is stored in the full 64 bits of each FPR
// but ps calculations are only done in 32-bit precision, and PS1 is only 32 bits.
// Since we want to use SIMD, SSE2 is the only viable alternative - 2x double.
GC_ALIGNED16(u64 ps[32][2]);
u32 sr[16]; // Segment registers. u32 sr[16]; // Segment registers.
// special purpose registers - controls quantizers, DMA, and lots of other misc extensions. // special purpose registers - controls quantizers, DMA, and lots of other misc extensions.
@ -84,6 +95,10 @@ struct GC_ALIGNED64(PowerPCState)
InstructionCache iCache; InstructionCache iCache;
}; };
// Compile-time guard, x86-64 builds only: every PowerPCState member declared
// before the above_fits_in_first_0x100 marker must end within the first 0x100
// bytes of the struct. The build fails with the message below if a member is
// added or grown so that the marker moves past that offset.
#if _M_X86_64
static_assert(offsetof(PowerPC::PowerPCState, above_fits_in_first_0x100) <= 0x100, "top of PowerPCState too big");
#endif
enum CPUState enum CPUState
{ {
CPU_RUNNING = 0, CPU_RUNNING = 0,