diff --git a/Source/Core/Common/x64ABI.cpp b/Source/Core/Common/x64ABI.cpp
index c86c8f8b24..45465619bd 100644
--- a/Source/Core/Common/x64ABI.cpp
+++ b/Source/Core/Common/x64ABI.cpp
@@ -353,20 +353,7 @@ void XEmitter::ABI_CallFunctionR(void *func, X64Reg reg1)
 void XEmitter::ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2, bool noProlog)
 {
 	ABI_AlignStack(0, noProlog);
-	if (reg2 != ABI_PARAM1)
-	{
-		if (reg1 != ABI_PARAM1)
-			MOV(64, R(ABI_PARAM1), R(reg1));
-		if (reg2 != ABI_PARAM2)
-			MOV(64, R(ABI_PARAM2), R(reg2));
-	}
-	else
-	{
-		if (reg2 != ABI_PARAM2)
-			MOV(64, R(ABI_PARAM2), R(reg2));
-		if (reg1 != ABI_PARAM1)
-			MOV(64, R(ABI_PARAM1), R(reg1));
-	}
+	MOVTwo(64, ABI_PARAM1, reg1, ABI_PARAM2, reg2, ABI_PARAM3);
 	u64 distance = u64(func) - (u64(code) + 5);
 	if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL)
@@ -382,6 +369,30 @@ void XEmitter::ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2, bool noP
 	ABI_RestoreStack(0, noProlog);
 }
 
+void XEmitter::MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, Gen::X64Reg dst2, Gen::X64Reg src2, X64Reg temp)
+{
+	if (dst1 == src2 && dst2 == src1)
+	{
+		// need a temporary
+		MOV(bits, R(temp), R(src1));
+		src1 = temp;
+	}
+	if (src2 != dst1)
+	{
+		if (dst1 != src1)
+			MOV(bits, R(dst1), R(src1));
+		if (dst2 != src2)
+			MOV(bits, R(dst2), R(src2));
+	}
+	else
+	{
+		if (dst2 != src2)
+			MOV(bits, R(dst2), R(src2));
+		if (dst1 != src1)
+			MOV(bits, R(dst1), R(src1));
+	}
+}
+
 void XEmitter::ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2)
 {
 	ABI_AlignStack(0);
diff --git a/Source/Core/Common/x64ABI.h b/Source/Core/Common/x64ABI.h
index 66abeee5ef..abc9236ef7 100644
--- a/Source/Core/Common/x64ABI.h
+++ b/Source/Core/Common/x64ABI.h
@@ -53,3 +53,5 @@
 
 #endif // WIN32
 
+#define ABI_RETURN RAX
+
diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h
index 558af41767..d6f0699e84 100644
--- a/Source/Core/Common/x64Emitter.h
+++ b/Source/Core/Common/x64Emitter.h
@@ -753,6 +753,9 @@ public:
 	void ABI_CallFunctionR(void *func, X64Reg reg1);
 	void ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2, bool noProlog = false);
 
+	// Helper method for the above, or can be used separately.
+	void MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, Gen::X64Reg dst2, Gen::X64Reg src2, Gen::X64Reg temp);
+
 	// A function that doesn't have any control over what it will do to regs,
 	// such as the dispatcher, should be surrounded by these.
 	void ABI_PushAllCalleeSavedRegsAndAdjustStack();
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
index 102900ebe9..a3707dbbe1 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
@@ -39,14 +39,6 @@ using namespace PowerPC;
 
 // Various notes below
 
-// Register allocation
-// RAX - Generic quicktemp register
-// RBX - point to base of memory map
-// RSI RDI R12 R13 R14 R15 - free for allocation
-// RCX RDX R8 R9 R10 R11 - allocate in emergencies. These need to be flushed before functions are called.
-// RSP - stack pointer, do not generally use, very dangerous
-// RBP - ?
-
 // IMPORTANT:
 // Make sure that all generated code and all emulator state sits under the 2GB boundary so that
 // RIP addressing can be used easily. Windows will always allocate static code under the 2GB boundary.
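
The `MOVTwo` helper added above generalizes the shuffle that `ABI_CallFunctionRR` used to open-code: it performs two register-to-register moves "in parallel", reordering them when the second source lives in the first destination, and spending the temporary only on a true swap. A minimal standalone model of the same resolution logic (not Dolphin code; `Reg` and `resolveTwoMoves` are invented names):

```cpp
#include <cstdio>

enum Reg { RAX, RCX, RDX, R8, TEMP };

static const char* name(Reg r)
{
	static const char* names[] = { "rax", "rcx", "rdx", "r8", "temp" };
	return names[r];
}

// Emits (here: prints) at most three MOVs realizing dst1 <- src1 and
// dst2 <- src2 simultaneously, mirroring MOVTwo's ordering logic.
static void resolveTwoMoves(Reg dst1, Reg src1, Reg dst2, Reg src2, Reg temp)
{
	if (dst1 == src2 && dst2 == src1)
	{
		// Full swap: break the two-element cycle through the temporary.
		printf("mov %s, %s\n", name(temp), name(src1));
		src1 = temp;
	}
	if (src2 != dst1)
	{
		// Writing dst1 first cannot clobber src2.
		if (dst1 != src1)
			printf("mov %s, %s\n", name(dst1), name(src1));
		if (dst2 != src2)
			printf("mov %s, %s\n", name(dst2), name(src2));
	}
	else
	{
		// src2 lives in dst1, so move it out of the way first.
		if (dst2 != src2)
			printf("mov %s, %s\n", name(dst2), name(src2));
		if (dst1 != src1)
			printf("mov %s, %s\n", name(dst1), name(src1));
	}
}

int main()
{
	resolveTwoMoves(RCX, RDX, RDX, RCX, TEMP); // swap: three moves via temp
	resolveTwoMoves(RCX, R8, RDX, RCX, TEMP);  // overlap: two moves, reordered
	return 0;
}
```

`ABI_CallFunctionRR` passes `ABI_PARAM3` as the temporary, presumably because a third argument register is guaranteed to be free at a two-argument call site.
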
@@ -210,8 +202,8 @@ void Jit64::WriteCallInterpreter(UGeckoInstruction inst)
 	fpr.Flush();
 	if (js.isLastInstruction)
 	{
-		MOV(32, M(&PC), Imm32(js.compilerPC));
-		MOV(32, M(&NPC), Imm32(js.compilerPC + 4));
+		MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
+		MOV(32, PPCSTATE(npc), Imm32(js.compilerPC + 4));
 	}
 	Interpreter::_interpreterInstruction instr = GetInterpreterOp(inst);
 	ABI_CallFunctionC((void*)instr, inst.hex);
@@ -279,7 +271,7 @@ void Jit64::WriteExit(u32 destination)
 {
 	Cleanup();
-	SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount));
+	SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
 
 	//If nobody has taken care of this yet (this can be removed when all branches are done)
 	JitBlock *b = js.curBlock;
@@ -298,48 +290,48 @@ void Jit64::WriteExit(u32 destination)
 	}
 	else
 	{
-		MOV(32, M(&PC), Imm32(destination));
+		MOV(32, PPCSTATE(pc), Imm32(destination));
 		JMP(asm_routines.dispatcher, true);
 	}
 
 	b->linkData.push_back(linkData);
 }
 
-void Jit64::WriteExitDestInEAX()
+void Jit64::WriteExitDestInRSCRATCH()
 {
-	MOV(32, M(&PC), R(EAX));
+	MOV(32, PPCSTATE(pc), R(RSCRATCH));
 	Cleanup();
-	SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount));
+	SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
 	JMP(asm_routines.dispatcher, true);
 }
 
-void Jit64::WriteRfiExitDestInEAX()
+void Jit64::WriteRfiExitDestInRSCRATCH()
 {
-	MOV(32, M(&PC), R(EAX));
-	MOV(32, M(&NPC), R(EAX));
+	MOV(32, PPCSTATE(pc), R(RSCRATCH));
+	MOV(32, PPCSTATE(npc), R(RSCRATCH));
 	Cleanup();
 	ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
-	SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount));
+	SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
 	JMP(asm_routines.dispatcher, true);
 }
 
 void Jit64::WriteExceptionExit()
 {
 	Cleanup();
-	MOV(32, R(EAX), M(&PC));
-	MOV(32, M(&NPC), R(EAX));
+	MOV(32, R(RSCRATCH), PPCSTATE(pc));
+	MOV(32, PPCSTATE(npc), R(RSCRATCH));
 	ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
-	SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount));
+	SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
 	JMP(asm_routines.dispatcher, true);
 }
 
 void Jit64::WriteExternalExceptionExit()
 {
 	Cleanup();
-	MOV(32, R(EAX), M(&PC));
-	MOV(32, M(&NPC), R(EAX));
+	MOV(32, R(RSCRATCH), PPCSTATE(pc));
+	MOV(32, PPCSTATE(npc), R(RSCRATCH));
 	ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExternalExceptions));
-	SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount));
+	SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
 	JMP(asm_routines.dispatcher, true);
 }
 
@@ -426,7 +418,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
 
 	// Downcount flag check. The last block decremented downcounter, and the flag should still be available.
 	FixupBranch skip = J_CC(CC_NBE);
-	MOV(32, M(&PC), Imm32(js.blockStart));
+	MOV(32, PPCSTATE(pc), Imm32(js.blockStart));
 	JMP(asm_routines.doTiming, true);  // downcount hit zero - go doTiming.
 	SetJumpTarget(skip);
 
@@ -452,7 +444,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
 	}
 
 #if defined(_DEBUG) || defined(DEBUGFAST) || defined(NAN_CHECK)
 	// should help logged stack-traces become more accurate
-	MOV(32, M(&PC), Imm32(js.blockStart));
+	MOV(32, PPCSTATE(pc), Imm32(js.blockStart));
 #endif
 
 	// Start up the register allocators
@@ -501,7 +493,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
 		if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
 		{
 			js.fifoBytesThisBlock -= 32;
-			MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
+			MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
 			u32 registersInUse = CallerSavedRegistersInUse();
 			ABI_PushRegistersAndAdjustStack(registersInUse, false);
 			ABI_CallFunction((void *)&GPFifo::CheckGatherPipe);
@@ -520,9 +512,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
 				HLEFunction(function);
 				if (type == HLE::HLE_HOOK_REPLACE)
 				{
-					MOV(32, R(EAX), M(&NPC));
+					MOV(32, R(RSCRATCH), PPCSTATE(npc));
 					js.downcountAmount += js.st.numCycles;
-					WriteExitDestInEAX();
+					WriteExitDestInRSCRATCH();
 					break;
 				}
 			}
@@ -537,13 +529,13 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
 				fpr.Flush();
 
 				//This instruction uses FPU - needs to add FP exception bailout
-				TEST(32, M(&PowerPC::ppcState.msr), Imm32(1 << 13)); // Test FP enabled bit
+				TEST(32, PPCSTATE(msr), Imm32(1 << 13)); // Test FP enabled bit
 				FixupBranch b1 = J_CC(CC_NZ, true);
 
 				// If a FPU exception occurs, the exception handler will read
 				// from PC. Update PC with the latest value in case that happens.
-				MOV(32, M(&PC), Imm32(ops[i].address));
-				OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE));
+				MOV(32, PPCSTATE(pc), Imm32(ops[i].address));
+				OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE));
 				WriteExceptionExit();
 
 				SetJumpTarget(b1);
@@ -557,16 +549,16 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
 				gpr.Flush();
 				fpr.Flush();
 
-				TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_ISI | EXCEPTION_PROGRAM | EXCEPTION_SYSCALL | EXCEPTION_FPU_UNAVAILABLE | EXCEPTION_DSI | EXCEPTION_ALIGNMENT));
+				TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_ISI | EXCEPTION_PROGRAM | EXCEPTION_SYSCALL | EXCEPTION_FPU_UNAVAILABLE | EXCEPTION_DSI | EXCEPTION_ALIGNMENT));
 				FixupBranch clearInt = J_CC(CC_NZ, true);
-				TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_EXTERNAL_INT));
+				TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT));
 				FixupBranch noExtException = J_CC(CC_Z, true);
-				TEST(32, M((void *)&PowerPC::ppcState.msr), Imm32(0x0008000));
+				TEST(32, PPCSTATE(msr), Imm32(0x0008000));
 				FixupBranch noExtIntEnable = J_CC(CC_Z, true);
 				TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_FINISH));
 				FixupBranch noCPInt = J_CC(CC_Z, true);
 
-				MOV(32, M(&PC), Imm32(ops[i].address));
+				MOV(32, PPCSTATE(pc), Imm32(ops[i].address));
 				WriteExternalExceptionExit();
 
 				SetJumpTarget(noCPInt);
@@ -580,7 +572,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
 				gpr.Flush();
 				fpr.Flush();
 
-				MOV(32, M(&PC), Imm32(ops[i].address));
+				MOV(32, PPCSTATE(pc), Imm32(ops[i].address));
 				ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckBreakPoints));
 				TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF));
 				FixupBranch noBreakpoint = J_CC(CC_Z);
@@ -597,12 +589,12 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
 				gpr.Flush();
 				fpr.Flush();
 
-				TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_DSI));
+				TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
 				FixupBranch noMemException = J_CC(CC_Z, true);
 
 				// If a memory exception occurs, the exception handler will read
 				// from PC. Update PC with the latest value in case that happens.
-				MOV(32, M(&PC), Imm32(ops[i].address));
+				MOV(32, PPCSTATE(pc), Imm32(ops[i].address));
 				WriteExceptionExit();
 				SetJumpTarget(noMemException);
 			}
@@ -645,13 +637,13 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
 	if (code_block.m_memory_exception)
 	{
 		// Address of instruction could not be translated
-		MOV(32, M(&NPC), Imm32(js.compilerPC));
+		MOV(32, PPCSTATE(npc), Imm32(js.compilerPC));
 
-		OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_ISI));
+		OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_ISI));
 
 		// Remove the invalid instruction from the icache, forcing a recompile
-		MOV(64, R(RAX), ImmPtr(jit->GetBlockCache()->GetICachePtr(js.compilerPC)));
-		MOV(32,MatR(RAX),Imm32(JIT_ICACHE_INVALID_WORD));
+		MOV(64, R(RSCRATCH), ImmPtr(jit->GetBlockCache()->GetICachePtr(js.compilerPC)));
+		MOV(32,MatR(RSCRATCH),Imm32(JIT_ICACHE_INVALID_WORD));
 
 		WriteExceptionExit();
 	}
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h
index c0b5c73260..de95967df0 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.h
@@ -90,10 +90,10 @@ public:
 	// Utilities for use by opcodes
 
 	void WriteExit(u32 destination);
-	void WriteExitDestInEAX();
+	void WriteExitDestInRSCRATCH();
 	void WriteExceptionExit();
 	void WriteExternalExceptionExit();
-	void WriteRfiExitDestInEAX();
+	void WriteRfiExitDestInRSCRATCH();
 	void WriteCallInterpreter(UGeckoInstruction _inst);
 	void Cleanup();
 
@@ -101,16 +101,15 @@ public:
 	void GenerateConstantOverflow(s64 val);
 	void GenerateOverflow();
 	void FinalizeCarryOverflow(bool oe, bool inv = false);
-	void GetCarryEAXAndClear();
-	void FinalizeCarryGenerateOverflowEAX(bool oe, bool inv = false);
+	void GetCarryRSCRATCHAndClear();
+	void FinalizeCarryGenerateOverflowRSCRATCH(bool oe, bool inv = false);
 	void GenerateCarry();
 	void GenerateRC();
 	void ComputeRC(const Gen::OpArg & arg);
 
-	// Reads a given bit of a given CR register part. Clobbers ABI_PARAM1,
-	// don't forget to xlock it before.
+	// Reads a given bit of a given CR register part.
 	void GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate = false);
-	// Clobbers ABI_PARAM1, xlock it before.
+	// Clobbers RDX.
 	void SetCRFieldBit(int field, int bit, Gen::X64Reg in);
 
 	// Generates a branch that will check if a given bit of a CR register part
diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
index 2682ea80f2..1c5b78666f 100644
--- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
@@ -9,14 +9,6 @@
 
 using namespace Gen;
 
-//GLOBAL STATIC ALLOCATIONS x86
-//EAX - ubiquitous scratch register - EVERYBODY scratches this
-
-//GLOBAL STATIC ALLOCATIONS x64
-//EAX - ubiquitous scratch register - EVERYBODY scratches this
-//RBX - Base pointer of memory
-//R15 - Pointer to array of block pointers
-
 // PLAN: no more block numbers - crazy opcodes just contain offset within
 // dynarec buffer
 // At this offset - 4, there is an int specifying the block number.
@@ -27,8 +19,9 @@ void Jit64AsmRoutineManager::Generate()
 	ABI_PushAllCalleeSavedRegsAndAdjustStack();
 
 	// Two statically allocated registers.
-	MOV(64, R(RBX), Imm64((u64)Memory::base));
-	MOV(64, R(R15), Imm64((u64)jit->GetBlockCache()->GetCodePointers())); //It's below 2GB so 32 bits are good enough
+	MOV(64, R(RMEM), Imm64((u64)Memory::base));
+	MOV(64, R(RCODE_POINTERS), Imm64((u64)jit->GetBlockCache()->GetCodePointers())); //It's below 2GB so 32 bits are good enough
+	MOV(64, R(RPPCSTATE), Imm64((u64)&PowerPC::ppcState + 0x80));
 
 	const u8* outerLoop = GetCodePtr();
 	ABI_CallFunction(reinterpret_cast<void *>(&CoreTiming::Advance));
@@ -55,8 +48,8 @@ void Jit64AsmRoutineManager::Generate()
 	SetJumpTarget(skipToRealDispatch);
 
 	dispatcherNoCheck = GetCodePtr();
-	MOV(32, R(EAX), M(&PowerPC::ppcState.pc));
-	dispatcherPcInEAX = GetCodePtr();
+	MOV(32, R(RSCRATCH), PPCSTATE(pc));
+	dispatcherPcInRSCRATCH = GetCodePtr();
 
 	u32 mask = 0;
 	FixupBranch no_mem;
@@ -68,12 +61,12 @@ void Jit64AsmRoutineManager::Generate()
 		mask |= JIT_ICACHE_VMEM_BIT;
 	if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack)
 	{
-		TEST(32, R(EAX), Imm32(mask));
+		TEST(32, R(RSCRATCH), Imm32(mask));
 		no_mem = J_CC(CC_NZ);
 	}
-	AND(32, R(EAX), Imm32(JIT_ICACHE_MASK));
-	MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->iCache));
-	MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
+	AND(32, R(RSCRATCH), Imm32(JIT_ICACHE_MASK));
+	MOV(64, R(RSCRATCH2), Imm64((u64)jit->GetBlockCache()->iCache));
+	MOV(32, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_1, 0));
 
 	if (Core::g_CoreStartupParameter.bWii || Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack)
 	{
@@ -82,22 +75,22 @@ void Jit64AsmRoutineManager::Generate()
 	}
 	if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack)
 	{
-		TEST(32, R(EAX), Imm32(JIT_ICACHE_VMEM_BIT));
+		TEST(32, R(RSCRATCH), Imm32(JIT_ICACHE_VMEM_BIT));
 		FixupBranch no_vmem = J_CC(CC_Z);
-		AND(32, R(EAX), Imm32(JIT_ICACHE_MASK));
-		MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->iCacheVMEM));
-		MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
+		AND(32, R(RSCRATCH), Imm32(JIT_ICACHE_MASK));
+		MOV(64, R(RSCRATCH2), Imm64((u64)jit->GetBlockCache()->iCacheVMEM));
+		MOV(32, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_1, 0));
 
 		if (Core::g_CoreStartupParameter.bWii) exit_vmem = J();
 		SetJumpTarget(no_vmem);
 	}
 	if (Core::g_CoreStartupParameter.bWii)
 	{
-		TEST(32, R(EAX), Imm32(JIT_ICACHE_EXRAM_BIT));
+		TEST(32, R(RSCRATCH), Imm32(JIT_ICACHE_EXRAM_BIT));
 		FixupBranch no_exram = J_CC(CC_Z);
-		AND(32, R(EAX), Imm32(JIT_ICACHEEX_MASK));
-		MOV(64, R(RSI), Imm64((u64)jit->GetBlockCache()->iCacheEx));
-		MOV(32, R(EAX), MComplex(RSI, EAX, SCALE_1, 0));
+		AND(32, R(RSCRATCH), Imm32(JIT_ICACHEEX_MASK));
+		MOV(64, R(RSCRATCH2), Imm64((u64)jit->GetBlockCache()->iCacheEx));
+		MOV(32, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_1, 0));
 
 		SetJumpTarget(no_exram);
 	}
@@ -106,14 +99,14 @@ void Jit64AsmRoutineManager::Generate()
 	if (Core::g_CoreStartupParameter.bWii && (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack))
 		SetJumpTarget(exit_vmem);
 
-	TEST(32, R(EAX), R(EAX));
+	TEST(32, R(RSCRATCH), R(RSCRATCH));
 	FixupBranch notfound = J_CC(CC_L);
 		//grab from list and jump to it
-		JMPptr(MComplex(R15, RAX, 8, 0));
+		JMPptr(MComplex(RCODE_POINTERS, RSCRATCH, 8, 0));
 	SetJumpTarget(notfound);
 
 	//Ok, no block, let's jit
-	MOV(32, R(ABI_PARAM1), M(&PowerPC::ppcState.pc));
+	MOV(32, R(ABI_PARAM1), PPCSTATE(pc));
 	CALL((void *)&Jit);
 
 	JMP(dispatcherNoCheck); // no point in special casing this
@@ -122,10 +115,10 @@ void Jit64AsmRoutineManager::Generate()
 	doTiming = GetCodePtr();
 
 	// Test external exceptions.
-	TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_EXTERNAL_INT | EXCEPTION_PERFORMANCE_MONITOR | EXCEPTION_DECREMENTER));
+	TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT | EXCEPTION_PERFORMANCE_MONITOR | EXCEPTION_DECREMENTER));
 	FixupBranch noExtException = J_CC(CC_Z);
-	MOV(32, R(EAX), M(&PC));
-	MOV(32, M(&NPC), R(EAX));
+	MOV(32, R(RSCRATCH), PPCSTATE(pc));
+	MOV(32, PPCSTATE(npc), R(RSCRATCH));
 	ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExternalExceptions));
 	SetJumpTarget(noExtException);
 
@@ -168,8 +161,8 @@ void Jit64AsmRoutineManager::GenerateCommon()
 	const u8 *fastMemWrite8 = AlignCode16();
 	CMP(32, R(ABI_PARAM2), Imm32(0xCC008000));
 	FixupBranch skip_fast_write = J_CC(CC_NE, false);
-	MOV(32, EAX, M(&m_gatherPipeCount));
-	MOV(8, MDisp(EAX, (u32)&m_gatherPipe), ABI_PARAM1);
+	MOV(32, RSCRATCH, M(&m_gatherPipeCount));
+	MOV(8, MDisp(RSCRATCH, (u32)&m_gatherPipe), ABI_PARAM1);
 	ADD(32, 1, M(&m_gatherPipeCount));
 	RET();
 	SetJumpTarget(skip_fast_write);
diff --git a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp
index e2e0ed6a6c..11eb9de2c7 100644
--- a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp
@@ -200,9 +200,9 @@ const int* GPRRegCache::GetAllocationOrder(size_t& count)
 	{
 		// R12, when used as base register, for example in a LEA, can generate bad code! Need to look into this.
 #ifdef _WIN32
-		RSI, RDI, R13, R14, R8, R9, R10, R11, R12, //, RCX
+		RSI, RDI, R13, R14, R8, R9, R10, R11, R12, RCX
 #else
-		RBP, R13, R14, R8, R9, R10, R11, R12, //, RCX
+		R12, R13, R14, RSI, RDI, R8, R9, R10, R11, RCX
 #endif
 	};
 	count = sizeof(allocationOrder) / sizeof(const int);
@@ -221,12 +221,12 @@ const int* FPURegCache::GetAllocationOrder(size_t& count)
 
 OpArg GPRRegCache::GetDefaultLocation(size_t reg) const
 {
-	return M(&ppcState.gpr[reg]);
+	return PPCSTATE(gpr[reg]);
 }
 
 OpArg FPURegCache::GetDefaultLocation(size_t reg) const
 {
-	return M(&ppcState.ps[reg][0]);
+	return PPCSTATE(ps[reg][0]);
 }
 
 void RegCache::KillImmediate(size_t preg, bool doLoad, bool makeDirty)
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp
index 8bef37cb51..ddeddb1fb4 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp
@@ -28,9 +28,9 @@ void Jit64::sc(UGeckoInstruction inst)
 	gpr.Flush();
 	fpr.Flush();
 
-	MOV(32, M(&PC), Imm32(js.compilerPC + 4));
+	MOV(32, PPCSTATE(pc), Imm32(js.compilerPC + 4));
 	LOCK();
-	OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_SYSCALL));
+	OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_SYSCALL));
 	WriteExceptionExit();
 }
 
@@ -45,13 +45,13 @@ void Jit64::rfi(UGeckoInstruction inst)
 	const u32 mask = 0x87C0FFFF;
 	const u32 clearMSR13 = 0xFFFBFFFF; // Mask used to clear the bit MSR[13]
 	// MSR = ((MSR & ~mask) | (SRR1 & mask)) & clearMSR13;
-	AND(32, M(&MSR), Imm32((~mask) & clearMSR13));
-	MOV(32, R(EAX), M(&SRR1));
-	AND(32, R(EAX), Imm32(mask & clearMSR13));
-	OR(32, M(&MSR), R(EAX));
+	AND(32, PPCSTATE(msr), Imm32((~mask) & clearMSR13));
+	MOV(32, R(RSCRATCH), PPCSTATE_SRR1);
+	AND(32, R(RSCRATCH), Imm32(mask & clearMSR13));
+	OR(32, PPCSTATE(msr), R(RSCRATCH));
 	// NPC = SRR0;
-	MOV(32, R(EAX), M(&SRR0));
-	WriteRfiExitDestInEAX();
+	MOV(32, R(RSCRATCH), PPCSTATE_SRR0);
+	WriteRfiExitDestInRSCRATCH();
 }
 
 void Jit64::bx(UGeckoInstruction inst)
@@ -62,7 +62,7 @@ void Jit64::bx(UGeckoInstruction inst)
 	// We must always process the following sentence
 	// even if the blocks are merged by PPCAnalyst::Flatten().
 	if (inst.LK)
-		MOV(32, M(&LR), Imm32(js.compilerPC + 4));
+		MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4));
 
 	// If this is not the last instruction of a block,
 	// we will skip the rest process.
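
Throughout these hunks, absolute `M(&PowerPC::ppcState.field)` operands become `PPCSTATE(field)`, addressing relative to the fixed RPPCSTATE register that the dispatcher now initializes to `&PowerPC::ppcState + 0x80` (see the JitAsm.cpp hunk above). The macro itself is defined elsewhere in the series, presumably along the lines of `MDisp(RPPCSTATE, field_offset - 0x80)`. A self-contained illustration of why the 0x80 bias matters; `FakePPCState` is an invented stand-in, not the real layout:

```cpp
#include <cstddef>
#include <cstdio>

struct FakePPCState  // invented stand-in for PowerPC::PowerPCState
{
	unsigned int gpr[32];  // 128 bytes
	unsigned int pc, npc;
	int downcount;
};

// True if the displacement fits the short [base + disp8] encoding.
static bool FitsInDisp8(ptrdiff_t disp)
{
	return disp >= -0x80 && disp <= 0x7f;
}

int main()
{
	const ptrdiff_t bias = 0x80;  // RPPCSTATE = &ppcState + 0x80
	const struct { const char* name; size_t offset; } fields[] = {
		{ "gpr[0]", offsetof(FakePPCState, gpr) },
		{ "pc", offsetof(FakePPCState, pc) },
		{ "downcount", offsetof(FakePPCState, downcount) },
	};
	for (const auto& f : fields)
	{
		ptrdiff_t disp = (ptrdiff_t)f.offset - bias;
		printf("%-10s disp %+4td -> disp8 %s\n", f.name, disp,
		       FitsInDisp8(disp) ? "yes" : "no");
	}
	return 0;
}
```

x86-64 `[base + disp8]` encodings accept displacements from -0x80 to 0x7f, so biasing the base register by 0x80 doubles the window of fields reachable with the one-byte form, shrinking nearly every ppcState access in generated code.
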
@@ -82,7 +82,7 @@ void Jit64::bx(UGeckoInstruction inst)
 		destination = js.compilerPC + SignExt26(inst.LI << 2);
 #ifdef ACID_TEST
 	if (inst.LK)
-		AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xFF000000)));
+		AND(32, PPCSTATE(cr), Imm32(~(0xFF000000)));
 #endif
 	if (destination == js.compilerPC)
 	{
@@ -108,7 +108,7 @@ void Jit64::bcx(UGeckoInstruction inst)
 	FixupBranch pCTRDontBranch;
 	if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0)  // Decrement and test CTR
 	{
-		SUB(32, M(&CTR), Imm8(1));
+		SUB(32, PPCSTATE_CTR, Imm8(1));
 		if (inst.BO & BO_BRANCH_IF_CTR_0)
 			pCTRDontBranch = J_CC(CC_NZ, true);
 		else
@@ -123,7 +123,7 @@ void Jit64::bcx(UGeckoInstruction inst)
 	}
 
 	if (inst.LK)
-		MOV(32, M(&LR), Imm32(js.compilerPC + 4));
+		MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4));
 
 	u32 destination;
 	if (inst.AA)
@@ -164,11 +164,11 @@ void Jit64::bcctrx(UGeckoInstruction inst)
 		gpr.Flush();
 		fpr.Flush();
 
-		MOV(32, R(EAX), M(&CTR));
+		MOV(32, R(RSCRATCH), PPCSTATE_CTR);
 		if (inst.LK_3)
-			MOV(32, M(&LR), Imm32(js.compilerPC + 4)); // LR = PC + 4;
-		AND(32, R(EAX), Imm32(0xFFFFFFFC));
-		WriteExitDestInEAX();
+			MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // LR = PC + 4;
+		AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
+		WriteExitDestInRSCRATCH();
 	}
 	else
 	{
@@ -179,15 +179,15 @@ void Jit64::bcctrx(UGeckoInstruction inst)
 		FixupBranch b = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3),
 		                                 !(inst.BO_2 & BO_BRANCH_IF_TRUE));
 
-		MOV(32, R(EAX), M(&CTR));
-		AND(32, R(EAX), Imm32(0xFFFFFFFC));
-		//MOV(32, M(&PC), R(EAX)); => Already done in WriteExitDestInEAX()
+		MOV(32, R(RSCRATCH), PPCSTATE_CTR);
+		AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
+		//MOV(32, PPCSTATE(pc), R(RSCRATCH)); => Already done in WriteExitDestInRSCRATCH()
 		if (inst.LK_3)
-			MOV(32, M(&LR), Imm32(js.compilerPC + 4)); // LR = PC + 4;
+			MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // LR = PC + 4;
 
 		gpr.Flush(FLUSH_MAINTAIN_STATE);
 		fpr.Flush(FLUSH_MAINTAIN_STATE);
-		WriteExitDestInEAX();
+		WriteExitDestInRSCRATCH();
 		// Would really like to continue the block here, but it ends. TODO.
 		SetJumpTarget(b);
@@ -204,7 +204,7 @@ void Jit64::bclrx(UGeckoInstruction inst)
 	FixupBranch pCTRDontBranch;
 	if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0)  // Decrement and test CTR
 	{
-		SUB(32, M(&CTR), Imm8(1));
+		SUB(32, PPCSTATE_CTR, Imm8(1));
 		if (inst.BO & BO_BRANCH_IF_CTR_0)
 			pCTRDontBranch = J_CC(CC_NZ, true);
 		else
@@ -221,17 +221,17 @@ void Jit64::bclrx(UGeckoInstruction inst)
 	// This below line can be used to prove that blr "eats flags" in practice.
 	// This observation will let us do a lot of fun observations.
 #ifdef ACID_TEST
-	AND(32, M(&PowerPC::ppcState.cr), Imm32(~(0xFF000000)));
+	AND(32, PPCSTATE(cr), Imm32(~(0xFF000000)));
 #endif
 
-	MOV(32, R(EAX), M(&LR));
-	AND(32, R(EAX), Imm32(0xFFFFFFFC));
+	MOV(32, R(RSCRATCH), PPCSTATE_LR);
+	AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
 	if (inst.LK)
-		MOV(32, M(&LR), Imm32(js.compilerPC + 4));
+		MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4));
 
 	gpr.Flush(FLUSH_MAINTAIN_STATE);
 	fpr.Flush(FLUSH_MAINTAIN_STATE);
 
-	WriteExitDestInEAX();
+	WriteExitDestInRSCRATCH();
 
 	if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
 		SetJumpTarget( pConditionDontBranch );
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
index 537f02db0d..54c5f22275 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
@@ -248,7 +248,7 @@ void Jit64::fcmpx(UGeckoInstruction inst)
 	fpr.BindToRegister(b, true);
 
 	if (fprf)
-		AND(32, M(&FPSCR), Imm32(~FPRF_MASK));
+		AND(32, PPCSTATE(fpscr), Imm32(~FPRF_MASK));
 
 	// Are we masking sNaN invalid floating point exceptions? If not this could crash if we don't handle the exception?
 	UCOMISD(fpr.R(b).GetSimpleReg(), fpr.R(a));
@@ -271,31 +271,31 @@ void Jit64::fcmpx(UGeckoInstruction inst)
 		pGreater = J_CC(CC_B);
 	}
 
-	MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_EQ)));
+	MOV(64, R(RSCRATCH), Imm64(PPCCRToInternal(CR_EQ)));
 	if (fprf)
-		OR(32, M(&FPSCR), Imm32(CR_EQ << FPRF_SHIFT));
+		OR(32, PPCSTATE(fpscr), Imm32(CR_EQ << FPRF_SHIFT));
 
 	continue1 = J();
 
 	SetJumpTarget(pNaN);
-	MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_SO)));
+	MOV(64, R(RSCRATCH), Imm64(PPCCRToInternal(CR_SO)));
 	if (fprf)
-		OR(32, M(&FPSCR), Imm32(CR_SO << FPRF_SHIFT));
+		OR(32, PPCSTATE(fpscr), Imm32(CR_SO << FPRF_SHIFT));
 
 	if (a != b)
 	{
 		continue2 = J();
 
 		SetJumpTarget(pGreater);
-		MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_GT)));
+		MOV(64, R(RSCRATCH), Imm64(PPCCRToInternal(CR_GT)));
 		if (fprf)
-			OR(32, M(&FPSCR), Imm32(CR_GT << FPRF_SHIFT));
+			OR(32, PPCSTATE(fpscr), Imm32(CR_GT << FPRF_SHIFT));
 
 		continue3 = J();
 
 		SetJumpTarget(pLesser);
-		MOV(64, R(RAX), Imm64(PPCCRToInternal(CR_LT)));
+		MOV(64, R(RSCRATCH), Imm64(PPCCRToInternal(CR_LT)));
 		if (fprf)
-			OR(32, M(&FPSCR), Imm32(CR_LT << FPRF_SHIFT));
+			OR(32, PPCSTATE(fpscr), Imm32(CR_LT << FPRF_SHIFT));
 	}
 
 	SetJumpTarget(continue1);
@@ -305,7 +305,7 @@ void Jit64::fcmpx(UGeckoInstruction inst)
 		SetJumpTarget(continue3);
 	}
 
-	MOV(64, M(&PowerPC::ppcState.cr_val[crf]), R(RAX));
+	MOV(64, PPCSTATE(cr_val[crf]), R(RSCRATCH));
 	fpr.UnlockAll();
 }
 
@@ -375,8 +375,7 @@ void Jit64::frsqrtex(UGeckoInstruction inst)
 	int b = inst.FB;
 	int d = inst.FD;
 
-	// rsqrtex requires ECX and EDX free
-	gpr.FlushLockX(ECX, EDX);
+	gpr.FlushLockX(RSCRATCH_EXTRA);
 	fpr.Lock(b, d);
 	fpr.BindToRegister(d, d == b);
 	MOVSD(XMM0, fpr.R(b));
@@ -395,8 +394,7 @@ void Jit64::fresx(UGeckoInstruction inst)
 	int b = inst.FB;
 	int d = inst.FD;
 
-	// resx requires ECX and EDX free
-	gpr.FlushLockX(ECX, EDX);
+	gpr.FlushLockX(RSCRATCH_EXTRA);
 	fpr.Lock(b, d);
 	fpr.BindToRegister(d, d == b);
 	MOVSD(XMM0, fpr.R(b));
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
index a6b60f8f23..6a37e9bd29 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
@@ -21,12 +21,12 @@ void Jit64::GenerateConstantOverflow(bool overflow)
 	if (overflow)
 	{
 		//XER[OV/SO] = 1
-		OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
+		OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
 	}
 	else
 	{
 		//XER[OV] = 0
-		AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_OV_MASK));
+		AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_OV_MASK));
 	}
 }
 
@@ -34,11 +34,11 @@ void Jit64::GenerateOverflow()
 {
 	FixupBranch jno = J_CC(CC_NO);
 	//XER[OV/SO] = 1
-	OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
+	OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
 	FixupBranch exit = J();
 	SetJumpTarget(jno);
 	//XER[OV] = 0
-	AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_OV_MASK));
+	AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_OV_MASK));
 	SetJumpTarget(exit);
 }
 
@@ -54,7 +54,7 @@ void Jit64::FinalizeCarryOverflow(bool oe, bool inv)
 		JitSetCA();
 		SetJumpTarget(carry1);
 		//XER[OV/SO] = 1
-		OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
+		OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
 		FixupBranch exit = J();
 		SetJumpTarget(jno);
 		// Do carry
@@ -72,14 +72,14 @@ void Jit64::FinalizeCarryOverflow(bool oe, bool inv)
 	}
 }
 
-void Jit64::GetCarryEAXAndClear()
+void Jit64::GetCarryRSCRATCHAndClear()
 {
-	MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER]));
-	BTR(32, R(EAX), Imm8(29));
+	MOV(32, R(RSCRATCH), PPCSTATE(spr[SPR_XER]));
+	BTR(32, R(RSCRATCH), Imm8(29));
 }
 
-// Assumes that XER is in EAX and that the CA bit is clear.
-void Jit64::FinalizeCarryGenerateOverflowEAX(bool oe, bool inv)
+// Assumes that XER is in RSCRATCH and that the CA bit is clear.
+void Jit64::FinalizeCarryGenerateOverflowRSCRATCH(bool oe, bool inv)
 {
 	// USES_XER
 	if (oe)
@@ -87,29 +87,29 @@ void Jit64::FinalizeCarryGenerateOverflowEAX(bool oe, bool inv)
 		FixupBranch jno = J_CC(CC_NO);
 		// Do carry
 		FixupBranch carry1 = J_CC(inv ? CC_C : CC_NC);
-		OR(32, R(EAX), Imm32(XER_CA_MASK));
+		OR(32, R(RSCRATCH), Imm32(XER_CA_MASK));
 		SetJumpTarget(carry1);
 		//XER[OV/SO] = 1
-		OR(32, R(EAX), Imm32(XER_SO_MASK | XER_OV_MASK));
+		OR(32, R(RSCRATCH), Imm32(XER_SO_MASK | XER_OV_MASK));
 		FixupBranch exit = J();
 		SetJumpTarget(jno);
 		// Do carry
 		FixupBranch carry2 = J_CC(inv ? CC_C : CC_NC);
-		OR(32, R(EAX), Imm32(XER_CA_MASK));
+		OR(32, R(RSCRATCH), Imm32(XER_CA_MASK));
 		SetJumpTarget(carry2);
 		//XER[OV] = 0
-		AND(32, R(EAX), Imm32(~XER_OV_MASK));
+		AND(32, R(RSCRATCH), Imm32(~XER_OV_MASK));
 		SetJumpTarget(exit);
 	}
 	else
 	{
 		// Do carry
 		FixupBranch carry1 = J_CC(inv ? CC_C : CC_NC);
-		OR(32, R(EAX), Imm32(XER_CA_MASK));
+		OR(32, R(RSCRATCH), Imm32(XER_CA_MASK));
 		SetJumpTarget(carry1);
 	}
-	// Dump EAX back into XER
-	MOV(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX));
+	// Dump RSCRATCH back into XER
+	MOV(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH));
 }
 
 // Assumes that the flags were just set through an addition.
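
The pair `GetCarryRSCRATCHAndClear` / `FinalizeCarryGenerateOverflowRSCRATCH` keeps XER in a register across the carry-consuming instruction instead of going through memory twice. The load-bearing trick is `BTR`: testing bit 29 copies the guest CA into the host carry flag (so the following `ADC` consumes it directly) while simultaneously clearing it in the register. A small standalone model of the bit bookkeeping (mask values follow the PowerPC XER layout; the code itself is illustrative, not Dolphin's):

```cpp
#include <cstdint>
#include <cstdio>

constexpr uint32_t XER_SO_MASK = 1u << 31;  // summary overflow
constexpr uint32_t XER_OV_MASK = 1u << 30;  // overflow
constexpr uint32_t XER_CA_MASK = 1u << 29;  // carry

int main()
{
	uint32_t xer = XER_CA_MASK | XER_SO_MASK;  // guest carry currently set

	// GetCarryRSCRATCHAndClear: BTR(32, R(RSCRATCH), Imm8(29)) copies CA
	// into the host CF and clears it in RSCRATCH in a single instruction.
	bool host_cf = (xer >> 29) & 1;  // lands in CF, feeds the next ADC
	xer &= ~XER_CA_MASK;             // what remains in RSCRATCH

	// ... the guest add runs as ADC, producing a fresh host carry ...
	bool new_carry = true;  // stand-in for the CF the ADC produces

	// FinalizeCarryGenerateOverflowRSCRATCH (oe == false path): OR the new
	// carry back in, then store RSCRATCH to ppcState.spr[SPR_XER].
	if (new_carry)
		xer |= XER_CA_MASK;

	printf("consumed CA=%d, XER now 0x%08x\n", host_cf, xer);
	return 0;
}
```
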
@@ -117,10 +117,10 @@ void Jit64::GenerateCarry()
 {
 	// USES_XER
 	FixupBranch pNoCarry = J_CC(CC_NC);
-	OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK));
+	OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_CA_MASK));
 	FixupBranch pContinue = J();
 	SetJumpTarget(pNoCarry);
-	AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~(XER_CA_MASK)));
+	AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~(XER_CA_MASK)));
 	SetJumpTarget(pContinue);
 }
 
@@ -128,12 +128,12 @@ void Jit64::ComputeRC(const Gen::OpArg & arg)
 {
 	if (arg.IsImm())
 	{
-		MOV(64, M(&PowerPC::ppcState.cr_val[0]), Imm32((s32)arg.offset));
+		MOV(64, PPCSTATE(cr_val[0]), Imm32((s32)arg.offset));
 	}
 	else
 	{
-		MOVSX(64, 32, RAX, arg);
-		MOV(64, M(&PowerPC::ppcState.cr_val[0]), R(RAX));
+		MOVSX(64, 32, RSCRATCH, arg);
+		MOV(64, PPCSTATE(cr_val[0]), R(RSCRATCH));
 	}
 }
 
@@ -374,8 +374,8 @@ void Jit64::cmpXX(UGeckoInstruction inst)
 			else
 				compareResult = CR_LT;
 		}
-		MOV(64, R(RAX), Imm64(PPCCRToInternal(compareResult)));
-		MOV(64, M(&PowerPC::ppcState.cr_val[crf]), R(RAX));
+		MOV(64, R(RSCRATCH), Imm64(PPCCRToInternal(compareResult)));
+		MOV(64, PPCSTATE(cr_val[crf]), R(RSCRATCH));
 		gpr.UnlockAll();
 
 		if (merge_branch)
@@ -393,7 +393,7 @@ void Jit64::cmpXX(UGeckoInstruction inst)
 			if (js.next_inst.OPCD == 16) // bcx
 			{
 				if (js.next_inst.LK)
-					MOV(32, M(&LR), Imm32(js.next_compilerPC + 4));
+					MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4));
 
 				u32 destination;
 				if (js.next_inst.AA)
@@ -405,17 +405,17 @@ void Jit64::cmpXX(UGeckoInstruction inst)
 			else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx
 			{
 				if (js.next_inst.LK)
-					MOV(32, M(&LR), Imm32(js.next_compilerPC + 4));
-				MOV(32, R(EAX), M(&CTR));
-				AND(32, R(EAX), Imm32(0xFFFFFFFC));
-				WriteExitDestInEAX();
+					MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4));
+				MOV(32, R(RSCRATCH), PPCSTATE_CTR);
+				AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
+				WriteExitDestInRSCRATCH();
 			}
 			else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx
 			{
-				MOV(32, R(EAX), M(&LR));
+				MOV(32, R(RSCRATCH), PPCSTATE_LR);
 				if (js.next_inst.LK)
-					MOV(32, M(&LR), Imm32(js.next_compilerPC + 4));
-				WriteExitDestInEAX();
+					MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4));
+				WriteExitDestInRSCRATCH();
 			}
 			else
 			{
@@ -436,32 +436,32 @@ void Jit64::cmpXX(UGeckoInstruction inst)
 		if (signedCompare)
 		{
 			if (gpr.R(a).IsImm())
-				MOV(64, R(RAX), Imm32((s32)gpr.R(a).offset));
+				MOV(64, R(RSCRATCH), Imm32((s32)gpr.R(a).offset));
 			else
-				MOVSX(64, 32, RAX, gpr.R(a));
+				MOVSX(64, 32, RSCRATCH, gpr.R(a));
 
 			if (!comparand.IsImm())
 			{
-				MOVSX(64, 32, ABI_PARAM1, comparand);
-				comparand = R(ABI_PARAM1);
+				MOVSX(64, 32, RSCRATCH2, comparand);
+				comparand = R(RSCRATCH2);
 			}
 		}
 		else
 		{
 			if (gpr.R(a).IsImm())
-				MOV(32, R(RAX), Imm32((u32)gpr.R(a).offset));
+				MOV(32, R(RSCRATCH), Imm32((u32)gpr.R(a).offset));
 			else
-				MOVZX(64, 32, RAX, gpr.R(a));
+				MOVZX(64, 32, RSCRATCH, gpr.R(a));
 
 			if (comparand.IsImm())
-				MOV(32, R(ABI_PARAM1), comparand);
+				MOV(32, R(RSCRATCH2), comparand);
 			else
-				MOVZX(64, 32, ABI_PARAM1, comparand);
+				MOVZX(64, 32, RSCRATCH2, comparand);
 
-			comparand = R(ABI_PARAM1);
+			comparand = R(RSCRATCH2);
 		}
-		SUB(64, R(RAX), comparand);
-		MOV(64, M(&PowerPC::ppcState.cr_val[crf]), R(RAX));
+		SUB(64, R(RSCRATCH), comparand);
+		MOV(64, PPCSTATE(cr_val[crf]), R(RSCRATCH));
 
 		if (merge_branch)
 		{
@@ -492,7 +492,7 @@ void Jit64::cmpXX(UGeckoInstruction inst)
 			if (js.next_inst.OPCD == 16) // bcx
 			{
 				if (js.next_inst.LK)
-					MOV(32, M(&LR), Imm32(js.next_compilerPC + 4));
+					MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4));
 
 				u32 destination;
 				if (js.next_inst.AA)
@@ -504,21 +504,21 @@ void Jit64::cmpXX(UGeckoInstruction inst)
 			else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx
 			{
 				if (js.next_inst.LK)
-					MOV(32, M(&LR), Imm32(js.next_compilerPC + 4));
+					MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4));
 
-				MOV(32, R(EAX), M(&CTR));
-				AND(32, R(EAX), Imm32(0xFFFFFFFC));
-				WriteExitDestInEAX();
+				MOV(32, R(RSCRATCH), PPCSTATE_CTR);
+				AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
+				WriteExitDestInRSCRATCH();
 			}
 			else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx
 			{
-				MOV(32, R(EAX), M(&LR));
-				AND(32, R(EAX), Imm32(0xFFFFFFFC));
+				MOV(32, R(RSCRATCH), PPCSTATE_LR);
+				AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
 				if (js.next_inst.LK)
-					MOV(32, M(&LR), Imm32(js.next_compilerPC + 4));
+					MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4));
 
-				WriteExitDestInEAX();
+				WriteExitDestInRSCRATCH();
 			}
 			else
 			{
@@ -636,9 +636,9 @@ void Jit64::boolX(UGeckoInstruction inst)
 			}
 			else
 			{
-				MOV(32, R(EAX), operand);
-				NOT(32, R(EAX));
-				AND(32, gpr.R(a), R(EAX));
+				MOV(32, R(RSCRATCH), operand);
+				NOT(32, R(RSCRATCH));
+				AND(32, gpr.R(a), R(RSCRATCH));
 			}
 		}
 		else if (inst.SUBOP10 == 444) // orx
@@ -659,9 +659,9 @@ void Jit64::boolX(UGeckoInstruction inst)
 			}
 			else
 			{
-				MOV(32, R(EAX), operand);
-				NOT(32, R(EAX));
-				OR(32, gpr.R(a), R(EAX));
+				MOV(32, R(RSCRATCH), operand);
+				NOT(32, R(RSCRATCH));
+				OR(32, gpr.R(a), R(RSCRATCH));
 			}
 		}
 		else if (inst.SUBOP10 == 316) // xorx
@@ -755,11 +755,7 @@ void Jit64::extsbx(UGeckoInstruction inst)
 	{
 		gpr.Lock(a, s);
 		gpr.BindToRegister(a, a == s, true);
-		// Always force moving to EAX because it isn't possible
-		// to refer to the lowest byte of some registers, at least in
-		// 32-bit mode.
-		MOV(32, R(EAX), gpr.R(s));
-		MOVSX(32, 8, gpr.RX(a), R(AL)); // watch out for ah and friends
+		MOVSX(32, 8, gpr.RX(a), gpr.R(s));
 		gpr.UnlockAll();
 	}
 
@@ -863,9 +859,9 @@ void Jit64::subfcx(UGeckoInstruction inst)
 		}
 		else if (d == a)
 		{
-			MOV(32, R(EAX), gpr.R(a));
+			MOV(32, R(RSCRATCH), gpr.R(a));
 			MOV(32, gpr.R(d), gpr.R(b));
-			SUB(32, gpr.R(d), R(EAX));
+			SUB(32, gpr.R(d), R(RSCRATCH));
 		}
 		else
 		{
@@ -887,7 +883,7 @@ void Jit64::subfex(UGeckoInstruction inst)
 		gpr.Lock(a, b, d);
 		gpr.BindToRegister(d, (d == a || d == b), true);
 
-		GetCarryEAXAndClear();
+		GetCarryRSCRATCHAndClear();
 
 		bool invertedCarry = false;
 		if (d == b)
@@ -908,7 +904,7 @@ void Jit64::subfex(UGeckoInstruction inst)
 			NOT(32, gpr.R(d));
 			ADC(32, gpr.R(d), gpr.R(b));
 		}
-		FinalizeCarryGenerateOverflowEAX(inst.OE, invertedCarry);
+		FinalizeCarryGenerateOverflowRSCRATCH(inst.OE, invertedCarry);
 
 		if (inst.Rc)
 			ComputeRC(gpr.R(d));
@@ -924,14 +920,14 @@ void Jit64::subfmex(UGeckoInstruction inst)
 		gpr.Lock(a, d);
 		gpr.BindToRegister(d, d == a);
 
-		GetCarryEAXAndClear();
+		GetCarryRSCRATCHAndClear();
 		if (d != a)
 		{
 			MOV(32, gpr.R(d), gpr.R(a));
 		}
 		NOT(32, gpr.R(d));
 		ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
-		FinalizeCarryGenerateOverflowEAX(inst.OE);
+		FinalizeCarryGenerateOverflowRSCRATCH(inst.OE);
 		if (inst.Rc)
 			ComputeRC(gpr.R(d));
 		gpr.UnlockAll();
@@ -947,14 +943,14 @@ void Jit64::subfzex(UGeckoInstruction inst)
 		gpr.Lock(a, d);
 		gpr.BindToRegister(d, d == a);
 
-		GetCarryEAXAndClear();
+		GetCarryRSCRATCHAndClear();
 		if (d != a)
 		{
 			MOV(32, gpr.R(d), gpr.R(a));
 		}
 		NOT(32, gpr.R(d));
 		ADC(32, gpr.R(d), Imm8(0));
-		FinalizeCarryGenerateOverflowEAX(inst.OE);
+		FinalizeCarryGenerateOverflowRSCRATCH(inst.OE);
 
 		if (inst.Rc)
 			ComputeRC(gpr.R(d));
@@ -990,9 +986,9 @@ void Jit64::subfx(UGeckoInstruction inst)
 		}
 		else if (d == a)
 		{
-			MOV(32, R(EAX), gpr.R(a));
+			MOV(32, R(RSCRATCH), gpr.R(a));
 			MOV(32, gpr.R(d), gpr.R(b));
-			SUB(32, gpr.R(d), R(EAX));
+			SUB(32, gpr.R(d), R(RSCRATCH));
 		}
 		else
 		{
@@ -1170,11 +1166,10 @@ void Jit64::mulhwXx(UGeckoInstruction inst)
 	}
 	else
 	{
-		gpr.FlushLockX(EDX);
 		gpr.Lock(a, b, d);
+		// no register choice
+		gpr.FlushLockX(EDX, EAX);
 		gpr.BindToRegister(d, (d == a || d == b), true);
-		if (gpr.RX(d) == EDX)
-			PanicAlert("mulhwux : WTF");
 		MOV(32, R(EAX), gpr.R(a));
 		gpr.KillImmediate(b, true, false);
 		if (sign)
@@ -1253,11 +1248,11 @@ void Jit64::divwux(UGeckoInstruction inst)
 				// If failed, use slower round-down method
 				gpr.Lock(a, b, d);
 				gpr.BindToRegister(d, d == a, true);
-				MOV(32, R(EAX), Imm32(magic));
+				MOV(32, R(RSCRATCH), Imm32(magic));
 				if (d != a)
 					MOV(32, gpr.R(d), gpr.R(a));
-				IMUL(64, gpr.RX(d), R(RAX));
-				ADD(64, gpr.R(d), R(RAX));
+				IMUL(64, gpr.RX(d), R(RSCRATCH));
+				ADD(64, gpr.R(d), R(RSCRATCH));
 				SHR(64, gpr.R(d), Imm8(shift+32));
 			}
 			else
@@ -1268,8 +1263,8 @@ void Jit64::divwux(UGeckoInstruction inst)
 				gpr.BindToRegister(d, false, true);
 				if (d == a)
 				{
-					MOV(32, R(EAX), Imm32(magic+1));
-					IMUL(64, gpr.RX(d), R(RAX));
+					MOV(32, R(RSCRATCH), Imm32(magic+1));
+					IMUL(64, gpr.RX(d), R(RSCRATCH));
 				}
 				else
 				{
@@ -1288,8 +1283,9 @@ void Jit64::divwux(UGeckoInstruction inst)
 	}
 	else
 	{
-		gpr.FlushLockX(EDX);
 		gpr.Lock(a, b, d);
+		// no register choice (do we need to do this?)
+		gpr.FlushLockX(EAX, EDX);
 		gpr.BindToRegister(d, (d == a || d == b), true);
 		MOV(32, R(EAX), gpr.R(a));
 		XOR(32, R(EDX), R(EDX));
@@ -1301,7 +1297,7 @@ void Jit64::divwux(UGeckoInstruction inst)
 		{
 			GenerateConstantOverflow(true);
 		}
-		//MOV(32, R(EAX), gpr.R(d));
+		//MOV(32, R(RAX), gpr.R(d));
 		FixupBranch end = J();
 		SetJumpTarget(not_div_by_zero);
 		DIV(32, gpr.R(b));
@@ -1349,8 +1345,9 @@ void Jit64::divwx(UGeckoInstruction inst)
 	}
 	else
 	{
-		gpr.FlushLockX(EDX);
 		gpr.Lock(a, b, d);
+		// no register choice
+		gpr.FlushLockX(EAX, EDX);
 		gpr.BindToRegister(d, (d == a || d == b), true);
 		MOV(32, R(EAX), gpr.R(a));
 		CDQ();
@@ -1459,9 +1456,9 @@ void Jit64::addex(UGeckoInstruction inst)
 		gpr.Lock(a, b, d);
 		gpr.BindToRegister(d, true);
 
-		GetCarryEAXAndClear();
+		GetCarryRSCRATCHAndClear();
 		ADC(32, gpr.R(d), gpr.R((d == a) ? b : a));
-		FinalizeCarryGenerateOverflowEAX(inst.OE);
+		FinalizeCarryGenerateOverflowRSCRATCH(inst.OE);
 		if (inst.Rc)
 			ComputeRC(gpr.R(d));
 		gpr.UnlockAll();
@@ -1471,10 +1468,10 @@ void Jit64::addex(UGeckoInstruction inst)
 		gpr.Lock(a, b, d);
 		gpr.BindToRegister(d, false);
 
-		GetCarryEAXAndClear();
+		GetCarryRSCRATCHAndClear();
 		MOV(32, gpr.R(d), gpr.R(a));
 		ADC(32, gpr.R(d), gpr.R(b));
-		FinalizeCarryGenerateOverflowEAX(inst.OE);
+		FinalizeCarryGenerateOverflowRSCRATCH(inst.OE);
 		if (inst.Rc)
 			ComputeRC(gpr.R(d));
 		gpr.UnlockAll();
@@ -1525,9 +1522,9 @@ void Jit64::addmex(UGeckoInstruction inst)
 		gpr.Lock(d);
 		gpr.BindToRegister(d, true);
 
-		GetCarryEAXAndClear();
+		GetCarryRSCRATCHAndClear();
 		ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
-		FinalizeCarryGenerateOverflowEAX(inst.OE);
+		FinalizeCarryGenerateOverflowRSCRATCH(inst.OE);
 		if (inst.Rc)
 			ComputeRC(gpr.R(d));
 		gpr.UnlockAll();
@@ -1537,10 +1534,10 @@ void Jit64::addmex(UGeckoInstruction inst)
 		gpr.Lock(a, d);
 		gpr.BindToRegister(d, false);
 
-		GetCarryEAXAndClear();
+		GetCarryRSCRATCHAndClear();
 		MOV(32, gpr.R(d), gpr.R(a));
 		ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
-		FinalizeCarryGenerateOverflowEAX(inst.OE);
+		FinalizeCarryGenerateOverflowRSCRATCH(inst.OE);
 		if (inst.Rc)
 			ComputeRC(gpr.R(d));
 		gpr.UnlockAll();
@@ -1559,9 +1556,9 @@ void Jit64::addzex(UGeckoInstruction inst)
 		gpr.Lock(d);
 		gpr.BindToRegister(d, true);
 
-		GetCarryEAXAndClear();
+		GetCarryRSCRATCHAndClear();
 		ADC(32, gpr.R(d), Imm8(0));
-		FinalizeCarryGenerateOverflowEAX(inst.OE);
+		FinalizeCarryGenerateOverflowRSCRATCH(inst.OE);
 		if (inst.Rc)
 			ComputeRC(gpr.R(d));
 		gpr.UnlockAll();
@@ -1571,10 +1568,10 @@ void Jit64::addzex(UGeckoInstruction inst)
 		gpr.Lock(a, d);
 		gpr.BindToRegister(d, false);
 
-		GetCarryEAXAndClear();
+		GetCarryRSCRATCHAndClear();
 		MOV(32, gpr.R(d), gpr.R(a));
 		ADC(32, gpr.R(d), Imm8(0));
-		FinalizeCarryGenerateOverflowEAX(inst.OE);
+		FinalizeCarryGenerateOverflowRSCRATCH(inst.OE);
 		if (inst.Rc)
 			ComputeRC(gpr.R(d));
 		gpr.UnlockAll();
@@ -1692,25 +1689,25 @@ void Jit64::rlwimix(UGeckoInstruction inst)
 	{
 		if (mask == 0U - (1U << inst.SH))
 		{
-			MOV(32, R(EAX), gpr.R(s));
-			SHL(32, R(EAX), Imm8(inst.SH));
+			MOV(32, R(RSCRATCH), gpr.R(s));
+			SHL(32, R(RSCRATCH), Imm8(inst.SH));
 			AND(32, gpr.R(a), Imm32(~mask));
-			OR(32, gpr.R(a), R(EAX));
+			OR(32, gpr.R(a), R(RSCRATCH));
 		}
 		else if (mask == (1U << inst.SH) - 1)
 		{
-			MOV(32, R(EAX), gpr.R(s));
-			SHR(32, R(EAX), Imm8(32-inst.SH));
+			MOV(32, R(RSCRATCH), gpr.R(s));
+			SHR(32, R(RSCRATCH), Imm8(32-inst.SH));
 			AND(32, gpr.R(a), Imm32(~mask));
-			OR(32, gpr.R(a), R(EAX));
+			OR(32, gpr.R(a), R(RSCRATCH));
 		}
 		else
 		{
-			MOV(32, R(EAX), gpr.R(s));
-			ROL(32, R(EAX), Imm8(inst.SH));
-			XOR(32, R(EAX), gpr.R(a));
-			AND(32, R(EAX), Imm32(mask));
-			XOR(32, gpr.R(a), R(EAX));
+			MOV(32, R(RSCRATCH), gpr.R(s));
+			ROL(32, R(RSCRATCH), Imm8(inst.SH));
+			XOR(32, R(RSCRATCH), gpr.R(a));
+			AND(32, R(RSCRATCH), Imm32(mask));
+			XOR(32, gpr.R(a), R(RSCRATCH));
 		}
 
 		if (inst.Rc)
@@ -1745,6 +1742,7 @@ void Jit64::rlwnmx(UGeckoInstruction inst)
 	}
 	else
 	{
+		// no register choice
 		gpr.FlushLockX(ECX);
 		gpr.Lock(a, b, s);
 		gpr.BindToRegister(a, (a == b || a == s), true);
@@ -1812,6 +1810,7 @@ void Jit64::srwx(UGeckoInstruction inst)
 	}
 	else
 	{
+		// no register choice
 		gpr.FlushLockX(ECX);
 		gpr.Lock(a, b, s);
 		gpr.BindToRegister(a, (a == b || a == s), true);
@@ -1850,6 +1849,7 @@ void Jit64::slwx(UGeckoInstruction inst)
 	}
 	else
 	{
+		// no register choice
 		gpr.FlushLockX(ECX);
 		gpr.Lock(a, b, s);
 		gpr.BindToRegister(a, (a == b || a == s), true);
@@ -1881,8 +1881,8 @@ void Jit64::srawx(UGeckoInstruction inst)
 	int a = inst.RA;
 	int b = inst.RB;
 	int s = inst.RS;
-	gpr.Lock(a, s, b);
 	gpr.FlushLockX(ECX);
+	gpr.Lock(a, s, b);
 	gpr.BindToRegister(a, (a == s || a == b), true);
 	JitClearCA();
 	MOV(32, R(ECX), gpr.R(b));
@@ -1890,9 +1890,9 @@ void Jit64::srawx(UGeckoInstruction inst)
 	MOV(32, gpr.R(a), gpr.R(s));
 	SHL(64, gpr.R(a), Imm8(32));
 	SAR(64, gpr.R(a), R(ECX));
-	MOV(32, R(EAX), gpr.R(a));
+	MOV(32, R(RSCRATCH), gpr.R(a));
 	SHR(64, gpr.R(a), Imm8(32));
-	TEST(32, gpr.R(a), R(EAX));
+	TEST(32, gpr.R(a), R(RSCRATCH));
 	FixupBranch nocarry = J_CC(CC_Z);
 	JitSetCA();
 	SetJumpTarget(nocarry);
@@ -1917,16 +1917,16 @@ void Jit64::srawix(UGeckoInstruction inst)
 		gpr.Lock(a, s);
 		gpr.BindToRegister(a, a == s, true);
 		JitClearCA();
-		MOV(32, R(EAX), gpr.R(s));
+		MOV(32, R(RSCRATCH), gpr.R(s));
 		if (a != s)
 		{
-			MOV(32, gpr.R(a), R(EAX));
+			MOV(32, gpr.R(a), R(RSCRATCH));
 		}
 		SAR(32, gpr.R(a), Imm8(amount));
 		if (inst.Rc)
 			ComputeRC(gpr.R(a));
-		SHL(32, R(EAX), Imm8(32-amount));
-		TEST(32, R(EAX), gpr.R(a));
+		SHL(32, R(RSCRATCH), Imm8(32-amount));
+		TEST(32, R(RSCRATCH), gpr.R(a));
 		FixupBranch nocarry = J_CC(CC_Z);
 		JitSetCA();
 		SetJumpTarget(nocarry);
@@ -2020,7 +2020,7 @@ void Jit64::twx(UGeckoInstruction inst)
 		SetJumpTarget(fixup);
 	}
 	LOCK();
-	OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_PROGRAM));
+	OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_PROGRAM));
 
 	gpr.Flush(FLUSH_MAINTAIN_STATE);
 	fpr.Flush(FLUSH_MAINTAIN_STATE);
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
index f085284ed8..ba9cf8b293 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
@@ -123,7 +123,7 @@ void Jit64::lXXx(UGeckoInstruction inst)
 			ABI_PopRegistersAndAdjustStack(registersInUse, false);
 
 			// ! we must continue executing of the loop after exception handling, maybe there is still 0 in r0
-			//MOV(32, M(&PowerPC::ppcState.pc), Imm32(js.compilerPC));
+			//MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
 			WriteExceptionExit();
 
 			SetJumpTarget(noIdle);
@@ -197,14 +197,13 @@ void Jit64::lXXx(UGeckoInstruction inst)
 		else
 		{
 			// In this case we need an extra temporary register.
-			gpr.FlushLockX(ABI_PARAM1);
-			opAddress = R(ABI_PARAM1);
+			opAddress = R(RSCRATCH2);
 			storeAddress = true;
 			if (use_constant_offset)
 			{
 				if (gpr.R(a).IsSimpleReg() && offset != 0)
 				{
-					LEA(32, ABI_PARAM1, MDisp(gpr.RX(a), offset));
+					LEA(32, RSCRATCH2, MDisp(gpr.RX(a), offset));
 				}
 				else
 				{
@@ -215,7 +214,7 @@ void Jit64::lXXx(UGeckoInstruction inst)
 			}
 			else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
 			{
-				LEA(32, ABI_PARAM1, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
+				LEA(32, RSCRATCH2, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
 			}
 			else
 			{
@@ -228,7 +227,13 @@ void Jit64::lXXx(UGeckoInstruction inst)
 	gpr.Lock(a, b, d);
 	gpr.BindToRegister(d, js.memcheck, true);
-	SafeLoadToReg(gpr.RX(d), opAddress, accessSize, loadOffset, CallerSavedRegistersInUse(), signExtend);
+	u32 registersInUse = CallerSavedRegistersInUse();
+	if (update && storeAddress)
+	{
+		// We need to save the (usually scratch) address register for the update.
+ registersInUse |= (1 << RSCRATCH2); + } + SafeLoadToReg(gpr.RX(d), opAddress, accessSize, loadOffset, registersInUse, signExtend); if (update && storeAddress) { @@ -269,11 +274,11 @@ void Jit64::dcbz(UGeckoInstruction inst) if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack) mem_mask |= Memory::ADDR_MASK_MEM1; - MOV(32, R(EAX), gpr.R(b)); + MOV(32, R(RSCRATCH), gpr.R(b)); if (a) - ADD(32, R(EAX), gpr.R(a)); - AND(32, R(EAX), Imm32(~31)); - TEST(32, R(EAX), Imm32(mem_mask)); + ADD(32, R(RSCRATCH), gpr.R(a)); + AND(32, R(RSCRATCH), Imm32(~31)); + TEST(32, R(RSCRATCH), Imm32(mem_mask)); FixupBranch fast = J_CC(CC_Z, true); // Should this code ever run? I can't find any games that use DCBZ on non-physical addresses, but @@ -281,14 +286,14 @@ void Jit64::dcbz(UGeckoInstruction inst) MOV(32, M(&PC), Imm32(jit->js.compilerPC)); u32 registersInUse = CallerSavedRegistersInUse(); ABI_PushRegistersAndAdjustStack(registersInUse, false); - ABI_CallFunctionR((void *)&Memory::ClearCacheLine, EAX); + ABI_CallFunctionR((void *)&Memory::ClearCacheLine, RSCRATCH); ABI_PopRegistersAndAdjustStack(registersInUse, false); FixupBranch exit = J(); SetJumpTarget(fast); PXOR(XMM0, R(XMM0)); - MOVAPS(MComplex(RBX, RAX, SCALE_1, 0), XMM0); - MOVAPS(MComplex(RBX, RAX, SCALE_1, 16), XMM0); + MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 0), XMM0); + MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 16), XMM0); SetJumpTarget(exit); } @@ -331,10 +336,9 @@ void Jit64::stX(UGeckoInstruction inst) if ((addr & 0xFFFFF000) == 0xCC008000 && jo.optimizeGatherPipe) { // Helps external systems know which instruction triggered the write - MOV(32, M(&PC), Imm32(jit->js.compilerPC)); + MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); - gpr.FlushLockX(ABI_PARAM1); - MOV(32, R(ABI_PARAM1), gpr.R(s)); + MOV(32, R(RSCRATCH2), gpr.R(s)); if (update) gpr.SetImmediate32(a, addr); @@ -358,8 +362,8 @@ void Jit64::stX(UGeckoInstruction inst) } else if (Memory::IsRAMAddress(addr)) { - MOV(32, R(EAX), gpr.R(s)); - WriteToConstRamAddress(accessSize, EAX, addr, true); + MOV(32, R(RSCRATCH), gpr.R(s)); + WriteToConstRamAddress(accessSize, RSCRATCH, addr, true); if (update) gpr.SetImmediate32(a, addr); return; @@ -367,7 +371,7 @@ void Jit64::stX(UGeckoInstruction inst) else { // Helps external systems know which instruction triggered the write - MOV(32, M(&PC), Imm32(jit->js.compilerPC)); + MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); u32 registersInUse = CallerSavedRegistersInUse(); ABI_PushRegistersAndAdjustStack(registersInUse, false); @@ -390,24 +394,31 @@ void Jit64::stX(UGeckoInstruction inst) } } - gpr.FlushLockX(ECX, EDX); - gpr.Lock(s, a); - MOV(32, R(EDX), gpr.R(a)); - MOV(32, R(ECX), gpr.R(s)); - SafeWriteRegToReg(ECX, EDX, accessSize, offset, CallerSavedRegistersInUse()); + gpr.Lock(a, s); + gpr.BindToRegister(a, true, false); + X64Reg reg_value; + if (WriteClobbersRegValue(accessSize, /* swap */ true)) + { + MOV(32, R(RSCRATCH2), gpr.R(s)); + reg_value = RSCRATCH2; + } + else + { + gpr.BindToRegister(s, true, false); + reg_value = gpr.RX(s); + } + SafeWriteRegToReg(reg_value, gpr.RX(a), accessSize, offset, CallerSavedRegistersInUse(), SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR); if (update && offset) { - gpr.KillImmediate(a, true, true); MEMCHECK_START + gpr.KillImmediate(a, true, true); ADD(32, gpr.R(a), Imm32((u32)offset)); MEMCHECK_END } - gpr.UnlockAll(); - gpr.UnlockAllX(); } else { @@ -424,24 +435,21 @@ void Jit64::stXx(UGeckoInstruction inst) FALLBACK_IF(!a || a == s || a == b); gpr.Lock(a, b, s); - 
gpr.FlushLockX(ECX, EDX); if (inst.SUBOP10 & 32) { - MEMCHECK_START gpr.BindToRegister(a, true, true); ADD(32, gpr.R(a), gpr.R(b)); - MOV(32, R(EDX), gpr.R(a)); - MEMCHECK_END + MOV(32, R(RSCRATCH2), gpr.R(a)); } else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) { - LEA(32, EDX, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); + LEA(32, RSCRATCH2, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); } else { - MOV(32, R(EDX), gpr.R(a)); - ADD(32, R(EDX), gpr.R(b)); + MOV(32, R(RSCRATCH2), gpr.R(a)); + ADD(32, R(RSCRATCH2), gpr.R(b)); } int accessSize; @@ -462,8 +470,18 @@ void Jit64::stXx(UGeckoInstruction inst) break; } - MOV(32, R(ECX), gpr.R(s)); - SafeWriteRegToReg(ECX, EDX, accessSize, 0, CallerSavedRegistersInUse()); + X64Reg reg_value; + if (WriteClobbersRegValue(accessSize, /* swap */ true)) + { + MOV(32, R(RSCRATCH), gpr.R(s)); + reg_value = RSCRATCH; + } + else + { + gpr.BindToRegister(s, true, false); + reg_value = gpr.RX(s); + } + SafeWriteRegToReg(reg_value, RSCRATCH2, accessSize, 0, CallerSavedRegistersInUse()); gpr.UnlockAll(); gpr.UnlockAllX(); @@ -476,15 +494,14 @@ void Jit64::lmw(UGeckoInstruction inst) JITDISABLE(bJITLoadStoreOff); // TODO: This doesn't handle rollback on DSI correctly - gpr.FlushLockX(ECX); - MOV(32, R(ECX), Imm32((u32)(s32)inst.SIMM_16)); + MOV(32, R(RSCRATCH2), Imm32((u32)(s32)inst.SIMM_16)); if (inst.RA) - ADD(32, R(ECX), gpr.R(inst.RA)); + ADD(32, R(RSCRATCH2), gpr.R(inst.RA)); for (int i = inst.RD; i < 32; i++) { - SafeLoadToReg(EAX, R(ECX), 32, (i - inst.RD) * 4, CallerSavedRegistersInUse(), false); + SafeLoadToReg(RSCRATCH, R(RSCRATCH2), 32, (i - inst.RD) * 4, CallerSavedRegistersInUse() | (1 << RSCRATCH_EXTRA), false); gpr.BindToRegister(i, false, true); - MOV(32, gpr.R(i), R(EAX)); + MOV(32, gpr.R(i), R(RSCRATCH)); } gpr.UnlockAllX(); } @@ -495,15 +512,14 @@ void Jit64::stmw(UGeckoInstruction inst) JITDISABLE(bJITLoadStoreOff); // TODO: This doesn't handle rollback on DSI correctly - gpr.FlushLockX(ECX); for (int i = inst.RD; i < 32; i++) { if (inst.RA) - MOV(32, R(EAX), gpr.R(inst.RA)); + MOV(32, R(RSCRATCH), gpr.R(inst.RA)); else - XOR(32, R(EAX), R(EAX)); - MOV(32, R(ECX), gpr.R(i)); - SafeWriteRegToReg(ECX, EAX, 32, (i - inst.RD) * 4 + (u32)(s32)inst.SIMM_16, CallerSavedRegistersInUse()); + XOR(32, R(RSCRATCH), R(RSCRATCH)); + MOV(32, R(RSCRATCH2), gpr.R(i)); + SafeWriteRegToReg(RSCRATCH2, RSCRATCH, 32, (i - inst.RD) * 4 + (u32)(s32)inst.SIMM_16, CallerSavedRegistersInUse()); } gpr.UnlockAllX(); } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp index d1f7ca9f8f..4e6ea7ad09 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp @@ -42,9 +42,9 @@ void Jit64::lfXXX(UGeckoInstruction inst) } else { - addr = R(EAX); + addr = R(RSCRATCH); if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) - LEA(32, EAX, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); + LEA(32, RSCRATCH, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); else { MOV(32, addr, gpr.R(b)); @@ -61,18 +61,18 @@ void Jit64::lfXXX(UGeckoInstruction inst) offset = (s32)(s16)inst.SIMM_16; } - SafeLoadToReg(RAX, addr, single ? 32 : 64, offset, CallerSavedRegistersInUse(), false); + SafeLoadToReg(RSCRATCH, addr, single ? 
32 : 64, offset, CallerSavedRegistersInUse(), false); fpr.Lock(d); fpr.BindToRegister(d, js.memcheck || !single); MEMCHECK_START if (single) { - ConvertSingleToDouble(fpr.RX(d), EAX, true); + ConvertSingleToDouble(fpr.RX(d), RSCRATCH, true); } else { - MOVQ_xmm(XMM0, R(RAX)); + MOVQ_xmm(XMM0, R(RSCRATCH)); MOVSD(fpr.RX(d), R(XMM0)); } MEMCHECK_END @@ -96,24 +96,23 @@ void Jit64::stfXXX(UGeckoInstruction inst) FALLBACK_IF(!indexed && !a); s32 offset = 0; - gpr.FlushLockX(ABI_PARAM1); if (indexed) { if (update) { gpr.BindToRegister(a, true, true); ADD(32, gpr.R(a), gpr.R(b)); - MOV(32, R(ABI_PARAM1), gpr.R(a)); + MOV(32, R(RSCRATCH2), gpr.R(a)); } else { if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) - LEA(32, ABI_PARAM1, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); + LEA(32, RSCRATCH2, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0)); else { - MOV(32, R(ABI_PARAM1), gpr.R(b)); + MOV(32, R(RSCRATCH2), gpr.R(b)); if (a) - ADD(32, R(ABI_PARAM1), gpr.R(a)); + ADD(32, R(RSCRATCH2), gpr.R(a)); } } } @@ -128,23 +127,23 @@ void Jit64::stfXXX(UGeckoInstruction inst) { offset = (s32)(s16)inst.SIMM_16; } - MOV(32, R(ABI_PARAM1), gpr.R(a)); + MOV(32, R(RSCRATCH2), gpr.R(a)); } if (single) { fpr.BindToRegister(s, true, false); ConvertDoubleToSingle(XMM0, fpr.RX(s)); - SafeWriteF32ToReg(XMM0, ABI_PARAM1, offset, CallerSavedRegistersInUse()); + SafeWriteF32ToReg(XMM0, RSCRATCH2, offset, CallerSavedRegistersInUse()); fpr.UnlockAll(); } else { if (fpr.R(s).IsSimpleReg()) - MOVQ_xmm(R(RAX), fpr.RX(s)); + MOVQ_xmm(R(RSCRATCH), fpr.RX(s)); else - MOV(64, R(RAX), fpr.R(s)); - SafeWriteRegToReg(RAX, ABI_PARAM1, 64, offset, CallerSavedRegistersInUse()); + MOV(64, R(RSCRATCH), fpr.R(s)); + SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 64, offset, CallerSavedRegistersInUse()); } gpr.UnlockAll(); gpr.UnlockAllX(); @@ -160,15 +159,14 @@ void Jit64::stfiwx(UGeckoInstruction inst) int a = inst.RA; int b = inst.RB; - gpr.FlushLockX(ABI_PARAM1); - MOV(32, R(ABI_PARAM1), gpr.R(b)); + MOV(32, R(RSCRATCH2), gpr.R(b)); if (a) - ADD(32, R(ABI_PARAM1), gpr.R(a)); + ADD(32, R(RSCRATCH2), gpr.R(a)); if (fpr.R(s).IsSimpleReg()) - MOVD_xmm(R(EAX), fpr.RX(s)); + MOVD_xmm(R(RSCRATCH), fpr.RX(s)); else - MOV(32, R(EAX), fpr.R(s)); - SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, CallerSavedRegistersInUse()); + MOV(32, R(RSCRATCH), fpr.R(s)); + SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 32, 0, CallerSavedRegistersInUse()); gpr.UnlockAllX(); } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp index 1129d5e833..2630395630 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -28,37 +28,36 @@ void Jit64::psq_st(UGeckoInstruction inst) int a = inst.RA; int s = inst.RS; // Fp numbers - gpr.FlushLockX(EAX, EDX); - gpr.FlushLockX(ECX); + gpr.FlushLockX(RSCRATCH, RSCRATCH_EXTRA); if (update) gpr.BindToRegister(inst.RA, true, true); fpr.BindToRegister(inst.RS, true, false); - MOV(32, R(ECX), gpr.R(inst.RA)); + MOV(32, R(RSCRATCH_EXTRA), gpr.R(inst.RA)); if (offset) - ADD(32, R(ECX), Imm32((u32)offset)); + ADD(32, R(RSCRATCH_EXTRA), Imm32((u32)offset)); if (update && offset) - MOV(32, gpr.R(a), R(ECX)); + MOV(32, gpr.R(a), R(RSCRATCH_EXTRA)); // Some games (e.g. Dirt 2) incorrectly set the unused bits which breaks the lookup table code. // Hence, we need to mask out the unused bits. 
The layout of the GQR register is // UU[SCALE]UUUUU[TYPE] where SCALE is 6 bits and TYPE is 3 bits, so we have to AND with // 0b0011111100000111, or 0x3F07. - MOV(32, R(EAX), Imm32(0x3F07)); - AND(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_GQR0 + inst.I])); - MOVZX(32, 8, EDX, R(AL)); + MOV(32, R(RSCRATCH), Imm32(0x3F07)); + AND(32, R(RSCRATCH), PPCSTATE(spr[SPR_GQR0 + inst.I])); + MOVZX(32, 8, RSCRATCH2, R(RSCRATCH)); - // FIXME: Fix ModR/M encoding to allow [EDX*4+disp32] without a base register! + // FIXME: Fix ModR/M encoding to allow [RSCRATCH2*4+disp32] without a base register! if (inst.W) { // One value PXOR(XMM0, R(XMM0)); // TODO: See if we can get rid of this cheaply by tweaking the code in the singleStore* functions. CVTSD2SS(XMM0, fpr.R(s)); - CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)asm_routines.singleStoreQuantized)); + CALLptr(MScaled(RSCRATCH2, SCALE_8, (u32)(u64)asm_routines.singleStoreQuantized)); } else { // Pair of values CVTPD2PS(XMM0, fpr.R(s)); - CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)asm_routines.pairedStoreQuantized)); + CALLptr(MScaled(RSCRATCH2, SCALE_8, (u32)(u64)asm_routines.pairedStoreQuantized)); } gpr.UnlockAll(); gpr.UnlockAllX(); @@ -73,24 +72,23 @@ void Jit64::psq_l(UGeckoInstruction inst) bool update = inst.OPCD == 57; int offset = inst.SIMM_12; - gpr.FlushLockX(EAX, EDX); - gpr.FlushLockX(ECX); + gpr.FlushLockX(RSCRATCH, RSCRATCH_EXTRA); gpr.BindToRegister(inst.RA, true, update && offset); fpr.BindToRegister(inst.RS, false, true); if (offset) - LEA(32, ECX, MDisp(gpr.RX(inst.RA), offset)); + LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(inst.RA), offset)); else - MOV(32, R(ECX), gpr.R(inst.RA)); + MOV(32, R(RSCRATCH_EXTRA), gpr.R(inst.RA)); if (update && offset) - MOV(32, gpr.R(inst.RA), R(ECX)); - MOV(32, R(EAX), Imm32(0x3F07)); - AND(32, R(EAX), M(((char *)&GQR(inst.I)) + 2)); - MOVZX(32, 8, EDX, R(AL)); + MOV(32, gpr.R(inst.RA), R(RSCRATCH_EXTRA)); + MOV(32, R(RSCRATCH), Imm32(0x3F07)); + AND(32, R(RSCRATCH), M(((char *)&GQR(inst.I)) + 2)); + MOVZX(32, 8, RSCRATCH2, R(RSCRATCH)); if (inst.W) - OR(32, R(EDX), Imm8(8)); + OR(32, R(RSCRATCH2), Imm8(8)); ABI_AlignStack(0); - CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)asm_routines.pairedLoadQuantized)); + CALLptr(MScaled(RSCRATCH2, SCALE_8, (u32)(u64)asm_routines.pairedLoadQuantized)); ABI_RestoreStack(0); // MEMCHECK_START // FIXME: MMU does not work here because of unsafe memory access diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index 9c00e70be9..f7278e9a55 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -16,22 +16,22 @@ void Jit64::GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate) switch (bit) { case CR_SO_BIT: // check bit 61 set - BT(64, M(&PowerPC::ppcState.cr_val[field]), Imm8(61)); + BT(64, PPCSTATE(cr_val[field]), Imm8(61)); SETcc(negate ? CC_NC : CC_C, R(out)); break; case CR_EQ_BIT: // check bits 31-0 == 0 - CMP(32, M(&PowerPC::ppcState.cr_val[field]), Imm8(0)); + CMP(32, PPCSTATE(cr_val[field]), Imm8(0)); SETcc(negate ? CC_NZ : CC_Z, R(out)); break; case CR_GT_BIT: // check val > 0 - CMP(64, M(&PowerPC::ppcState.cr_val[field]), Imm8(0)); + CMP(64, PPCSTATE(cr_val[field]), Imm8(0)); SETcc(negate ? CC_NG : CC_G, R(out)); break; case CR_LT_BIT: // check bit 62 set - BT(64, M(&PowerPC::ppcState.cr_val[field]), Imm8(62)); + BT(64, PPCSTATE(cr_val[field]), Imm8(62)); SETcc(negate ? 
CC_NC : CC_C, R(out)); break; @@ -42,40 +42,40 @@ void Jit64::GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate) void Jit64::SetCRFieldBit(int field, int bit, Gen::X64Reg in) { - MOV(64, R(ABI_PARAM1), M(&PowerPC::ppcState.cr_val[field])); + MOV(64, R(RSCRATCH2), PPCSTATE(cr_val[field])); MOVZX(32, 8, in, R(in)); switch (bit) { case CR_SO_BIT: // set bit 61 to input - BTR(64, R(ABI_PARAM1), Imm8(61)); + BTR(64, R(RSCRATCH2), Imm8(61)); SHL(64, R(in), Imm8(61)); - OR(64, R(ABI_PARAM1), R(in)); + OR(64, R(RSCRATCH2), R(in)); break; case CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input - SHR(64, R(ABI_PARAM1), Imm8(32)); - SHL(64, R(ABI_PARAM1), Imm8(32)); + SHR(64, R(RSCRATCH2), Imm8(32)); + SHL(64, R(RSCRATCH2), Imm8(32)); XOR(32, R(in), Imm8(1)); - OR(64, R(ABI_PARAM1), R(in)); + OR(64, R(RSCRATCH2), R(in)); break; case CR_GT_BIT: // set bit 63 to !input - BTR(64, R(ABI_PARAM1), Imm8(63)); + BTR(64, R(RSCRATCH2), Imm8(63)); NOT(32, R(in)); SHL(64, R(in), Imm8(63)); - OR(64, R(ABI_PARAM1), R(in)); + OR(64, R(RSCRATCH2), R(in)); break; case CR_LT_BIT: // set bit 62 to input - BTR(64, R(ABI_PARAM1), Imm8(62)); + BTR(64, R(RSCRATCH2), Imm8(62)); SHL(64, R(in), Imm8(62)); - OR(64, R(ABI_PARAM1), R(in)); + OR(64, R(RSCRATCH2), R(in)); break; } - BTS(64, R(ABI_PARAM1), Imm8(32)); - MOV(64, M(&PowerPC::ppcState.cr_val[field]), R(ABI_PARAM1)); + BTS(64, R(RSCRATCH2), Imm8(32)); + MOV(64, PPCSTATE(cr_val[field]), R(RSCRATCH2)); } FixupBranch Jit64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) @@ -83,19 +83,19 @@ FixupBranch Jit64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) switch (bit) { case CR_SO_BIT: // check bit 61 set - BT(64, M(&PowerPC::ppcState.cr_val[field]), Imm8(61)); + BT(64, PPCSTATE(cr_val[field]), Imm8(61)); return J_CC(jump_if_set ? CC_C : CC_NC, true); case CR_EQ_BIT: // check bits 31-0 == 0 - CMP(32, M(&PowerPC::ppcState.cr_val[field]), Imm8(0)); + CMP(32, PPCSTATE(cr_val[field]), Imm8(0)); return J_CC(jump_if_set ? CC_Z : CC_NZ, true); case CR_GT_BIT: // check val > 0 - CMP(64, M(&PowerPC::ppcState.cr_val[field]), Imm8(0)); + CMP(64, PPCSTATE(cr_val[field]), Imm8(0)); return J_CC(jump_if_set ? CC_G : CC_LE, true); case CR_LT_BIT: // check bit 62 set - BT(64, M(&PowerPC::ppcState.cr_val[field]), Imm8(62)); + BT(64, PPCSTATE(cr_val[field]), Imm8(62)); return J_CC(jump_if_set ? CC_C : CC_NC, true); default: @@ -154,7 +154,7 @@ void Jit64::mtspr(UGeckoInstruction inst) gpr.Lock(d); gpr.BindToRegister(d, true, false); } - MOV(32, M(&PowerPC::ppcState.spr[iIndex]), gpr.R(d)); + MOV(32, PPCSTATE(spr[iIndex]), gpr.R(d)); gpr.UnlockAll(); } @@ -173,8 +173,10 @@ void Jit64::mfspr(UGeckoInstruction inst) // typical use of this instruction is to call it three times, e.g. mftbu/mftbl/mftbu/cmpw/bne // to deal with possible timer wraparound. This makes the second two (out of three) completely // redundant for the JIT. + // no register choice + + gpr.FlushLockX(RDX, RAX); u32 offset = js.downcountAmount / SystemTimers::TIMER_RATIO; - gpr.FlushLockX(EDX); // An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the // cost of calling out to C for this is actually significant. @@ -190,7 +192,7 @@ void Jit64::mfspr(UGeckoInstruction inst) LEA(64, RAX, MComplex(RAX, RDX, SCALE_1, offset)); else ADD(64, R(RAX), R(RDX)); - MOV(64, M(&TL), R(RAX)); + MOV(64, PPCSTATE(spr[SPR_TL]), R(RAX)); // Two calls of TU/TL next to each other are extremely common in typical usage, so merge them // if we can. 
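Aside: the mfspr hunk above replaces a call out to C with an inline computation of the fake timebase. As a reading aid, this is roughly the value it computes, in the shape of CoreTiming::GetFakeTimeBase; the parameter names and the TIMER_RATIO value here are assumptions for illustration, not the real definitions.

    #include <cstdint>

    // Sketch: the emulated timebase advances at the CPU clock divided by
    // SystemTimers::TIMER_RATIO, measured from a snapshot taken when the
    // timebase was last written. The emitted code pre-adds 'offset'
    // (js.downcountAmount / TIMER_RATIO) so the result already reflects
    // the cycles this block will consume.
    static const uint64_t TIMER_RATIO = 8; // illustrative divider only

    uint64_t GetFakeTimeBase(uint64_t tb_start_value, uint64_t tb_start_ticks,
                             uint64_t ticks_now)
    {
        return tb_start_value + (ticks_now - tb_start_ticks) / TIMER_RATIO;
    }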
@@ -205,14 +207,14 @@ void Jit64::mfspr(UGeckoInstruction inst) gpr.BindToRegister(d, false); gpr.BindToRegister(n, false); if (iIndex == SPR_TL) - MOV(32, gpr.R(d), R(EAX)); + MOV(32, gpr.R(d), R(RAX)); if (nextIndex == SPR_TL) - MOV(32, gpr.R(n), R(EAX)); + MOV(32, gpr.R(n), R(RAX)); SHR(64, R(RAX), Imm8(32)); if (iIndex == SPR_TU) - MOV(32, gpr.R(d), R(EAX)); + MOV(32, gpr.R(d), R(RAX)); if (nextIndex == SPR_TU) - MOV(32, gpr.R(n), R(EAX)); + MOV(32, gpr.R(n), R(RAX)); } else { @@ -220,8 +222,9 @@ void Jit64::mfspr(UGeckoInstruction inst) gpr.BindToRegister(d, false); if (iIndex == SPR_TU) SHR(64, R(RAX), Imm8(32)); - MOV(32, gpr.R(d), R(EAX)); + MOV(32, gpr.R(d), R(RAX)); } + gpr.UnlockAllX(); break; } case SPR_WPAR: @@ -234,11 +237,10 @@ void Jit64::mfspr(UGeckoInstruction inst) default: gpr.Lock(d); gpr.BindToRegister(d, false); - MOV(32, gpr.R(d), M(&PowerPC::ppcState.spr[iIndex])); + MOV(32, gpr.R(d), PPCSTATE(spr[iIndex])); break; } gpr.UnlockAll(); - gpr.UnlockAllX(); } void Jit64::mtmsr(UGeckoInstruction inst) @@ -251,7 +253,7 @@ void Jit64::mtmsr(UGeckoInstruction inst) gpr.Lock(inst.RS); gpr.BindToRegister(inst.RS, true, false); } - MOV(32, M(&MSR), gpr.R(inst.RS)); + MOV(32, PPCSTATE(msr), gpr.R(inst.RS)); gpr.UnlockAll(); gpr.Flush(); fpr.Flush(); @@ -259,17 +261,17 @@ void Jit64::mtmsr(UGeckoInstruction inst) // If some exceptions are pending and EE are now enabled, force checking // external exceptions when going out of mtmsr in order to execute delayed // interrupts as soon as possible. - TEST(32, M(&MSR), Imm32(0x8000)); + TEST(32, PPCSTATE(msr), Imm32(0x8000)); FixupBranch eeDisabled = J_CC(CC_Z); - TEST(32, M((void*)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_EXTERNAL_INT | EXCEPTION_PERFORMANCE_MONITOR | EXCEPTION_DECREMENTER)); + TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT | EXCEPTION_PERFORMANCE_MONITOR | EXCEPTION_DECREMENTER)); FixupBranch noExceptionsPending = J_CC(CC_Z); // Check if a CP interrupt is waiting and keep the GPU emulation in sync (issue 4336) TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP)); FixupBranch cpInt = J_CC(CC_NZ); - MOV(32, M(&PC), Imm32(js.compilerPC + 4)); + MOV(32, PPCSTATE(pc), Imm32(js.compilerPC + 4)); WriteExternalExceptionExit(); SetJumpTarget(cpInt); @@ -288,7 +290,7 @@ void Jit64::mfmsr(UGeckoInstruction inst) //Privileged? 
gpr.Lock(inst.RD); gpr.BindToRegister(inst.RD, false, true); - MOV(32, gpr.R(inst.RD), M(&MSR)); + MOV(32, gpr.R(inst.RD), PPCSTATE(msr)); gpr.UnlockAll(); } @@ -308,33 +310,32 @@ void Jit64::mfcr(UGeckoInstruction inst) gpr.BindToRegister(d, false, true); XOR(32, gpr.R(d), gpr.R(d)); - gpr.FlushLockX(ABI_PARAM1); - X64Reg cr_val = ABI_PARAM1; - // we only need to zero the high bits of EAX once - XOR(32, R(EAX), R(EAX)); + X64Reg cr_val = RSCRATCH2; + // we only need to zero the high bits of RSCRATCH once + XOR(32, R(RSCRATCH), R(RSCRATCH)); for (int i = 0; i < 8; i++) { static const u8 m_flagTable[8] = {0x0,0x1,0x8,0x9,0x0,0x1,0x8,0x9}; if (i != 0) SHL(32, gpr.R(d), Imm8(4)); - MOV(64, R(cr_val), M(&PowerPC::ppcState.cr_val[i])); + MOV(64, R(cr_val), PPCSTATE(cr_val[i])); // EQ: Bits 31-0 == 0; set flag bit 1 TEST(32, R(cr_val), R(cr_val)); - SETcc(CC_Z, R(EAX)); - LEA(32, gpr.RX(d), MComplex(gpr.RX(d), EAX, SCALE_2, 0)); + SETcc(CC_Z, R(RSCRATCH)); + LEA(32, gpr.RX(d), MComplex(gpr.RX(d), RSCRATCH, SCALE_2, 0)); // GT: Value > 0; set flag bit 2 TEST(64, R(cr_val), R(cr_val)); - SETcc(CC_G, R(EAX)); - LEA(32, gpr.RX(d), MComplex(gpr.RX(d), EAX, SCALE_4, 0)); + SETcc(CC_G, R(RSCRATCH)); + LEA(32, gpr.RX(d), MComplex(gpr.RX(d), RSCRATCH, SCALE_4, 0)); // SO: Bit 61 set; set flag bit 0 // LT: Bit 62 set; set flag bit 3 SHR(64, R(cr_val), Imm8(61)); - MOVZX(32, 8, EAX, MDisp(cr_val, (u32)(u64)m_flagTable)); - OR(32, gpr.R(d), R(EAX)); + MOVZX(32, 8, RSCRATCH, MDisp(cr_val, (u32)(u64)m_flagTable)); + OR(32, gpr.R(d), R(RSCRATCH)); } gpr.UnlockAll(); @@ -360,12 +361,12 @@ void Jit64::mtcrf(UGeckoInstruction inst) u64 newcrval = PPCCRToInternal(newcr); if ((s64)newcrval == (s32)newcrval) { - MOV(64, M(&PowerPC::ppcState.cr_val[i]), Imm32((s32)newcrval)); + MOV(64, PPCSTATE(cr_val[i]), Imm32((s32)newcrval)); } else { - MOV(64, R(RAX), Imm64(newcrval)); - MOV(64, M(&PowerPC::ppcState.cr_val[i]), R(RAX)); + MOV(64, R(RSCRATCH), Imm64(newcrval)); + MOV(64, PPCSTATE(cr_val[i]), R(RSCRATCH)); } } } @@ -378,13 +379,13 @@ void Jit64::mtcrf(UGeckoInstruction inst) { if ((crm & (0x80 >> i)) != 0) { - MOV(32, R(EAX), gpr.R(inst.RS)); + MOV(32, R(RSCRATCH), gpr.R(inst.RS)); if (i != 7) - SHR(32, R(EAX), Imm8(28 - (i * 4))); + SHR(32, R(RSCRATCH), Imm8(28 - (i * 4))); if (i != 0) - AND(32, R(EAX), Imm8(0xF)); - MOV(64, R(EAX), MScaled(EAX, SCALE_8, (u32)(u64)m_crTable)); - MOV(64, M(&PowerPC::ppcState.cr_val[i]), R(EAX)); + AND(32, R(RSCRATCH), Imm8(0xF)); + MOV(64, R(RSCRATCH), MScaled(RSCRATCH, SCALE_8, (u32)(u64)m_crTable)); + MOV(64, PPCSTATE(cr_val[i]), R(RSCRATCH)); } } gpr.UnlockAll(); @@ -400,8 +401,8 @@ void Jit64::mcrf(UGeckoInstruction inst) // USES_CR if (inst.CRFS != inst.CRFD) { - MOV(64, R(EAX), M(&PowerPC::ppcState.cr_val[inst.CRFS])); - MOV(64, M(&PowerPC::ppcState.cr_val[inst.CRFD]), R(EAX)); + MOV(64, R(RSCRATCH), PPCSTATE(cr_val[inst.CRFS])); + MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(RSCRATCH)); } } @@ -413,14 +414,14 @@ void Jit64::mcrxr(UGeckoInstruction inst) // USES_CR // Copy XER[0-3] into CR[inst.CRFD] - MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); - SHR(32, R(EAX), Imm8(28)); + MOV(32, R(RSCRATCH), PPCSTATE(spr[SPR_XER])); + SHR(32, R(RSCRATCH), Imm8(28)); - MOV(64, R(EAX), MScaled(EAX, SCALE_8, (u32)(u64)m_crTable)); - MOV(64, M(&PowerPC::ppcState.cr_val[inst.CRFD]), R(EAX)); + MOV(64, R(RSCRATCH), MScaled(RSCRATCH, SCALE_8, (u32)(u64)m_crTable)); + MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(RSCRATCH)); // Clear XER[0-3] - AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), 
Imm32(0x0FFFFFFF)); + AND(32, PPCSTATE(spr[SPR_XER]), Imm32(0x0FFFFFFF)); } void Jit64::crXXX(UGeckoInstruction inst) @@ -439,9 +440,8 @@ void Jit64::crXXX(UGeckoInstruction inst) // crnand or crnor bool negateB = inst.SUBOP10 == 225 || inst.SUBOP10 == 33; - gpr.FlushLockX(ABI_PARAM1); - GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), ABI_PARAM1, negateA); - GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), EAX, negateB); + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), RSCRATCH2, negateA); + GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), RSCRATCH, negateB); // Compute combined bit switch (inst.SUBOP10) @@ -449,23 +449,23 @@ void Jit64::crXXX(UGeckoInstruction inst) case 33: // crnor: ~(A || B) == (~A && ~B) case 129: // crandc case 257: // crand - AND(8, R(EAX), R(ABI_PARAM1)); + AND(8, R(RSCRATCH), R(RSCRATCH2)); break; case 193: // crxor case 289: // creqv - XOR(8, R(EAX), R(ABI_PARAM1)); + XOR(8, R(RSCRATCH), R(RSCRATCH2)); break; case 225: // crnand: ~(A && B) == (~A || ~B) case 417: // crorc case 449: // cror - OR(8, R(EAX), R(ABI_PARAM1)); + OR(8, R(RSCRATCH), R(RSCRATCH2)); break; } // Store result bit in CRBD - SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), EAX); + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), RSCRATCH); gpr.UnlockAllX(); } diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp index 6798f390cc..d266023df5 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp @@ -157,13 +157,15 @@ static void fregSpill(RegInfo& RI, X64Reg reg) RI.fregs[reg] = nullptr; } -// ECX is scratch, so we don't allocate it +// RAX and RDX are scratch, so we don't allocate them +// (TODO: if we could lock RCX here too then we could allocate it - needed for +// shifts) // 64-bit - calling conventions differ between linux & windows, so... 
#ifdef _WIN32 static const X64Reg RegAllocOrder[] = {RSI, RDI, R12, R13, R14, R8, R9, R10, R11}; #else -static const X64Reg RegAllocOrder[] = {RBP, R12, R13, R14, R8, R9, R10, R11}; +static const X64Reg RegAllocOrder[] = {R12, R13, R14, R8, R9, R10, R11}; #endif static const int RegAllocSize = sizeof(RegAllocOrder) / sizeof(X64Reg); static const X64Reg FRegAllocOrder[] = {XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, XMM2, XMM3, XMM4, XMM5}; @@ -602,22 +604,22 @@ static void regEmitMemStore(RegInfo& RI, InstLoc I, unsigned Size) { auto info = regBuildMemAddress(RI, I, getOp2(I), 2, Size, nullptr); if (info.first.IsImm()) - RI.Jit->MOV(32, R(ECX), info.first); + RI.Jit->MOV(32, R(RSCRATCH2), info.first); else - RI.Jit->LEA(32, ECX, MDisp(info.first.GetSimpleReg(), info.second)); + RI.Jit->LEA(32, RSCRATCH2, MDisp(info.first.GetSimpleReg(), info.second)); - regSpill(RI, EAX); + regSpill(RI, RSCRATCH); if (isImm(*getOp1(I))) { - RI.Jit->MOV(Size, R(EAX), regImmForConst(RI, getOp1(I), Size)); + RI.Jit->MOV(Size, R(RSCRATCH), regImmForConst(RI, getOp1(I), Size)); } else { - RI.Jit->MOV(32, R(EAX), regLocForInst(RI, getOp1(I))); + RI.Jit->MOV(32, R(RSCRATCH), regLocForInst(RI, getOp1(I))); } - RI.Jit->SafeWriteRegToReg(EAX, ECX, Size, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); + RI.Jit->SafeWriteRegToReg(RSCRATCH, RSCRATCH2, Size, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); if (RI.IInfo[I - RI.FirstI] & 4) regClearInst(RI, getOp1(I)); } @@ -675,9 +677,9 @@ static void regEmitCmp(RegInfo& RI, InstLoc I) static void regEmitICmpInst(RegInfo& RI, InstLoc I, CCFlags flag) { regEmitCmp(RI, I); - RI.Jit->SETcc(flag, R(ECX)); // Caution: SETCC uses 8-bit regs! + RI.Jit->SETcc(flag, R(RSCRATCH2)); // Caution: SETCC uses 8-bit regs! X64Reg reg = regBinReg(RI, I); - RI.Jit->MOVZX(32, 8, reg, R(ECX)); + RI.Jit->MOVZX(32, 8, reg, R(RSCRATCH2)); RI.regs[reg] = I; regNormalRegClear(RI, I); } @@ -707,8 +709,8 @@ static void regEmitICmpCRInst(RegInfo& RI, InstLoc I) unsigned RHS = RI.Build->GetImmValue(getOp2(I)); if (!signed_compare && (RHS & 0x80000000U)) { - RI.Jit->MOV(32, R(EAX), Imm32(RHS)); - RI.Jit->SUB(64, R(reg), R(RAX)); + RI.Jit->MOV(32, R(RSCRATCH), Imm32(RHS)); + RI.Jit->SUB(64, R(reg), R(RSCRATCH)); } else if (RHS) { @@ -718,10 +720,10 @@ static void regEmitICmpCRInst(RegInfo& RI, InstLoc I) else { if (signed_compare) - RI.Jit->MOVSX(64, 32, RAX, regLocForInst(RI, getOp2(I))); + RI.Jit->MOVSX(64, 32, RSCRATCH, regLocForInst(RI, getOp2(I))); else - RI.Jit->MOV(32, R(EAX), regLocForInst(RI, getOp2(I))); - RI.Jit->SUB(64, R(reg), R(RAX)); + RI.Jit->MOV(32, R(RSCRATCH), regLocForInst(RI, getOp2(I))); + RI.Jit->SUB(64, R(reg), R(RSCRATCH)); } RI.regs[reg] = I; @@ -949,8 +951,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) // interpreter call at the moment, but optimizing interpreter // calls isn't completely out of the question... 
regSpillCallerSaved(RI); - Jit->MOV(32, M(&PC), Imm32(InstLoc)); - Jit->MOV(32, M(&NPC), Imm32(InstLoc+4)); + Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc)); + Jit->MOV(32, PPCSTATE(npc), Imm32(InstLoc+4)); Jit->ABI_CallFunctionC((void*)GetInterpreterOp(InstCode), InstCode); break; @@ -962,7 +964,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = regFindFreeReg(RI); unsigned ppcreg = *I >> 8; - Jit->MOV(32, R(reg), M(&PowerPC::ppcState.gpr[ppcreg])); + Jit->MOV(32, R(reg), PPCSTATE(gpr[ppcreg])); RI.regs[reg] = I; break; } @@ -973,7 +975,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = regFindFreeReg(RI); unsigned ppcreg = *I >> 8; - Jit->MOV(64, R(reg), M(&PowerPC::ppcState.cr_val[ppcreg])); + Jit->MOV(64, R(reg), PPCSTATE(cr_val[ppcreg])); RI.regs[reg] = I; break; } @@ -983,7 +985,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = regFindFreeReg(RI); - Jit->MOV(32, R(reg), M(&CTR)); + Jit->MOV(32, R(reg), PPCSTATE_CTR); RI.regs[reg] = I; break; } @@ -993,7 +995,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = regFindFreeReg(RI); - Jit->MOV(32, R(reg), M(&LR)); + Jit->MOV(32, R(reg), PPCSTATE_LR); RI.regs[reg] = I; break; } @@ -1003,7 +1005,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = regFindFreeReg(RI); - Jit->MOV(32, R(reg), M(&MSR)); + Jit->MOV(32, R(reg), PPCSTATE(msr)); RI.regs[reg] = I; break; } @@ -1014,7 +1016,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = regFindFreeReg(RI); unsigned gqr = *I >> 8; - Jit->MOV(32, R(reg), M(&GQR(gqr))); + Jit->MOV(32, R(reg), PPCSTATE(spr[SPR_GQR0 + gqr])); RI.regs[reg] = I; break; } @@ -1024,7 +1026,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = regFindFreeReg(RI); - Jit->MOV(32, R(reg), M(&PowerPC::ppcState.spr[SPR_XER])); + Jit->MOV(32, R(reg), PPCSTATE(spr[SPR_XER])); Jit->SHR(32, R(reg), Imm8(29)); Jit->AND(32, R(reg), Imm8(1)); RI.regs[reg] = I; @@ -1042,7 +1044,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { X64Reg reg = regEnsureInReg(RI, getOp1(I)); unsigned ppcreg = *I >> 16; - Jit->MOV(64, M(&PowerPC::ppcState.cr_val[ppcreg]), R(reg)); + Jit->MOV(64, PPCSTATE(cr_val[ppcreg]), R(reg)); regNormalRegClear(RI, I); break; } @@ -1067,15 +1069,15 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) // If some exceptions are pending and EE are now enabled, force checking // external exceptions when going out of mtmsr in order to execute delayed // interrupts as soon as possible. 
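The comment above describes the guard emitted just below (part of the StoreMSR case), mirroring Jit64's mtmsr path: if the write turns on MSR.EE while exceptions are pending, the block exits early so delayed interrupts are delivered promptly. A minimal sketch of that decision, assuming 0x8000 is the EE bit exactly as the emitted TEST uses it:

    #include <cstdint>

    // Returns true when the JIT block should take the exception exit right
    // after mtmsr: EE just became visible and something is pending.
    bool ShouldCheckExceptionsAfterMtmsr(uint32_t msr, uint32_t pending_exceptions)
    {
        if ((msr & 0x8000) == 0)         // external-exception enable still clear
            return false;
        return pending_exceptions != 0;  // anything pending forces the exit
    }

When this is true, the emitted code stores InstLoc + 4 to PC first, since the exception handler reads PC.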
- Jit->MOV(32, R(EAX), M(&MSR)); - Jit->TEST(32, R(EAX), Imm32(0x8000)); + Jit->MOV(32, R(RSCRATCH), PPCSTATE(msr)); + Jit->TEST(32, R(RSCRATCH), Imm32(0x8000)); FixupBranch eeDisabled = Jit->J_CC(CC_Z); - Jit->MOV(32, R(EAX), M((void*)&PowerPC::ppcState.Exceptions)); - Jit->TEST(32, R(EAX), R(EAX)); + Jit->MOV(32, R(RSCRATCH), PPCSTATE(Exceptions)); + Jit->TEST(32, R(RSCRATCH), R(RSCRATCH)); FixupBranch noExceptionsPending = Jit->J_CC(CC_Z); - Jit->MOV(32, M(&PC), Imm32(InstLoc + 4)); + Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc + 4)); Jit->WriteExceptionExit(); // TODO: Implement WriteExternalExceptionExit for JitIL Jit->SetJumpTarget(eeDisabled); @@ -1111,11 +1113,11 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) } case StoreFPRF: { - Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); - Jit->AND(32, R(ECX), Imm8(0x1F)); - Jit->SHL(32, R(ECX), Imm8(12)); - Jit->AND(32, M(&FPSCR), Imm32(~(0x1F << 12))); - Jit->OR(32, M(&FPSCR), R(ECX)); + Jit->MOV(32, R(RSCRATCH2), regLocForInst(RI, getOp1(I))); + Jit->AND(32, R(RSCRATCH2), Imm8(0x1F)); + Jit->SHL(32, R(RSCRATCH2), Imm8(12)); + Jit->AND(32, PPCSTATE(fpscr), Imm32(~(0x1F << 12))); + Jit->OR(32, PPCSTATE(fpscr), R(RSCRATCH2)); regNormalRegClear(RI, I); break; } @@ -1155,8 +1157,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = regUReg(RI, I); - Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); - Jit->MOVSX(32, 8, reg, R(ECX)); + Jit->MOV(32, R(RSCRATCH2), regLocForInst(RI, getOp1(I))); + Jit->MOVSX(32, 8, reg, R(RSCRATCH2)); RI.regs[reg] = I; regNormalRegClear(RI, I); break; @@ -1178,9 +1180,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = regUReg(RI, I); - Jit->MOV(32, R(ECX), Imm32(63)); + Jit->MOV(32, R(RSCRATCH2), Imm32(63)); Jit->BSR(32, reg, regLocForInst(RI, getOp1(I))); - Jit->CMOVcc(32, reg, R(ECX), CC_Z); + Jit->CMOVcc(32, reg, R(RSCRATCH2), CC_Z); Jit->XOR(32, R(reg), Imm8(31)); RI.regs[reg] = I; regNormalRegClear(RI, I); @@ -1265,6 +1267,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) if (!thisUsed) break; + // no register choice regSpill(RI, EAX); regSpill(RI, EDX); X64Reg reg = regBinReg(RI, I); @@ -1419,35 +1422,35 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg cr_val = regUReg(RI, I); Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I))); - Jit->XOR(32, R(EAX), R(EAX)); + Jit->XOR(32, R(RSCRATCH), R(RSCRATCH)); // SO: Bit 61 set. - Jit->MOV(64, R(RCX), R(cr_val)); - Jit->SHR(64, R(RCX), Imm8(61)); - Jit->AND(32, R(ECX), Imm8(1)); - Jit->OR(32, R(EAX), R(ECX)); + Jit->MOV(64, R(RSCRATCH2), R(cr_val)); + Jit->SHR(64, R(RSCRATCH2), Imm8(61)); + Jit->AND(32, R(RSCRATCH2), Imm8(1)); + Jit->OR(32, R(RSCRATCH), R(RSCRATCH2)); // EQ: Bits 31-0 == 0. - Jit->XOR(32, R(ECX), R(ECX)); + Jit->XOR(32, R(RSCRATCH2), R(RSCRATCH2)); Jit->TEST(32, R(cr_val), R(cr_val)); - Jit->SETcc(CC_Z, R(ECX)); - Jit->SHL(32, R(ECX), Imm8(1)); - Jit->OR(32, R(EAX), R(ECX)); + Jit->SETcc(CC_Z, R(RSCRATCH2)); + Jit->SHL(32, R(RSCRATCH2), Imm8(1)); + Jit->OR(32, R(RSCRATCH), R(RSCRATCH2)); // GT: Value > 0. - Jit->XOR(32, R(ECX), R(ECX)); + Jit->XOR(32, R(RSCRATCH2), R(RSCRATCH2)); Jit->TEST(64, R(cr_val), R(cr_val)); - Jit->SETcc(CC_G, R(ECX)); - Jit->SHL(32, R(ECX), Imm8(2)); - Jit->OR(32, R(EAX), R(ECX)); + Jit->SETcc(CC_G, R(RSCRATCH2)); + Jit->SHL(32, R(RSCRATCH2), Imm8(2)); + Jit->OR(32, R(RSCRATCH), R(RSCRATCH2)); // LT: Bit 62 set. 
- Jit->MOV(64, R(ECX), R(cr_val)); - Jit->SHR(64, R(ECX), Imm8(62 - 3)); - Jit->AND(32, R(ECX), Imm8(0x8)); - Jit->OR(32, R(EAX), R(ECX)); + Jit->MOV(64, R(RSCRATCH2), R(cr_val)); + Jit->SHR(64, R(RSCRATCH2), Imm8(62 - 3)); + Jit->AND(32, R(RSCRATCH2), Imm8(0x8)); + Jit->OR(32, R(RSCRATCH), R(RSCRATCH2)); - Jit->MOV(32, R(cr_val), R(EAX)); + Jit->MOV(32, R(cr_val), R(RSCRATCH)); RI.regs[cr_val] = I; regNormalRegClear(RI, I); break; @@ -1460,34 +1463,34 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg cr_val = regUReg(RI, I); Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I))); - Jit->MOV(64, R(RCX), Imm64(1ull << 32)); + Jit->MOV(64, R(RSCRATCH2), Imm64(1ull << 32)); // SO - Jit->MOV(64, R(RAX), R(cr_val)); - Jit->SHL(64, R(RAX), Imm8(63)); - Jit->SHR(64, R(RAX), Imm8(63 - 61)); - Jit->OR(64, R(RCX), R(RAX)); + Jit->MOV(64, R(RSCRATCH), R(cr_val)); + Jit->SHL(64, R(RSCRATCH), Imm8(63)); + Jit->SHR(64, R(RSCRATCH), Imm8(63 - 61)); + Jit->OR(64, R(RSCRATCH2), R(RSCRATCH)); // EQ - Jit->MOV(64, R(RAX), R(cr_val)); - Jit->NOT(64, R(RAX)); - Jit->AND(64, R(RAX), Imm8(CR_EQ)); - Jit->OR(64, R(RCX), R(RAX)); + Jit->MOV(64, R(RSCRATCH), R(cr_val)); + Jit->NOT(64, R(RSCRATCH)); + Jit->AND(64, R(RSCRATCH), Imm8(CR_EQ)); + Jit->OR(64, R(RSCRATCH2), R(RSCRATCH)); // GT - Jit->MOV(64, R(RAX), R(cr_val)); - Jit->NOT(64, R(RAX)); - Jit->AND(64, R(RAX), Imm8(CR_GT)); - Jit->SHL(64, R(RAX), Imm8(63 - 2)); - Jit->OR(64, R(RCX), R(RAX)); + Jit->MOV(64, R(RSCRATCH), R(cr_val)); + Jit->NOT(64, R(RSCRATCH)); + Jit->AND(64, R(RSCRATCH), Imm8(CR_GT)); + Jit->SHL(64, R(RSCRATCH), Imm8(63 - 2)); + Jit->OR(64, R(RSCRATCH2), R(RSCRATCH)); // LT - Jit->MOV(64, R(RAX), R(cr_val)); - Jit->AND(64, R(RAX), Imm8(CR_LT)); - Jit->SHL(64, R(RAX), Imm8(62 - 3)); - Jit->OR(64, R(RCX), R(RAX)); + Jit->MOV(64, R(RSCRATCH), R(cr_val)); + Jit->AND(64, R(RSCRATCH), Imm8(CR_LT)); + Jit->SHL(64, R(RSCRATCH), Imm8(62 - 3)); + Jit->OR(64, R(RSCRATCH2), R(RSCRATCH)); - Jit->MOV(64, R(cr_val), R(RCX)); + Jit->MOV(64, R(cr_val), R(RSCRATCH2)); RI.regs[cr_val] = I; regNormalRegClear(RI, I); @@ -1499,10 +1502,10 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = regUReg(RI, I); - Jit->MOV(64, R(RAX), Imm64(1ull << 61)); - Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RAX)); - Jit->SETcc(CC_NZ, R(AL)); - Jit->MOVZX(32, 8, reg, R(AL)); + Jit->MOV(64, R(RSCRATCH), Imm64(1ull << 61)); + Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RSCRATCH)); + Jit->SETcc(CC_NZ, R(RSCRATCH)); + Jit->MOVZX(32, 8, reg, R(RSCRATCH)); RI.regs[reg] = I; regNormalRegClear(RI, I); break; @@ -1514,8 +1517,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = regUReg(RI, I); Jit->CMP(32, regLocForInst(RI, getOp1(I)), Imm32(0)); - Jit->SETcc(CC_Z, R(AL)); - Jit->MOVZX(32, 8, reg, R(AL)); + Jit->SETcc(CC_Z, R(RSCRATCH)); + Jit->MOVZX(32, 8, reg, R(RSCRATCH)); RI.regs[reg] = I; regNormalRegClear(RI, I); break; @@ -1527,8 +1530,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = regUReg(RI, I); Jit->CMP(64, regLocForInst(RI, getOp1(I)), Imm8(0)); - Jit->SETcc(CC_G, R(AL)); - Jit->MOVZX(32, 8, reg, R(AL)); + Jit->SETcc(CC_G, R(RSCRATCH)); + Jit->MOVZX(32, 8, reg, R(RSCRATCH)); RI.regs[reg] = I; regNormalRegClear(RI, I); break; @@ -1539,10 +1542,10 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = regUReg(RI, I); - Jit->MOV(64, R(RAX), Imm64(1ull << 62)); - Jit->TEST(64, 
regLocForInst(RI, getOp1(I)), R(RAX)); - Jit->SETcc(CC_NZ, R(AL)); - Jit->MOVZX(32, 8, reg, R(AL)); + Jit->MOV(64, R(RSCRATCH), Imm64(1ull << 62)); + Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RSCRATCH)); + Jit->SETcc(CC_NZ, R(RSCRATCH)); + Jit->MOVZX(32, 8, reg, R(RSCRATCH)); RI.regs[reg] = I; regNormalRegClear(RI, I); break; @@ -1553,9 +1556,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = fregFindFreeReg(RI); - Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); - RI.Jit->SafeLoadToReg(ECX, R(ECX), 32, 0, regsInUse(RI), false, EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); - Jit->MOVD_xmm(reg, R(ECX)); + Jit->MOV(32, R(RSCRATCH2), regLocForInst(RI, getOp1(I))); + RI.Jit->SafeLoadToReg(RSCRATCH2, R(RSCRATCH2), 32, 0, regsInUse(RI), false, EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); + Jit->MOVD_xmm(reg, R(RSCRATCH2)); RI.fregs[reg] = I; regNormalRegClear(RI, I); break; @@ -1567,9 +1570,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = fregFindFreeReg(RI); const OpArg loc = regLocForInst(RI, getOp1(I)); - Jit->MOV(32, R(ECX), loc); - RI.Jit->SafeLoadToReg(RCX, R(ECX), 64, 0, regsInUse(RI), false, EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); - Jit->MOVQ_xmm(reg, R(RCX)); + Jit->MOV(32, R(RSCRATCH2), loc); + RI.Jit->SafeLoadToReg(RSCRATCH2, R(RSCRATCH2), 64, 0, regsInUse(RI), false, EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); + Jit->MOVQ_xmm(reg, R(RSCRATCH2)); RI.fregs[reg] = I; regNormalRegClear(RI, I); break; @@ -1579,8 +1582,6 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) if (!thisUsed) break; - regSpill(RI, EAX); - regSpill(RI, EDX); X64Reg reg = fregFindFreeReg(RI); // The lower 3 bits is for GQR index. The next 1 bit is for inst.W unsigned int quantreg = (*I >> 16) & 0x7; @@ -1589,13 +1590,12 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) // Hence, we need to mask out the unused bits. The layout of the GQR register is // UU[SCALE]UUUUU[TYPE] where SCALE is 6 bits and TYPE is 3 bits, so we have to AND with // 0b0011111100000111, or 0x3F07. 
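As a worked example of the mask just described (the constants fall straight out of the UU[SCALE]UUUUU[TYPE] layout):

    #include <cstdint>

    // 0b0011111100000111 == 0x3F07: keep SCALE (bits 13-8) and TYPE
    // (bits 2-0) of the relevant GQR halfword, clear the undefined bits.
    void DecodeGQRHalf(uint32_t gqr_half, uint32_t& type, uint32_t& scale)
    {
        uint32_t masked = gqr_half & 0x3F07;
        type  = masked & 0x7;          // selects the quantized type
        scale = (masked >> 8) & 0x3F;  // 6-bit dequantization scale
    }

The low three bits (plus the W bit OR'd in at bit 3 below) index the pairedLoadQuantized dispatch table.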
- Jit->MOV(32, R(EAX), Imm32(0x3F07)); - Jit->AND(32, R(EAX), M(((char *)&GQR(quantreg)) + 2)); - Jit->MOVZX(32, 8, EDX, R(AL)); - Jit->OR(32, R(EDX), Imm8(w << 3)); + Jit->MOV(32, R(RSCRATCH), Imm32(0x3F07)); + Jit->AND(32, R(RSCRATCH), M(((char *)&GQR(quantreg)) + 2)); + Jit->OR(32, R(RSCRATCH), Imm8(w << 3)); - Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I))); - Jit->CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedLoadQuantized))); + Jit->MOV(32, R(RSCRATCH2), regLocForInst(RI, getOp1(I))); + Jit->CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedLoadQuantized))); Jit->MOVAPD(reg, R(XMM0)); RI.fregs[reg] = I; regNormalRegClear(RI, I); @@ -1603,15 +1603,15 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) } case StoreSingle: { - regSpill(RI, EAX); + regSpill(RI, RSCRATCH); const OpArg loc1 = fregLocForInst(RI, getOp1(I)); if (loc1.IsSimpleReg()) - Jit->MOVD_xmm(R(EAX), loc1.GetSimpleReg()); + Jit->MOVD_xmm(R(RSCRATCH), loc1.GetSimpleReg()); else - Jit->MOV(32, R(EAX), loc1); + Jit->MOV(32, R(RSCRATCH), loc1); - Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I))); - RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); + Jit->MOV(32, R(RSCRATCH2), regLocForInst(RI, getOp2(I))); + RI.Jit->SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 32, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); if (RI.IInfo[I - RI.FirstI] & 4) fregClearInst(RI, getOp1(I)); if (RI.IInfo[I - RI.FirstI] & 8) @@ -1620,14 +1620,14 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) } case StoreDouble: { - regSpill(RI, EAX); + regSpill(RI, RSCRATCH); OpArg value = fregLocForInst(RI, getOp1(I)); OpArg address = regLocForInst(RI, getOp2(I)); Jit->MOVAPD(XMM0, value); - Jit->MOVQ_xmm(R(RAX), XMM0); - Jit->MOV(32, R(ECX), address); - RI.Jit->SafeWriteRegToReg(RAX, ECX, 64, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); + Jit->MOVQ_xmm(R(RSCRATCH), XMM0); + Jit->MOV(32, R(RSCRATCH2), address); + RI.Jit->SafeWriteRegToReg(RSCRATCH, RSCRATCH2, 64, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM); if (RI.IInfo[I - RI.FirstI] & 4) fregClearInst(RI, getOp1(I)); @@ -1637,16 +1637,16 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) } case StorePaired: { - regSpill(RI, EAX); - regSpill(RI, EDX); + regSpill(RI, RSCRATCH); + regSpill(RI, RSCRATCH2); u32 quantreg = *I >> 24; - Jit->MOV(32, R(EAX), Imm32(0x3F07)); - Jit->AND(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_GQR0 + quantreg])); - Jit->MOVZX(32, 8, EDX, R(AL)); + Jit->MOV(32, R(RSCRATCH), Imm32(0x3F07)); + Jit->AND(32, R(RSCRATCH), PPCSTATE(spr[SPR_GQR0 + quantreg])); + Jit->MOVZX(32, 8, RSCRATCH2, R(RSCRATCH)); - Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I))); + Jit->MOV(32, R(RSCRATCH2), regLocForInst(RI, getOp2(I))); Jit->MOVAPD(XMM0, fregLocForInst(RI, getOp1(I))); - Jit->CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedStoreQuantized))); + Jit->CALLptr(MScaled(RSCRATCH2, SCALE_8, (u32)(u64)(((JitIL *)jit)->asm_routines.pairedStoreQuantized))); if (RI.IInfo[I - RI.FirstI] & 4) fregClearInst(RI, getOp1(I)); if (RI.IInfo[I - RI.FirstI] & 8) @@ -1778,7 +1778,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = fregFindFreeReg(RI); unsigned ppcreg = *I >> 8; - Jit->MOVAPD(reg, M(&PowerPC::ppcState.ps[ppcreg])); + Jit->MOVAPD(reg, PPCSTATE(ps[ppcreg])); RI.fregs[reg] = I; break; } @@ -1790,21 +1790,21 @@ static 
void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) X64Reg reg = fregFindFreeReg(RI); unsigned ppcreg = *I >> 8; char *p = (char*)&(PowerPC::ppcState.ps[ppcreg][0]); - Jit->MOV(32, R(ECX), M(p+4)); - Jit->AND(32, R(ECX), Imm32(0x7ff00000)); - Jit->CMP(32, R(ECX), Imm32(0x38000000)); + Jit->MOV(32, R(RSCRATCH2), M(p+4)); + Jit->AND(32, R(RSCRATCH2), Imm32(0x7ff00000)); + Jit->CMP(32, R(RSCRATCH2), Imm32(0x38000000)); FixupBranch ok = Jit->J_CC(CC_AE); Jit->AND(32, M(p+4), Imm32(0x80000000)); Jit->MOV(32, M(p), Imm32(0)); Jit->SetJumpTarget(ok); - Jit->MOVAPD(reg, M(&PowerPC::ppcState.ps[ppcreg])); + Jit->MOVAPD(reg, PPCSTATE(ps[ppcreg])); RI.fregs[reg] = I; break; } case StoreFReg: { unsigned ppcreg = *I >> 16; - Jit->MOVAPD(M(&PowerPC::ppcState.ps[ppcreg]), + Jit->MOVAPD(PPCSTATE(ps[ppcreg]), fregEnsureInReg(RI, getOp1(I))); fregNormalRegClear(RI, I); break; @@ -1911,17 +1911,17 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) Jit->MOVSD(M(isSNANTemp[1]), XMM0); } Jit->ABI_CallFunction((void*)checkIsSNAN); - Jit->TEST(8, R(EAX), R(EAX)); + Jit->TEST(8, R(ABI_RETURN), R(ABI_RETURN)); FixupBranch ok = Jit->J_CC(CC_Z); - Jit->OR(32, M(&FPSCR), Imm32(FPSCR_FX)); // FPSCR.FX = 1; - Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXSNAN)); // FPSCR.Hex |= mask; - Jit->TEST(32, M(&FPSCR), Imm32(FPSCR_VE)); + Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); // FPSCR.FX = 1; + Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXSNAN)); // FPSCR.Hex |= mask; + Jit->TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_VE)); FixupBranch finish0 = Jit->J_CC(CC_NZ); - Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask; + Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask; FixupBranch finish1 = Jit->J(); Jit->SetJumpTarget(ok); - Jit->OR(32, M(&FPSCR), Imm32(FPSCR_FX)); // FPSCR.FX = 1; - Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask; + Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); // FPSCR.FX = 1; + Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask; Jit->SetJumpTarget(finish0); Jit->SetJumpTarget(finish1); } @@ -1940,10 +1940,10 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) Jit->MOVSD(M(isSNANTemp[1]), XMM0); } Jit->ABI_CallFunction((void*)checkIsSNAN); - Jit->TEST(8, R(EAX), R(EAX)); + Jit->TEST(8, R(ABI_RETURN), R(ABI_RETURN)); FixupBranch finish = Jit->J_CC(CC_Z); - Jit->OR(32, M(&FPSCR), Imm32(FPSCR_FX)); // FPSCR.FX = 1; - Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask; + Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); // FPSCR.FX = 1; + Jit->OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask; Jit->SetJumpTarget(finish); } @@ -2094,7 +2094,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) RI.Jit->Cleanup(); // is it needed? 
Jit->ABI_CallFunction((void *)&PowerPC::OnIdleIL); - Jit->MOV(32, M(&PC), Imm32(ibuild->GetImmValue( getOp2(I) ))); + Jit->MOV(32, PPCSTATE(pc), Imm32(ibuild->GetImmValue( getOp2(I) ))); Jit->WriteExceptionExit(); Jit->SetJumpTarget(cont); @@ -2179,7 +2179,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); Jit->ABI_CallFunction((void *)&CoreTiming::Idle); - Jit->MOV(32, M(&PC), Imm32(InstLoc)); + Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc)); Jit->WriteExceptionExit(); break; } @@ -2187,15 +2187,15 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); Jit->LOCK(); - Jit->OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_SYSCALL)); - Jit->MOV(32, M(&PC), Imm32(InstLoc + 4)); + Jit->OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_SYSCALL)); + Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc + 4)); Jit->WriteExceptionExit(); break; } case InterpreterBranch: { - Jit->MOV(32, R(EAX), M(&NPC)); - Jit->WriteExitDestInOpArg(R(EAX)); + Jit->MOV(32, R(RSCRATCH), PPCSTATE(npc)); + Jit->WriteExitDestInOpArg(R(RSCRATCH)); break; } case RFIExit: @@ -2203,31 +2203,31 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) // See Interpreter rfi for details const u32 mask = 0x87C0FFFF; // MSR = (MSR & ~mask) | (SRR1 & mask); - Jit->MOV(32, R(EAX), M(&MSR)); - Jit->MOV(32, R(ECX), M(&SRR1)); - Jit->AND(32, R(EAX), Imm32(~mask)); - Jit->AND(32, R(ECX), Imm32(mask)); - Jit->OR(32, R(EAX), R(ECX)); + Jit->MOV(32, R(RSCRATCH), PPCSTATE(msr)); + Jit->MOV(32, R(RSCRATCH2), PPCSTATE_SRR1); + Jit->AND(32, R(RSCRATCH), Imm32(~mask)); + Jit->AND(32, R(RSCRATCH2), Imm32(mask)); + Jit->OR(32, R(RSCRATCH), R(RSCRATCH2)); // MSR &= 0xFFFBFFFF; // Mask used to clear the bit MSR[13] - Jit->AND(32, R(EAX), Imm32(0xFFFBFFFF)); - Jit->MOV(32, M(&MSR), R(EAX)); + Jit->AND(32, R(RSCRATCH), Imm32(0xFFFBFFFF)); + Jit->MOV(32, PPCSTATE(msr), R(RSCRATCH)); // NPC = SRR0; - Jit->MOV(32, R(EAX), M(&SRR0)); - Jit->WriteRfiExitDestInOpArg(R(EAX)); + Jit->MOV(32, R(RSCRATCH), PPCSTATE_SRR0); + Jit->WriteRfiExitDestInOpArg(R(RSCRATCH)); break; } case FPExceptionCheck: { unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); //This instruction uses FPU - needs to add FP exception bailout - Jit->TEST(32, M(&PowerPC::ppcState.msr), Imm32(1 << 13)); // Test FP enabled bit + Jit->TEST(32, PPCSTATE(msr), Imm32(1 << 13)); // Test FP enabled bit FixupBranch b1 = Jit->J_CC(CC_NZ); // If a FPU exception occurs, the exception handler will read // from PC. Update PC with the latest value in case that happens. 
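The FPExceptionCheck case that follows uses the same bailout pattern as Jit64: if MSR[FP] (bit 13, as tested by the emitted TEST) is clear, write back PC, flag the exception, and leave the block. A minimal C++ sketch of that control flow; the exception bit value here is illustrative, not Dolphin's actual constant:

    #include <cstdint>

    static const uint32_t EXCEPTION_FPU_UNAVAILABLE_BIT = 1u << 5; // assumption

    // Returns true if execution may continue; false means the caller takes
    // the exception exit with PC already pointing at the faulting spot.
    bool FPGuard(uint32_t msr, uint32_t inst_loc, uint32_t& pc, uint32_t& exceptions)
    {
        if (msr & (1 << 13))   // FP enabled: fall through into the block
            return true;
        pc = inst_loc;         // the exception handler will read PC
        exceptions |= EXCEPTION_FPU_UNAVAILABLE_BIT;
        return false;
    }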
- Jit->MOV(32, M(&PC), Imm32(InstLoc)); - Jit->SUB(32, M(&PowerPC::ppcState.downcount), Imm32(Jit->js.downcountAmount)); - Jit->OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE)); + Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc)); + Jit->SUB(32, PPCSTATE(downcount), Imm32(Jit->js.downcountAmount)); + Jit->OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE)); Jit->WriteExceptionExit(); Jit->SetJumpTarget(b1); break; @@ -2235,12 +2235,12 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) case DSIExceptionCheck: { unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); - Jit->TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_DSI)); + Jit->TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI)); FixupBranch noMemException = Jit->J_CC(CC_Z); // If a memory exception occurs, the exception handler will read // from PC. Update PC with the latest value in case that happens. - Jit->MOV(32, M(&PC), Imm32(InstLoc)); + Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc)); Jit->WriteExceptionExit(); Jit->SetJumpTarget(noMemException); break; @@ -2250,12 +2250,12 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); // Address of instruction could not be translated - Jit->MOV(32, M(&NPC), Imm32(InstLoc)); - Jit->OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_ISI)); + Jit->MOV(32, PPCSTATE(npc), Imm32(InstLoc)); + Jit->OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_ISI)); // Remove the invalid instruction from the icache, forcing a recompile - Jit->MOV(64, R(RAX), ImmPtr(jit->GetBlockCache()->GetICachePtr(InstLoc))); - Jit->MOV(32, MatR(RAX), Imm32(JIT_ICACHE_INVALID_WORD)); + Jit->MOV(64, R(RSCRATCH), ImmPtr(jit->GetBlockCache()->GetICachePtr(InstLoc))); + Jit->MOV(32, MatR(RSCRATCH), Imm32(JIT_ICACHE_INVALID_WORD)); Jit->WriteExceptionExit(); break; } @@ -2263,16 +2263,16 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); - Jit->TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_ISI | EXCEPTION_PROGRAM | EXCEPTION_SYSCALL | EXCEPTION_FPU_UNAVAILABLE | EXCEPTION_DSI | EXCEPTION_ALIGNMENT)); + Jit->TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_ISI | EXCEPTION_PROGRAM | EXCEPTION_SYSCALL | EXCEPTION_FPU_UNAVAILABLE | EXCEPTION_DSI | EXCEPTION_ALIGNMENT)); FixupBranch clearInt = Jit->J_CC(CC_NZ); - Jit->TEST(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_EXTERNAL_INT)); + Jit->TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT)); FixupBranch noExtException = Jit->J_CC(CC_Z); - Jit->TEST(32, M((void *)&PowerPC::ppcState.msr), Imm32(0x0008000)); + Jit->TEST(32, PPCSTATE(msr), Imm32(0x0008000)); FixupBranch noExtIntEnable = Jit->J_CC(CC_Z); Jit->TEST(32, M((void *)&ProcessorInterface::m_InterruptCause), Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN | ProcessorInterface::INT_CAUSE_PE_FINISH)); FixupBranch noCPInt = Jit->J_CC(CC_Z); - Jit->MOV(32, M(&PC), Imm32(InstLoc)); + Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc)); Jit->WriteExceptionExit(); Jit->SetJumpTarget(noCPInt); @@ -2285,7 +2285,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) { unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); - Jit->MOV(32, M(&PC), Imm32(InstLoc)); + Jit->MOV(32, PPCSTATE(pc), Imm32(InstLoc)); Jit->ABI_CallFunction(reinterpret_cast(&PowerPC::CheckBreakPoints)); Jit->TEST(32, M((void*)PowerPC::GetStatePtr()), 
Imm32(0xFFFFFFFF)); FixupBranch noBreakpoint = Jit->J_CC(CC_Z); diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp index 7b9cd785f2..8f1c36e1fa 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp @@ -313,15 +313,15 @@ void JitIL::WriteCallInterpreter(UGeckoInstruction inst) { if (js.isLastInstruction) { - MOV(32, M(&PC), Imm32(js.compilerPC)); - MOV(32, M(&NPC), Imm32(js.compilerPC + 4)); + MOV(32, PPCSTATE(pc), Imm32(js.compilerPC)); + MOV(32, PPCSTATE(npc), Imm32(js.compilerPC + 4)); } Interpreter::_interpreterInstruction instr = GetInterpreterOp(inst); ABI_CallFunctionC((void*)instr, inst.hex); if (js.isLastInstruction) { - MOV(32, R(EAX), M(&NPC)); - WriteRfiExitDestInOpArg(R(EAX)); + MOV(32, R(RSCRATCH), PPCSTATE(npc)); + WriteRfiExitDestInOpArg(R(RSCRATCH)); } } @@ -341,8 +341,8 @@ void JitIL::FallBackToInterpreter(UGeckoInstruction _inst) void JitIL::HLEFunction(UGeckoInstruction _inst) { ABI_CallFunctionCC((void*)&HLE::Execute, js.compilerPC, _inst.hex); - MOV(32, R(EAX), M(&NPC)); - WriteExitDestInOpArg(R(EAX)); + MOV(32, R(RSCRATCH), PPCSTATE(npc)); + WriteExitDestInOpArg(R(RSCRATCH)); } void JitIL::DoNothing(UGeckoInstruction _inst) @@ -398,7 +398,7 @@ void JitIL::WriteExit(u32 destination) { ABI_CallFunction((void *)JitILProfiler::End); } - SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); + SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); //If nobody has taken care of this yet (this can be removed when all branches are done) JitBlock *b = js.curBlock; @@ -417,7 +417,7 @@ void JitIL::WriteExit(u32 destination) } else { - MOV(32, M(&PC), Imm32(destination)); + MOV(32, PPCSTATE(pc), Imm32(destination)); JMP(asm_routines.dispatcher, true); } b->linkData.push_back(linkData); @@ -425,27 +425,27 @@ void JitIL::WriteExit(u32 destination) void JitIL::WriteExitDestInOpArg(const Gen::OpArg& arg) { - MOV(32, M(&PC), arg); + MOV(32, PPCSTATE(pc), arg); Cleanup(); if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) { ABI_CallFunction((void *)JitILProfiler::End); } - SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); + SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, true); } void JitIL::WriteRfiExitDestInOpArg(const Gen::OpArg& arg) { - MOV(32, M(&PC), arg); - MOV(32, M(&NPC), arg); + MOV(32, PPCSTATE(pc), arg); + MOV(32, PPCSTATE(npc), arg); Cleanup(); if (SConfig::GetInstance().m_LocalCoreStartupParameter.bJITILTimeProfiling) { ABI_CallFunction((void *)JitILProfiler::End); } ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExceptions)); - SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); + SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, true); } @@ -456,10 +456,10 @@ void JitIL::WriteExceptionExit() { ABI_CallFunction((void *)JitILProfiler::End); } - MOV(32, R(EAX), M(&PC)); - MOV(32, M(&NPC), R(EAX)); + MOV(32, R(EAX), PPCSTATE(pc)); + MOV(32, PPCSTATE(npc), R(EAX)); ABI_CallFunction(reinterpret_cast(&PowerPC::CheckExceptions)); - SUB(32, M(&PowerPC::ppcState.downcount), Imm32(js.downcountAmount)); + SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount)); JMP(asm_routines.dispatcher, true); } @@ -548,7 +548,7 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc // Downcount flag check. The last block decremented downcounter, and the flag should still be available. 
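The comment above compresses the core of the block-timing scheme: every block ends by subtracting its cycle estimate from downcount, so the next block's prologue only has to branch on the flags that SUB left behind. In outline (a sketch of the scheme, not the emitter API):

    #include <cstdint>

    struct TimingState { int downcount; };

    // Roughly the test behind J_CC(CC_NBE) below: if the previous block
    // drove downcount to (or past) zero, go run timed events instead.
    bool EnterBlock(TimingState& st, uint32_t block_start, uint32_t& pc)
    {
        if (st.downcount <= 0)
        {
            pc = block_start;  // doTiming advances events and refills downcount
            return false;
        }
        return true;           // fall through into the block body
    }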
FixupBranch skip = J_CC(CC_NBE); - MOV(32, M(&PC), Imm32(js.blockStart)); + MOV(32, PPCSTATE(pc), Imm32(js.blockStart)); JMP(asm_routines.doTiming, true); // downcount hit zero - go doTiming. SetJumpTarget(skip); @@ -561,13 +561,13 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc if (js.fpa.any) { // This block uses FPU - needs to add FP exception bailout - TEST(32, M(&PowerPC::ppcState.msr), Imm32(1 << 13)); //Test FP enabled bit + TEST(32, PPCSTATE(msr), Imm32(1 << 13)); //Test FP enabled bit FixupBranch b1 = J_CC(CC_NZ); // If a FPU exception occurs, the exception handler will read // from PC. Update PC with the latest value in case that happens. - MOV(32, M(&PC), Imm32(js.blockStart)); - OR(32, M((void *)&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE)); + MOV(32, PPCSTATE(pc), Imm32(js.blockStart)); + OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE)); WriteExceptionExit(); SetJumpTarget(b1); @@ -635,7 +635,7 @@ const u8* JitIL::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc HLEFunction(function); if (type == HLE::HLE_HOOK_REPLACE) { - MOV(32, R(EAX), M(&NPC)); + MOV(32, R(EAX), PPCSTATE(npc)); jit->js.downcountAmount += jit->js.st.numCycles; WriteExitDestInOpArg(R(EAX)); break; diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp index 7ab095bf36..d5cce9882e 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp @@ -9,8 +9,13 @@ #include "Core/PowerPC/JitCommon/JitAsmCommon.h" #include "Core/PowerPC/JitCommon/JitBase.h" -#define QUANTIZED_REGS_TO_SAVE (ABI_ALL_CALLER_SAVED & ~((1 << RAX) | (1 << RCX) | (1 << RDX) | \ - (1 << (XMM0+16)) | (1 << (XMM1+16)))) +#define QUANTIZED_REGS_TO_SAVE \ + (ABI_ALL_CALLER_SAVED & ~(\ + (1 << RSCRATCH) | \ + (1 << RSCRATCH2) | \ + (1 << RSCRATCH_EXTRA)| \ + (1 << (XMM0+16)) | \ + (1 << (XMM1+16)))) using namespace Gen; @@ -18,19 +23,15 @@ static int temp32; void CommonAsmRoutines::GenFifoWrite(int size) { - // Assume value in ABI_PARAM1 + // Assume value in RSCRATCH2 PUSH(ESI); - if (size != 32) - PUSH(EDX); - MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe)); + MOV(32, R(RSCRATCH), Imm32((u32)(u64)GPFifo::m_gatherPipe)); MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount)); - SwapAndStore(size, MComplex(RAX, RSI, 1, 0), ABI_PARAM1); + SwapAndStore(size, MComplex(RSCRATCH, ESI, 1, 0), RSCRATCH2); ADD(32, R(ESI), Imm8(size >> 3)); MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI)); - if (size != 32) - POP(EDX); POP(ESI); RET(); } @@ -39,15 +40,13 @@ void CommonAsmRoutines::GenFifoFloatWrite() { // Assume value in XMM0 PUSH(ESI); - PUSH(EDX); MOVSS(M(&temp32), XMM0); - MOV(32, R(EDX), M(&temp32)); - MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe)); + MOV(32, R(RSCRATCH2), M(&temp32)); + MOV(32, R(RSCRATCH), Imm32((u32)(u64)GPFifo::m_gatherPipe)); MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount)); - SwapAndStore(32, MComplex(RAX, RSI, 1, 0), EDX); + SwapAndStore(32, MComplex(RSCRATCH, RSI, 1, 0), RSCRATCH2); ADD(32, R(ESI), Imm8(4)); MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI)); - POP(EDX); POP(ESI); RET(); } @@ -55,58 +54,58 @@ void CommonAsmRoutines::GenFifoFloatWrite() void CommonAsmRoutines::GenFrsqrte() { // Assume input in XMM0. - // This function clobbers EAX, ECX, and EDX. - MOVQ_xmm(R(RAX), XMM0); + // This function clobbers all three RSCRATCH. 
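The fast path emitted below is a table-driven approximation. Pieced together from the inline comments in this hunk, it computes roughly the following (a sketch; the table contents live in MathUtil, and the helper's exact shape is an assumption):

    #include <cstdint>

    // Fast path for normal, positive inputs. 'in' is the raw double bit
    // pattern; base/dec are MathUtil::frsqrte_expected_base/_dec.
    uint64_t FrsqrteApprox(uint64_t in, const uint32_t* base, const uint32_t* dec)
    {
        int64_t exponent = (int64_t)(in & (0x7FFLL << 52));
        int64_t new_exp  = ((0x3FFLL << 52) - ((exponent - (0x3FELL << 52)) / 2))
                           & (0x7FFLL << 52);

        uint32_t i     = (uint32_t)(in >> 37) & 0x7FF;          // i % 2048
        uint32_t index = ((uint32_t)(in >> 48) & 0x1F) ^ 0x10;  // i / 2048, upper half for odd exponents

        uint64_t mantissa = (uint64_t)(base[index] - dec[index] * i) << 26;
        return (uint64_t)new_exp | mantissa;
    }

The XOR with 0x10 flips the exponent-parity bit so odd exponents land in the upper half of the 32-entry tables, matching the "odd_exponent ? 16 : 0" comment in the emitted code.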
+ MOVQ_xmm(R(RSCRATCH), XMM0); // Negative and zero inputs set an exception and take the complex path. - TEST(64, R(RAX), R(RAX)); + TEST(64, R(RSCRATCH), R(RSCRATCH)); FixupBranch zero = J_CC(CC_Z, true); FixupBranch negative = J_CC(CC_S, true); - MOV(64, R(RCX), R(RAX)); - SHR(64, R(RCX), Imm8(52)); + MOV(64, R(RSCRATCH_EXTRA), R(RSCRATCH)); + SHR(64, R(RSCRATCH_EXTRA), Imm8(52)); // Zero and max exponents (non-normal floats) take the complex path. FixupBranch complex1 = J_CC(CC_Z, true); - CMP(32, R(ECX), Imm32(0x7FF)); + CMP(32, R(RSCRATCH_EXTRA), Imm32(0x7FF)); FixupBranch complex2 = J_CC(CC_E, true); - SUB(32, R(ECX), Imm32(0x3FD)); - SAR(32, R(ECX), Imm8(1)); - MOV(32, R(EDX), Imm32(0x3FF)); - SUB(32, R(EDX), R(ECX)); - SHL(64, R(RDX), Imm8(52)); // exponent = ((0x3FFLL << 52) - ((exponent - (0x3FELL << 52)) / 2)) & (0x7FFLL << 52); + SUB(32, R(RSCRATCH_EXTRA), Imm32(0x3FD)); + SAR(32, R(RSCRATCH_EXTRA), Imm8(1)); + MOV(32, R(RSCRATCH2), Imm32(0x3FF)); + SUB(32, R(RSCRATCH2), R(RSCRATCH_EXTRA)); + SHL(64, R(RSCRATCH2), Imm8(52)); // exponent = ((0x3FFLL << 52) - ((exponent - (0x3FELL << 52)) / 2)) & (0x7FFLL << 52); - MOV(64, R(RCX), R(RAX)); - SHR(64, R(RCX), Imm8(48)); - AND(32, R(ECX), Imm8(0x1F)); - XOR(32, R(ECX), Imm8(0x10)); // int index = i / 2048 + (odd_exponent ? 16 : 0); + MOV(64, R(RSCRATCH_EXTRA), R(RSCRATCH)); + SHR(64, R(RSCRATCH_EXTRA), Imm8(48)); + AND(32, R(RSCRATCH_EXTRA), Imm8(0x1F)); + XOR(32, R(RSCRATCH_EXTRA), Imm8(0x10)); // int index = i / 2048 + (odd_exponent ? 16 : 0); - SHR(64, R(RAX), Imm8(37)); - AND(32, R(EAX), Imm32(0x7FF)); - IMUL(32, EAX, MScaled(RCX, SCALE_4, (u32)(u64)MathUtil::frsqrte_expected_dec)); - MOV(32, R(ECX), MScaled(RCX, SCALE_4, (u32)(u64)MathUtil::frsqrte_expected_base)); - SUB(32, R(ECX), R(EAX)); - SHL(64, R(RCX), Imm8(26)); - OR(64, R(RDX), R(RCX)); // vali |= (s64)(frsqrte_expected_base[index] - frsqrte_expected_dec[index] * (i % 2048)) << 26; - MOVQ_xmm(XMM0, R(RDX)); + SHR(64, R(RSCRATCH), Imm8(37)); + AND(32, R(RSCRATCH), Imm32(0x7FF)); + IMUL(32, RSCRATCH, MScaled(RSCRATCH_EXTRA, SCALE_4, (u32)(u64)MathUtil::frsqrte_expected_dec)); + MOV(32, R(RSCRATCH_EXTRA), MScaled(RSCRATCH_EXTRA, SCALE_4, (u32)(u64)MathUtil::frsqrte_expected_base)); + SUB(32, R(RSCRATCH_EXTRA), R(RSCRATCH)); + SHL(64, R(RSCRATCH_EXTRA), Imm8(26)); + OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(frsqrte_expected_base[index] - frsqrte_expected_dec[index] * (i % 2048)) << 26; + MOVQ_xmm(XMM0, R(RSCRATCH2)); RET(); // Exception flags for zero input. SetJumpTarget(zero); - TEST(32, M(&FPSCR), Imm32(FPSCR_ZX)); + TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_ZX)); FixupBranch skip_set_fx1 = J_CC(CC_NZ); - OR(32, M(&FPSCR), Imm32(FPSCR_FX)); + OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); SetJumpTarget(skip_set_fx1); - OR(32, M(&FPSCR), Imm32(FPSCR_ZX)); + OR(32, PPCSTATE(fpscr), Imm32(FPSCR_ZX)); FixupBranch complex3 = J(); // Exception flags for negative input. SetJumpTarget(negative); - TEST(32, M(&FPSCR), Imm32(FPSCR_VXSQRT)); + TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_VXSQRT)); FixupBranch skip_set_fx2 = J_CC(CC_NZ); - OR(32, M(&FPSCR), Imm32(FPSCR_FX)); + OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); SetJumpTarget(skip_set_fx2); - OR(32, M(&FPSCR), Imm32(FPSCR_VXSQRT)); + OR(32, PPCSTATE(fpscr), Imm32(FPSCR_VXSQRT)); SetJumpTarget(complex1); SetJumpTarget(complex2); @@ -120,53 +119,53 @@ void CommonAsmRoutines::GenFrsqrte() void CommonAsmRoutines::GenFres() { // Assume input in XMM0. - // This function clobbers EAX, ECX, and EDX. 
- MOVQ_xmm(R(RAX), XMM0); + // This function clobbers all three RSCRATCH. + MOVQ_xmm(R(RSCRATCH), XMM0); // Zero inputs set an exception and take the complex path. - TEST(64, R(RAX), R(RAX)); + TEST(64, R(RSCRATCH), R(RSCRATCH)); FixupBranch zero = J_CC(CC_Z); - MOV(64, R(RCX), R(RAX)); - SHR(64, R(RCX), Imm8(52)); - MOV(32, R(EDX), R(ECX)); - AND(32, R(ECX), Imm32(0x7FF)); // exp - AND(32, R(EDX), Imm32(0x800)); // sign - CMP(32, R(ECX), Imm32(895)); + MOV(64, R(RSCRATCH_EXTRA), R(RSCRATCH)); + SHR(64, R(RSCRATCH_EXTRA), Imm8(52)); + MOV(32, R(RSCRATCH2), R(RSCRATCH_EXTRA)); + AND(32, R(RSCRATCH_EXTRA), Imm32(0x7FF)); // exp + AND(32, R(RSCRATCH2), Imm32(0x800)); // sign + CMP(32, R(RSCRATCH_EXTRA), Imm32(895)); // Take the complex path for very large/small exponents. FixupBranch complex1 = J_CC(CC_L); - CMP(32, R(ECX), Imm32(1149)); + CMP(32, R(RSCRATCH_EXTRA), Imm32(1149)); FixupBranch complex2 = J_CC(CC_GE); - SUB(32, R(ECX), Imm32(0x7FD)); - NEG(32, R(ECX)); - OR(32, R(ECX), R(EDX)); - SHL(64, R(RCX), Imm8(52)); // vali = sign | exponent + SUB(32, R(RSCRATCH_EXTRA), Imm32(0x7FD)); + NEG(32, R(RSCRATCH_EXTRA)); + OR(32, R(RSCRATCH_EXTRA), R(RSCRATCH2)); + SHL(64, R(RSCRATCH_EXTRA), Imm8(52)); // vali = sign | exponent - MOV(64, R(RDX), R(RAX)); - SHR(64, R(RAX), Imm8(37)); - SHR(64, R(RDX), Imm8(47)); - AND(32, R(EAX), Imm32(0x3FF)); // i % 1024 - AND(32, R(RDX), Imm8(0x1F)); // i / 1024 + MOV(64, R(RSCRATCH2), R(RSCRATCH)); + SHR(64, R(RSCRATCH), Imm8(37)); + SHR(64, R(RSCRATCH2), Imm8(47)); + AND(32, R(RSCRATCH), Imm32(0x3FF)); // i % 1024 + AND(32, R(RSCRATCH2), Imm8(0x1F)); // i / 1024 - IMUL(32, EAX, MScaled(RDX, SCALE_4, (u32)(u64)MathUtil::fres_expected_dec)); - ADD(32, R(EAX), Imm8(1)); - SHR(32, R(EAX), Imm8(1)); + IMUL(32, RSCRATCH, MScaled(RSCRATCH2, SCALE_4, (u32)(u64)MathUtil::fres_expected_dec)); + ADD(32, R(RSCRATCH), Imm8(1)); + SHR(32, R(RSCRATCH), Imm8(1)); - MOV(32, R(EDX), MScaled(RDX, SCALE_4, (u32)(u64)MathUtil::fres_expected_base)); - SUB(32, R(EDX), R(EAX)); - SHL(64, R(RDX), Imm8(29)); - OR(64, R(RDX), R(RCX)); // vali |= (s64)(fres_expected_base[i / 1024] - (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2) << 29 - MOVQ_xmm(XMM0, R(RDX)); + MOV(32, R(RSCRATCH2), MScaled(RSCRATCH2, SCALE_4, (u32)(u64)MathUtil::fres_expected_base)); + SUB(32, R(RSCRATCH2), R(RSCRATCH)); + SHL(64, R(RSCRATCH2), Imm8(29)); + OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(fres_expected_base[i / 1024] - (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2) << 29 + MOVQ_xmm(XMM0, R(RSCRATCH2)); RET(); // Exception flags for zero input. 
SetJumpTarget(zero); - TEST(32, M(&FPSCR), Imm32(FPSCR_ZX)); + TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_ZX)); FixupBranch skip_set_fx1 = J_CC(CC_NZ); - OR(32, M(&FPSCR), Imm32(FPSCR_FX)); + OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX)); SetJumpTarget(skip_set_fx1); - OR(32, M(&FPSCR), Imm32(FPSCR_ZX)); + OR(32, PPCSTATE(fpscr), Imm32(FPSCR_ZX)); SetJumpTarget(complex1); SetJumpTarget(complex2); @@ -253,21 +252,21 @@ void CommonAsmRoutines::GenQuantizedStores() SHUFPS(XMM0, R(XMM0), 1); MOVQ_xmm(M(&psTemp[0]), XMM0); - TEST(32, R(ECX), Imm32(0x0C000000)); + TEST(32, R(RSCRATCH_EXTRA), Imm32(0x0C000000)); FixupBranch too_complex = J_CC(CC_NZ, true); - MOV(64, R(RAX), M(&psTemp[0])); - SwapAndStore(64, MComplex(RBX, RCX, SCALE_1, 0), RAX); + MOV(64, R(RSCRATCH), M(&psTemp[0])); + SwapAndStore(64, MComplex(RMEM, RSCRATCH_EXTRA, SCALE_1, 0), RSCRATCH); FixupBranch skip_complex = J(true); SetJumpTarget(too_complex); ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true); - ABI_CallFunctionR((void *)&WriteDual32, RCX); + ABI_CallFunctionR((void *)&WriteDual32, RSCRATCH_EXTRA); ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true); SetJumpTarget(skip_complex); RET(); const u8* storePairedU8 = AlignCode4(); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS)); PUNPCKLDQ(XMM1, R(XMM1)); MULPS(XMM0, R(XMM1)); #ifdef QUANTIZE_OVERFLOW_SAFE @@ -278,14 +277,14 @@ void CommonAsmRoutines::GenQuantizedStores() CVTTPS2DQ(XMM0, R(XMM0)); PACKSSDW(XMM0, R(XMM0)); PACKUSWB(XMM0, R(XMM0)); - MOVD_xmm(R(EAX), XMM0); - SafeWriteRegToReg(AX, ECX, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + MOVD_xmm(R(RSCRATCH), XMM0); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); RET(); const u8* storePairedS8 = AlignCode4(); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS)); PUNPCKLDQ(XMM1, R(XMM1)); MULPS(XMM0, R(XMM1)); #ifdef QUANTIZE_OVERFLOW_SAFE @@ -296,15 +295,15 @@ void CommonAsmRoutines::GenQuantizedStores() CVTTPS2DQ(XMM0, R(XMM0)); PACKSSDW(XMM0, R(XMM0)); PACKSSWB(XMM0, R(XMM0)); - MOVD_xmm(R(EAX), XMM0); + MOVD_xmm(R(RSCRATCH), XMM0); - SafeWriteRegToReg(AX, ECX, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); RET(); const u8* storePairedU16 = AlignCode4(); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS)); PUNPCKLDQ(XMM1, R(XMM1)); MULPS(XMM0, R(XMM1)); @@ -319,18 +318,18 @@ void CommonAsmRoutines::GenQuantizedStores() MOVQ_xmm(M(psTemp), XMM0); // place ps[0] into the higher word, ps[1] into the lower // so no need in ROL after BSWAP - MOVZX(32, 16, EAX, M((char*)psTemp + 0)); - SHL(32, R(EAX), Imm8(16)); - MOV(16, R(AX), M((char*)psTemp + 4)); + MOVZX(32, 16, RSCRATCH, M((char*)psTemp + 0)); + SHL(32, R(RSCRATCH), Imm8(16)); + MOV(16, R(RSCRATCH), M((char*)psTemp + 4)); - BSWAP(32, EAX); - SafeWriteRegToReg(EAX, ECX, 32, 0, QUANTIZED_REGS_TO_SAVE, 
SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + BSWAP(32, RSCRATCH); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); RET(); const u8* storePairedS16 = AlignCode4(); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS)); // SHUFPS or UNPCKLPS might be a better choice here. The last one might just be an alias though. PUNPCKLDQ(XMM1, R(XMM1)); MULPS(XMM0, R(XMM1)); @@ -341,10 +340,10 @@ void CommonAsmRoutines::GenQuantizedStores() #endif CVTTPS2DQ(XMM0, R(XMM0)); PACKSSDW(XMM0, R(XMM0)); - MOVD_xmm(R(EAX), XMM0); - BSWAP(32, EAX); - ROL(32, R(EAX), Imm8(16)); - SafeWriteRegToReg(EAX, ECX, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + MOVD_xmm(R(RSCRATCH), XMM0); + BSWAP(32, RSCRATCH); + ROL(32, R(RSCRATCH), Imm8(16)); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); RET(); @@ -369,7 +368,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() // Easy! const u8* storeSingleFloat = AlignCode4(); - SafeWriteF32ToReg(XMM0, ECX, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + SafeWriteF32ToReg(XMM0, RSCRATCH_EXTRA, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); RET(); /* if (cpu_info.bSSSE3) @@ -377,56 +376,56 @@ void CommonAsmRoutines::GenQuantizedSingleStores() PSHUFB(XMM0, M((void *)pbswapShuffle2x4)); // TODO: SafeWriteFloat MOVSS(M(&psTemp[0]), XMM0); - MOV(32, R(EAX), M(&psTemp[0])); - SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + MOV(32, R(RSCRATCH), M(&psTemp[0])); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); } else { MOVSS(M(&psTemp[0]), XMM0); - MOV(32, R(EAX), M(&psTemp[0])); - SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + MOV(32, R(RSCRATCH), M(&psTemp[0])); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); }*/ const u8* storeSingleU8 = AlignCode4(); // Used by MKWii - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS)); MULSS(XMM0, R(XMM1)); PXOR(XMM1, R(XMM1)); MAXSS(XMM0, R(XMM1)); MINSS(XMM0, M((void *)&m_255)); - CVTTSS2SI(EAX, R(XMM0)); - SafeWriteRegToReg(AL, ECX, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + CVTTSS2SI(RSCRATCH, R(XMM0)); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); RET(); const u8* storeSingleS8 = AlignCode4(); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS)); MULSS(XMM0, R(XMM1)); MAXSS(XMM0, M((void *)&m_m128)); MINSS(XMM0, M((void *)&m_127)); - CVTTSS2SI(EAX, R(XMM0)); - SafeWriteRegToReg(AL, ECX, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + CVTTSS2SI(RSCRATCH, R(XMM0)); + 
SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); RET(); const u8* storeSingleU16 = AlignCode4(); // Used by MKWii - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS)); MULSS(XMM0, R(XMM1)); PXOR(XMM1, R(XMM1)); MAXSS(XMM0, R(XMM1)); MINSS(XMM0, M((void *)&m_65535)); - CVTTSS2SI(EAX, R(XMM0)); - SafeWriteRegToReg(EAX, ECX, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + CVTTSS2SI(RSCRATCH, R(XMM0)); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); RET(); const u8* storeSingleS16 = AlignCode4(); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_quantizeTableS)); MULSS(XMM0, R(XMM1)); MAXSS(XMM0, M((void *)&m_m32768)); MINSS(XMM0, M((void *)&m_32767)); - CVTTSS2SI(EAX, R(XMM0)); - SafeWriteRegToReg(EAX, ECX, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + CVTTSS2SI(RSCRATCH, R(XMM0)); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); RET(); singleStoreQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16())); @@ -450,126 +449,126 @@ void CommonAsmRoutines::GenQuantizedLoads() const u8* loadPairedFloatTwo = AlignCode4(); if (cpu_info.bSSSE3) { - MOVQ_xmm(XMM0, MComplex(RBX, RCX, 1, 0)); + MOVQ_xmm(XMM0, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0)); PSHUFB(XMM0, M((void *)pbswapShuffle2x4)); } else { - LoadAndSwap(64, RCX, MComplex(RBX, RCX, 1, 0)); - ROL(64, R(RCX), Imm8(32)); - MOVQ_xmm(XMM0, R(RCX)); + LoadAndSwap(64, RSCRATCH_EXTRA, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0)); + ROL(64, R(RSCRATCH_EXTRA), Imm8(32)); + MOVQ_xmm(XMM0, R(RSCRATCH_EXTRA)); } RET(); const u8* loadPairedFloatOne = AlignCode4(); if (cpu_info.bSSSE3) { - MOVD_xmm(XMM0, MComplex(RBX, RCX, 1, 0)); + MOVD_xmm(XMM0, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0)); PSHUFB(XMM0, M((void *)pbswapShuffle1x4)); UNPCKLPS(XMM0, M((void*)m_one)); } else { - LoadAndSwap(32, RCX, MComplex(RBX, RCX, 1, 0)); - MOVD_xmm(XMM0, R(RCX)); + LoadAndSwap(32, RSCRATCH_EXTRA, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0)); + MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); UNPCKLPS(XMM0, M((void*)m_one)); } RET(); const u8* loadPairedU8Two = AlignCode4(); - UnsafeLoadRegToRegNoSwap(ECX, ECX, 16, 0); - MOVD_xmm(XMM0, R(ECX)); + UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0); + MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); PXOR(XMM1, R(XMM1)); PUNPCKLBW(XMM0, R(XMM1)); PUNPCKLWD(XMM0, R(XMM1)); CVTDQ2PS(XMM0, R(XMM0)); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS)); PUNPCKLDQ(XMM1, R(XMM1)); MULPS(XMM0, R(XMM1)); RET(); const u8* loadPairedU8One = AlignCode4(); - UnsafeLoadRegToRegNoSwap(ECX, ECX, 8, 0); // ECX = 0x000000xx - MOVD_xmm(XMM0, R(ECX)); + UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 8, 0); // RSCRATCH_EXTRA = 0x000000xx + MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); CVTDQ2PS(XMM0, R(XMM0)); // Is CVTSI2SS better?
- SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS)); MULSS(XMM0, R(XMM1)); UNPCKLPS(XMM0, M((void*)m_one)); RET(); const u8* loadPairedS8Two = AlignCode4(); - UnsafeLoadRegToRegNoSwap(ECX, ECX, 16, 0); - MOVD_xmm(XMM0, R(ECX)); + UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0); + MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); PUNPCKLBW(XMM0, R(XMM0)); PUNPCKLWD(XMM0, R(XMM0)); PSRAD(XMM0, 24); CVTDQ2PS(XMM0, R(XMM0)); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS)); PUNPCKLDQ(XMM1, R(XMM1)); MULPS(XMM0, R(XMM1)); RET(); const u8* loadPairedS8One = AlignCode4(); - UnsafeLoadRegToRegNoSwap(ECX, ECX, 8, 0); - SHL(32, R(ECX), Imm8(24)); - SAR(32, R(ECX), Imm8(24)); - MOVD_xmm(XMM0, R(ECX)); + UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 8, 0); + SHL(32, R(RSCRATCH_EXTRA), Imm8(24)); + SAR(32, R(RSCRATCH_EXTRA), Imm8(24)); + MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); CVTDQ2PS(XMM0, R(XMM0)); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS)); MULSS(XMM0, R(XMM1)); UNPCKLPS(XMM0, M((void*)m_one)); RET(); const u8* loadPairedU16Two = AlignCode4(); - UnsafeLoadRegToReg(ECX, ECX, 32, 0, false); - ROL(32, R(ECX), Imm8(16)); - MOVD_xmm(XMM0, R(ECX)); + UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 32, 0, false); + ROL(32, R(RSCRATCH_EXTRA), Imm8(16)); + MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); PXOR(XMM1, R(XMM1)); PUNPCKLWD(XMM0, R(XMM1)); CVTDQ2PS(XMM0, R(XMM0)); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS)); PUNPCKLDQ(XMM1, R(XMM1)); MULPS(XMM0, R(XMM1)); RET(); const u8* loadPairedU16One = AlignCode4(); - UnsafeLoadRegToReg(ECX, ECX, 32, 0, false); - SHR(32, R(ECX), Imm8(16)); - MOVD_xmm(XMM0, R(ECX)); + UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 32, 0, false); + SHR(32, R(RSCRATCH_EXTRA), Imm8(16)); + MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); CVTDQ2PS(XMM0, R(XMM0)); - SHR(32, R(EAX), Imm8(6)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS)); MULSS(XMM0, R(XMM1)); UNPCKLPS(XMM0, M((void*)m_one)); RET(); const u8* loadPairedS16Two = AlignCode4(); - UnsafeLoadRegToReg(ECX, ECX, 32, 0, false); - ROL(32, R(ECX), Imm8(16)); - MOVD_xmm(XMM0, R(ECX)); + UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 32, 0, false); + ROL(32, R(RSCRATCH_EXTRA), Imm8(16)); + MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); PUNPCKLWD(XMM0, R(XMM0)); PSRAD(XMM0, 16); CVTDQ2PS(XMM0, R(XMM0)); - SHR(32, R(EAX), Imm8(6)); - AND(32, R(EAX), Imm32(0xFC)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + AND(32, R(RSCRATCH), Imm32(0xFC)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS)); PUNPCKLDQ(XMM1, R(XMM1)); MULPS(XMM0, R(XMM1)); RET(); const u8* loadPairedS16One = AlignCode4(); - UnsafeLoadRegToReg(ECX, ECX, 32, 0, false); - SAR(32, R(ECX), Imm8(16)); - MOVD_xmm(XMM0, R(ECX)); + UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 32, 0, false); + SAR(32, R(RSCRATCH_EXTRA), Imm8(16)); + MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); CVTDQ2PS(XMM0, R(XMM0)); - SHR(32, R(EAX), 
Imm8(6)); - AND(32, R(EAX), Imm32(0xFC)); - MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS)); + SHR(32, R(RSCRATCH), Imm8(6)); + AND(32, R(RSCRATCH), Imm32(0xFC)); + MOVSS(XMM1, MDisp(RSCRATCH, (u32)(u64)m_dequantizeTableS)); MULSS(XMM0, R(XMM1)); UNPCKLPS(XMM0, M((void*)m_one)); RET(); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h index 1ae548bce1..2702db95e1 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h @@ -19,9 +19,9 @@ public: const u8 *dispatcher; const u8 *dispatcherNoCheck; - const u8 *dispatcherPcInEAX; + const u8 *dispatcherPcInRSCRATCH; - const u8 *dispatchPcInEAX; + const u8 *dispatchPcInRSCRATCH; const u8 *doTiming; const u8 *frsqrte; @@ -31,14 +31,14 @@ public: // In: ECX: Address to read from. // Out: XMM0: Bottom two 32-bit slots hold the read value, // converted to a pair of floats. - // Trashes: EAX ECX EDX + // Trashes: all three RSCRATCH const u8 **pairedLoadQuantized; // In: array index: GQR to use. // In: ECX: Address to write to. // In: XMM0: Bottom two 32-bit slots hold the pair of floats to be written. // Out: Nothing. - // Trashes: EAX ECX EDX + // Trashes: all three RSCRATCH const u8 **pairedStoreQuantized; // In: array index: GQR to use. diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp index ac7ed17986..c1a6436e62 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp @@ -59,6 +59,7 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re // It ought to be necessary to align the stack here. Since it seems to not // affect anybody, I'm not going to add it just to be completely safe about // performance. + ABI_PushRegistersAndAdjustStack(registersInUse, true); if (addrReg != ABI_PARAM1) MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg)); @@ -66,7 +67,6 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re if (info.displacement) ADD(32, R(ABI_PARAM1), Imm32(info.displacement)); - ABI_PushRegistersAndAdjustStack(registersInUse, true); switch (info.operandSize) { case 4: @@ -74,7 +74,7 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re break; case 2: CALL((void *)&Memory::Read_U16); - SHL(32, R(EAX), Imm8(16)); + SHL(32, R(ABI_RETURN), Imm8(16)); break; case 1: CALL((void *)&Memory::Read_U8); @@ -84,11 +84,11 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re if (info.signExtend && info.operandSize == 1) { // Need to sign extend value from Read_U8. - MOVSX(32, 8, dataReg, R(EAX)); + MOVSX(32, 8, dataReg, R(ABI_RETURN)); } else if (dataReg != EAX) { - MOV(32, R(dataReg), R(EAX)); + MOV(32, R(dataReg), R(ABI_RETURN)); } ABI_PopRegistersAndAdjustStack(registersInUse, true); @@ -113,31 +113,17 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r // check anyway. 
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs - MOV(32, M(&PC), Imm32(pc)); + MOV(32, PPCSTATE(pc), Imm32(pc)); - if (dataReg == ABI_PARAM2) - PanicAlert("Incorrect use of SafeWriteRegToReg"); - if (addrReg != ABI_PARAM1) - { - if (ABI_PARAM1 != dataReg) - MOV(64, R(ABI_PARAM1), R((X64Reg)dataReg)); - if (ABI_PARAM2 != addrReg) - MOV(64, R(ABI_PARAM2), R((X64Reg)addrReg)); - } - else - { - if (ABI_PARAM2 != addrReg) - MOV(64, R(ABI_PARAM2), R((X64Reg)addrReg)); - if (ABI_PARAM1 != dataReg) - MOV(64, R(ABI_PARAM1), R((X64Reg)dataReg)); - } + ABI_PushRegistersAndAdjustStack(registersInUse, true); + + MOVTwo(64, ABI_PARAM1, dataReg, ABI_PARAM2, addrReg, ABI_PARAM3); if (info.displacement) { ADD(32, R(ABI_PARAM2), Imm32(info.displacement)); } - ABI_PushRegistersAndAdjustStack(registersInUse, true); switch (info.operandSize) { case 8: @@ -180,9 +166,9 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void) return nullptr; } - if (info.otherReg != RBX) + if (info.otherReg != RMEM) { - PanicAlert("BackPatch : Base reg not RBX." + PanicAlert("BackPatch : Base reg not RMEM." "\n\nAttempted to access %08x.", emAddress); return nullptr; } diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index fa842679e7..95cd723d6d 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -27,6 +27,23 @@ #include "Core/PowerPC/JitCommon/JitBackpatch.h" #include "Core/PowerPC/JitCommon/JitCache.h" +// TODO: find a better place for x86-specific stuff +// The following register assignments are common to Jit64 and Jit64IL: +// RSCRATCH and RSCRATCH2 are always scratch registers and can be used without +// limitation. +#define RSCRATCH RAX +#define RSCRATCH2 RDX +// RSCRATCH_EXTRA may be in the allocation order, so it has to be flushed +// before use. +#define RSCRATCH_EXTRA RCX +// RMEM points to the start of emulated memory. +#define RMEM RBX +// RCODE_POINTERS does what it says. +#define RCODE_POINTERS R15 +// RPPCSTATE points to ppcState + 0x80. It's offset because we want to be able +// to address as much as possible in a one-byte offset form. 
+#define RPPCSTATE RBP + // Use these to control the instruction selection // #define INSTRUCTION_START FallBackToInterpreter(inst); return; // #define INSTRUCTION_START PPCTables::CountInstruction(inst); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index 2b927ba0d9..46c4be1715 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -370,6 +370,6 @@ using namespace Gen; void JitBlockCache::WriteDestroyBlock(const u8* location, u32 address) { XEmitter emit((u8 *)location); - emit.MOV(32, M(&PC), Imm32(address)); + emit.MOV(32, PPCSTATE(pc), Imm32(address)); emit.JMP(jit->GetAsmRoutines()->dispatcher, true); } diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp index 58340b072e..be43680e88 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp @@ -5,7 +5,6 @@ #include #include "Common/Common.h" -#include "Common/CPUDetect.h" #include "Common/MathUtil.h" #include "Core/HW/MMIO.h" @@ -42,7 +41,7 @@ void EmuCodeBlock::SwapAndStore(int size, const Gen::OpArg& dst, Gen::X64Reg src void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend) { - MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset)); + MOVZX(32, accessSize, reg_value, MComplex(RMEM, reg_addr, SCALE_1, offset)); if (accessSize == 32) { BSWAP(32, reg_value); @@ -64,7 +63,7 @@ void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int acc void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset) { - MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset)); + MOVZX(32, accessSize, reg_value, MComplex(RMEM, reg_addr, SCALE_1, offset)); } u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessSize, s32 offset, bool signExtend) @@ -86,16 +85,16 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessS offset = 0; } - memOperand = MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset); + memOperand = MComplex(RMEM, opAddress.GetSimpleReg(), SCALE_1, offset); } else if (opAddress.IsImm()) { - memOperand = MDisp(RBX, (opAddress.offset + offset) & 0x3FFFFFFF); + memOperand = MDisp(RMEM, (opAddress.offset + offset) & 0x3FFFFFFF); } else { MOV(32, R(reg_value), opAddress); - memOperand = MComplex(RBX, reg_value, SCALE_1, offset); + memOperand = MComplex(RMEM, reg_value, SCALE_1, offset); } result = GetWritableCodePtr(); @@ -130,7 +129,7 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessS return result; } -// Visitor that generates code to read a MMIO value to EAX. +// Visitor that generates code to read a MMIO value. template class MMIOReadCodeGenerator : public MMIO::ReadHandlingMethodVisitor { @@ -182,9 +181,9 @@ private: void LoadAddrMaskToReg(int sbits, const void* ptr, u32 mask) { #ifdef _ARCH_64 - m_code->MOV(64, R(EAX), ImmPtr(ptr)); + m_code->MOV(64, R(RSCRATCH), ImmPtr(ptr)); #else - m_code->MOV(32, R(EAX), ImmPtr(ptr)); + m_code->MOV(32, R(RSCRATCH), ImmPtr(ptr)); #endif // If we do not need to mask, we can do the sign extend while loading // from memory. 
If masking is required, we have to first zero extend, @@ -192,11 +191,11 @@ private: u32 all_ones = (1ULL << sbits) - 1; if ((all_ones & mask) == all_ones) { - MoveOpArgToReg(sbits, MDisp(EAX, 0)); + MoveOpArgToReg(sbits, MDisp(RSCRATCH, 0)); } else { - m_code->MOVZX(32, sbits, m_dst_reg, MDisp(EAX, 0)); + m_code->MOVZX(32, sbits, m_dst_reg, MDisp(RSCRATCH, 0)); m_code->AND(32, R(m_dst_reg), Imm32(mask)); if (m_sign_extend) m_code->MOVSX(32, sbits, m_dst_reg, R(m_dst_reg)); @@ -208,7 +207,7 @@ private: m_code->ABI_PushRegistersAndAdjustStack(m_registers_in_use, false); m_code->ABI_CallLambdaC(lambda, m_address); m_code->ABI_PopRegistersAndAdjustStack(m_registers_in_use, false); - MoveOpArgToReg(sbits, R(EAX)); + MoveOpArgToReg(sbits, R(ABI_RETURN)); } Gen::X64CodeBlock* m_code; @@ -248,13 +247,11 @@ void EmuCodeBlock::MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value, } } -// Always clobbers EAX. Preserves the address. -// Preserves the value if the load fails and js.memcheck is enabled. void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags) { if (!jit->js.memcheck) { - registersInUse &= ~(1 << RAX | 1 << reg_value); + registersInUse &= ~(1 << reg_value); } if (!Core::g_CoreStartupParameter.bMMU && Core::g_CoreStartupParameter.bFastmem && @@ -323,11 +320,11 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, if (signExtend && accessSize < 32) { // Need to sign extend values coming from the Read_U* functions. - MOVSX(32, accessSize, reg_value, R(EAX)); + MOVSX(32, accessSize, reg_value, R(ABI_RETURN)); } - else if (reg_value != EAX) + else if (reg_value != ABI_RETURN) { - MOVZX(64, accessSize, reg_value, R(EAX)); + MOVZX(64, accessSize, reg_value, R(ABI_RETURN)); } MEMCHECK_END @@ -338,15 +335,15 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, OpArg addr_loc = opAddress; if (offset) { - addr_loc = R(EAX); + addr_loc = R(RSCRATCH); if (opAddress.IsSimpleReg()) { - LEA(32, EAX, MDisp(opAddress.GetSimpleReg(), offset)); + LEA(32, RSCRATCH, MDisp(opAddress.GetSimpleReg(), offset)); } else { - MOV(32, R(EAX), opAddress); - ADD(32, R(EAX), Imm32(offset)); + MOV(32, R(RSCRATCH), opAddress); + ADD(32, R(RSCRATCH), Imm32(offset)); } } TEST(32, addr_loc, Imm32(mem_mask)); @@ -376,11 +373,11 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, if (signExtend && accessSize < 32) { // Need to sign extend values coming from the Read_U* functions. 
- MOVSX(32, accessSize, reg_value, R(EAX)); + MOVSX(32, accessSize, reg_value, R(ABI_RETURN)); } - else if (reg_value != EAX) + else if (reg_value != ABI_RETURN) { - MOVZX(64, accessSize, reg_value, R(EAX)); + MOVZX(64, accessSize, reg_value, R(ABI_RETURN)); } MEMCHECK_END @@ -395,13 +392,8 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap) { - if (accessSize == 8 && reg_value >= 4) - { - PanicAlert("WARNING: likely incorrect use of UnsafeWriteRegToReg!"); - } - u8* result = GetWritableCodePtr(); - OpArg dest = MComplex(RBX, reg_addr, SCALE_1, offset); + OpArg dest = MComplex(RMEM, reg_addr, SCALE_1, offset); if (swap) { if (cpu_info.bMOVBE) @@ -410,7 +402,8 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acc } else { - BSWAP(accessSize, reg_value); + if (accessSize > 8) + BSWAP(accessSize, reg_value); result = GetWritableCodePtr(); MOV(accessSize, dest, R(reg_value)); } @@ -423,10 +416,8 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acc return result; } -// Destroys both arg registers void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags) { - registersInUse &= ~(1 << RAX); if (!Core::g_CoreStartupParameter.bMMU && Core::g_CoreStartupParameter.bFastmem && !(flags & (SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_FASTMEM)) @@ -449,7 +440,17 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce } if (offset) - ADD(32, R(reg_addr), Imm32((u32)offset)); + { + if (flags & SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR) + { + LEA(32, RSCRATCH, MDisp(reg_addr, (u32)offset)); + reg_addr = RSCRATCH; + } + else + { + ADD(32, R(reg_addr), Imm32((u32)offset)); + } + } u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS; @@ -468,7 +469,7 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce TEST(32, R(reg_addr), Imm32(mem_mask)); FixupBranch fast = J_CC(CC_Z, true); // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs - MOV(32, M(&PC), Imm32(jit->js.compilerPC)); + MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); bool noProlog = (0 != (flags & SAFE_LOADSTORE_NO_PROLOG)); bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP); ABI_PushRegistersAndAdjustStack(registersInUse, noProlog); @@ -494,20 +495,20 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce SetJumpTarget(exit); } -// Destroys both arg registers and EAX +// Destroys the same as SafeWrite plus RSCRATCH. TODO: see if we can avoid temporaries here void EmuCodeBlock::SafeWriteF32ToReg(X64Reg xmm_value, X64Reg reg_addr, s32 offset, u32 registersInUse, int flags) { // TODO: PSHUFB might be faster if fastmem supported MOVSS. 
- MOVD_xmm(R(EAX), xmm_value); - SafeWriteRegToReg(EAX, reg_addr, 32, offset, registersInUse, flags); + MOVD_xmm(R(RSCRATCH), xmm_value); + SafeWriteRegToReg(RSCRATCH, reg_addr, 32, offset, registersInUse, flags); } void EmuCodeBlock::WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap) { if (swap) - SwapAndStore(accessSize, MDisp(RBX, address & 0x3FFFFFFF), arg); + SwapAndStore(accessSize, MDisp(RMEM, address & 0x3FFFFFFF), arg); else - MOV(accessSize, MDisp(RBX, address & 0x3FFFFFFF), R(arg)); + MOV(accessSize, MDisp(RMEM, address & 0x3FFFFFFF), R(arg)); } void EmuCodeBlock::ForceSinglePrecisionS(X64Reg xmm) @@ -584,20 +585,20 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src) // Grab Exponent PAND(XMM1, M((void *)&double_exponent)); PSRLQ(XMM1, 52); - MOVD_xmm(R(EAX), XMM1); + MOVD_xmm(R(RSCRATCH), XMM1); // Check if the double is in the range of valid single subnormal - CMP(16, R(EAX), Imm16(896)); + CMP(16, R(RSCRATCH), Imm16(896)); FixupBranch NoDenormalize = J_CC(CC_G); - CMP(16, R(EAX), Imm16(874)); + CMP(16, R(RSCRATCH), Imm16(874)); FixupBranch NoDenormalize2 = J_CC(CC_L); // Denormalise // shift = (905 - Exponent) plus the 21 bit double to single shift - MOV(16, R(EAX), Imm16(905 + 21)); - MOVD_xmm(XMM0, R(EAX)); + MOV(16, R(RSCRATCH), Imm16(905 + 21)); + MOVD_xmm(XMM0, R(RSCRATCH)); PSUBQ(XMM0, R(XMM1)); // xmm1 = fraction | 0x0010000000000000 @@ -648,12 +649,12 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src) // Changing the FPU mode is very expensive, so we can't do that. // Here, check to see if the exponent is small enough that it will result in a denormal, and pass it to the x87 unit // if it is. - MOVQ_xmm(R(RAX), src); - SHR(64, R(RAX), Imm8(55)); + MOVQ_xmm(R(RSCRATCH), src); + SHR(64, R(RSCRATCH), Imm8(55)); // Exponents 0x369 <= x <= 0x380 are denormal. This code accepts the range 0x368 <= x <= 0x387 // to save an instruction, since diverting a few more floats to the slow path can't hurt much. - SUB(8, R(AL), Imm8(0x6D)); - CMP(8, R(AL), Imm8(0x3)); + SUB(8, R(RSCRATCH), Imm8(0x6D)); + CMP(8, R(RSCRATCH), Imm8(0x3)); FixupBranch x87Conversion = J_CC(CC_BE); CVTSD2SS(dst, R(src)); FixupBranch continue1 = J(); @@ -674,7 +675,7 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr { // If the input isn't denormal, just do things the simple way -- otherwise, go through the x87 unit, which has // flush-to-zero off. - X64Reg gprsrc = src_is_gpr ? src : EAX; + X64Reg gprsrc = src_is_gpr ? src : RSCRATCH; if (src_is_gpr) { MOVD_xmm(dst, R(src)); @@ -683,7 +684,7 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr { if (dst != src) MOVAPD(dst, R(src)); - MOVD_xmm(EAX, R(src)); + MOVD_xmm(RSCRATCH, R(src)); } // A sneaky hack: floating-point zero is rather common and we don't want to confuse it for denormals and // needlessly send it through the slow path. If we subtract 1 before doing the comparison, it turns @@ -718,19 +719,19 @@ static const u64 GC_ALIGNED16(psDoubleNoSign[2]) = {0x7FFFFFFFFFFFFFFFULL, 0}; // quite that necessary. void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm) { - AND(32, M(&FPSCR), Imm32(~FPRF_MASK)); + AND(32, PPCSTATE(fpscr), Imm32(~FPRF_MASK)); FixupBranch continue1, continue2, continue3, continue4; if (cpu_info.bSSE4_1) { - MOVQ_xmm(R(RAX), xmm); - SHR(64, R(RAX), Imm8(63)); // Get the sign bit; almost all the branches need it. 
+ MOVQ_xmm(R(RSCRATCH), xmm); + SHR(64, R(RSCRATCH), Imm8(63)); // Get the sign bit; almost all the branches need it. PTEST(xmm, M((void*)psDoubleExp)); FixupBranch maxExponent = J_CC(CC_C); FixupBranch zeroExponent = J_CC(CC_Z); // Nice normalized number: sign ? PPC_FPCLASS_NN : PPC_FPCLASS_PN; - LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_NN - MathUtil::PPC_FPCLASS_PN, MathUtil::PPC_FPCLASS_PN)); + LEA(32, RSCRATCH, MScaled(RSCRATCH, MathUtil::PPC_FPCLASS_NN - MathUtil::PPC_FPCLASS_PN, MathUtil::PPC_FPCLASS_PN)); continue1 = J(); SetJumpTarget(maxExponent); @@ -738,12 +739,12 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm) FixupBranch notNAN = J_CC(CC_Z); // Max exponent + mantissa: PPC_FPCLASS_QNAN - MOV(32, R(EAX), Imm32(MathUtil::PPC_FPCLASS_QNAN)); + MOV(32, R(RSCRATCH), Imm32(MathUtil::PPC_FPCLASS_QNAN)); continue2 = J(); // Max exponent + no mantissa: sign ? PPC_FPCLASS_NINF : PPC_FPCLASS_PINF; SetJumpTarget(notNAN); - LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_NINF - MathUtil::PPC_FPCLASS_PINF, MathUtil::PPC_FPCLASS_NINF)); + LEA(32, RSCRATCH, MScaled(RSCRATCH, MathUtil::PPC_FPCLASS_NINF - MathUtil::PPC_FPCLASS_PINF, MathUtil::PPC_FPCLASS_NINF)); continue3 = J(); SetJumpTarget(zeroExponent); @@ -751,72 +752,72 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm) FixupBranch zero = J_CC(CC_Z); // No exponent + mantissa: sign ? PPC_FPCLASS_ND : PPC_FPCLASS_PD; - LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_ND - MathUtil::PPC_FPCLASS_PD, MathUtil::PPC_FPCLASS_ND)); + LEA(32, RSCRATCH, MScaled(RSCRATCH, MathUtil::PPC_FPCLASS_ND - MathUtil::PPC_FPCLASS_PD, MathUtil::PPC_FPCLASS_ND)); continue4 = J(); // Zero: sign ? PPC_FPCLASS_NZ : PPC_FPCLASS_PZ; SetJumpTarget(zero); - SHL(32, R(EAX), Imm8(4)); - ADD(32, R(EAX), Imm8(MathUtil::PPC_FPCLASS_PZ)); + SHL(32, R(RSCRATCH), Imm8(4)); + ADD(32, R(RSCRATCH), Imm8(MathUtil::PPC_FPCLASS_PZ)); } else { - MOVQ_xmm(R(RAX), xmm); - TEST(64, R(RAX), M((void*)psDoubleExp)); + MOVQ_xmm(R(RSCRATCH), xmm); + TEST(64, R(RSCRATCH), M((void*)psDoubleExp)); FixupBranch zeroExponent = J_CC(CC_Z); - AND(64, R(RAX), M((void*)psDoubleNoSign)); - CMP(64, R(RAX), M((void*)psDoubleExp)); - FixupBranch nan = J_CC(CC_G); // This works because if the sign bit is set, RAX is negative + AND(64, R(RSCRATCH), M((void*)psDoubleNoSign)); + CMP(64, R(RSCRATCH), M((void*)psDoubleExp)); + FixupBranch nan = J_CC(CC_G); // This works because if the sign bit is set, RSCRATCH is negative FixupBranch infinity = J_CC(CC_E); - MOVQ_xmm(R(RAX), xmm); - SHR(64, R(RAX), Imm8(63)); - LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_NN - MathUtil::PPC_FPCLASS_PN, MathUtil::PPC_FPCLASS_PN)); + MOVQ_xmm(R(RSCRATCH), xmm); + SHR(64, R(RSCRATCH), Imm8(63)); + LEA(32, RSCRATCH, MScaled(RSCRATCH, MathUtil::PPC_FPCLASS_NN - MathUtil::PPC_FPCLASS_PN, MathUtil::PPC_FPCLASS_PN)); continue1 = J(); SetJumpTarget(nan); - MOVQ_xmm(R(RAX), xmm); - SHR(64, R(RAX), Imm8(63)); - MOV(32, R(EAX), Imm32(MathUtil::PPC_FPCLASS_QNAN)); + MOVQ_xmm(R(RSCRATCH), xmm); + SHR(64, R(RSCRATCH), Imm8(63)); + MOV(32, R(RSCRATCH), Imm32(MathUtil::PPC_FPCLASS_QNAN)); continue2 = J(); SetJumpTarget(infinity); - MOVQ_xmm(R(RAX), xmm); - SHR(64, R(RAX), Imm8(63)); - LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_NINF - MathUtil::PPC_FPCLASS_PINF, MathUtil::PPC_FPCLASS_NINF)); + MOVQ_xmm(R(RSCRATCH), xmm); + SHR(64, R(RSCRATCH), Imm8(63)); + LEA(32, RSCRATCH, MScaled(RSCRATCH, MathUtil::PPC_FPCLASS_NINF - MathUtil::PPC_FPCLASS_PINF, MathUtil::PPC_FPCLASS_NINF)); continue3 = J(); SetJumpTarget(zeroExponent); - TEST(64, 
R(RAX), R(RAX)); + TEST(64, R(RSCRATCH), R(RSCRATCH)); FixupBranch zero = J_CC(CC_Z); - SHR(64, R(RAX), Imm8(63)); - LEA(32, EAX, MScaled(EAX, MathUtil::PPC_FPCLASS_ND - MathUtil::PPC_FPCLASS_PD, MathUtil::PPC_FPCLASS_ND)); + SHR(64, R(RSCRATCH), Imm8(63)); + LEA(32, RSCRATCH, MScaled(RSCRATCH, MathUtil::PPC_FPCLASS_ND - MathUtil::PPC_FPCLASS_PD, MathUtil::PPC_FPCLASS_ND)); continue4 = J(); SetJumpTarget(zero); - SHR(64, R(RAX), Imm8(63)); - SHL(32, R(EAX), Imm8(4)); - ADD(32, R(EAX), Imm8(MathUtil::PPC_FPCLASS_PZ)); + SHR(64, R(RSCRATCH), Imm8(63)); + SHL(32, R(RSCRATCH), Imm8(4)); + ADD(32, R(RSCRATCH), Imm8(MathUtil::PPC_FPCLASS_PZ)); } SetJumpTarget(continue1); SetJumpTarget(continue2); SetJumpTarget(continue3); SetJumpTarget(continue4); - SHL(32, R(EAX), Imm8(FPRF_SHIFT)); - OR(32, M(&FPSCR), R(EAX)); + SHL(32, R(RSCRATCH), Imm8(FPRF_SHIFT)); + OR(32, PPCSTATE(fpscr), R(RSCRATCH)); } void EmuCodeBlock::JitClearCA() { - AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0 + AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0 } void EmuCodeBlock::JitSetCA() { - OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1 + OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1 } void EmuCodeBlock::JitClearCAOV(bool oe) { if (oe) - AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK & ~XER_OV_MASK)); //XER.CA, XER.OV = 0 + AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK & ~XER_OV_MASK)); //XER.CA, XER.OV = 0 else - AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0 + AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0 } diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h index addce16e93..73eb9ebfe8 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h @@ -6,6 +6,7 @@ #include +#include "Common/CPUDetect.h" #include "Common/x64Emitter.h" namespace MMIO { class Mapping; } @@ -13,13 +14,23 @@ namespace MMIO { class Mapping; } #define MEMCHECK_START \ Gen::FixupBranch memException; \ if (jit->js.memcheck) \ - { TEST(32, Gen::M((void *)&PowerPC::ppcState.Exceptions), Gen::Imm32(EXCEPTION_DSI)); \ + { TEST(32, PPCSTATE(Exceptions), Gen::Imm32(EXCEPTION_DSI)); \ memException = J_CC(Gen::CC_NZ, true); } #define MEMCHECK_END \ if (jit->js.memcheck) \ SetJumpTarget(memException); +// We offset by 0x80 because the range of one byte memory offsets is +// -0x80..0x7f. +#define PPCSTATE(x) MDisp(RPPCSTATE, \ + (int) ((char *) &PowerPC::ppcState.x - (char *) &PowerPC::ppcState) - 0x80) +// In case you want to disable the ppcstate register: +// #define PPCSTATE(x) M((void*) &PowerPC::ppcState.x) +#define PPCSTATE_LR PPCSTATE(spr[SPR_LR]) +#define PPCSTATE_CTR PPCSTATE(spr[SPR_CTR]) +#define PPCSTATE_SRR0 PPCSTATE(spr[SPR_SRR0]) +#define PPCSTATE_SRR1 PPCSTATE(spr[SPR_SRR1]) // Like XCodeBlock but has some utilities for memory access. class EmuCodeBlock : public Gen::X64CodeBlock @@ -42,11 +53,21 @@ public: { SAFE_LOADSTORE_NO_SWAP = 1, SAFE_LOADSTORE_NO_PROLOG = 2, - SAFE_LOADSTORE_NO_FASTMEM = 4 + SAFE_LOADSTORE_NO_FASTMEM = 4, + SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR = 8 }; + void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags = 0); + // Clobbers RSCRATCH or reg_addr depending on the relevant flag. 
Preserves + // reg_value if the load fails and js.memcheck is enabled. void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags = 0); + // applies to safe and unsafe WriteRegToReg + bool WriteClobbersRegValue(int accessSize, bool swap) + { + return swap && !cpu_info.bMOVBE && accessSize > 8; + } void SafeWriteF32ToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, s32 offset, u32 registersInUse, int flags = 0); void WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap = false); @@ -58,9 +79,8 @@ public: void ForceSinglePrecisionP(Gen::X64Reg xmm); void Force25BitPrecision(Gen::X64Reg xmm, Gen::X64Reg tmp); - // EAX might get trashed + // RSCRATCH might get trashed void ConvertSingleToDouble(Gen::X64Reg dst, Gen::X64Reg src, bool src_is_gpr = false); - // EAX might get trashed void ConvertDoubleToSingle(Gen::X64Reg dst, Gen::X64Reg src); void SetFPRF(Gen::X64Reg xmm); protected: diff --git a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp b/Source/Core/Core/PowerPC/JitILCommon/IR.cpp index f078a4cac9..c07e1a1216 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/IR.cpp @@ -40,7 +40,7 @@ instruction and generates code. Dead code elimination works in this step, by simply skipping unused instructions. The register allocator is a dumb, greedy allocator: at the moment, it's really a bit too dumb, but it's actually not as bad as it looks: unless a block is relatively long, spills -are rarely needed. ECX is used as a scratch register: requiring a scratch +are rarely needed. EDX is used as a scratch register: requiring a scratch register isn't ideal, but the register allocator is too dumb to handle instructions that need a specific register at the moment.
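A note on the RPPCSTATE bias used throughout this patch: an x86-64 [base + disp] memory operand encodes disp in a single byte when it lies in -0x80..0x7f, so pointing RPPCSTATE at ppcState + 0x80 brings the first 0x100 bytes of the struct within reach of the short form instead of only the first 0x80. The standalone sketch below is illustrative only - the mock struct layout and helper names are invented rather than taken from Dolphin - but the displacement arithmetic matches the PPCSTATE(x) macro defined in Jit_Util.h above.

// Illustrative sketch (not Dolphin code): the disp8 bias behind PPCSTATE.
#include <cstddef>
#include <cstdio>

struct MockPPCState // invented layout, loosely shaped like PowerPCState
{
	unsigned gpr[32];             // offsets 0x00..0x7f
	unsigned pc, npc;             // 0x80, 0x84
	int downcount;                // 0x88
	unsigned long long ps[32][2]; // 0x90 onward; the tail falls outside the hot range
};

// Same arithmetic as PPCSTATE(x): displacement relative to (ppcState + 0x80).
static int BiasedDisp(size_t fieldOffset)
{
	return static_cast<int>(fieldOffset) - 0x80;
}

// True when the displacement fits the one-byte (disp8) encoding.
static bool FitsInDisp8(int disp)
{
	return disp >= -0x80 && disp <= 0x7f;
}

int main()
{
	const size_t offsets[] = {
		offsetof(MockPPCState, gpr),       // biased to -0x80: still one byte
		offsetof(MockPPCState, pc),        // biased to 0x00
		offsetof(MockPPCState, downcount), // biased to 0x08
		offsetof(MockPPCState, ps) + 16 * 2 * sizeof(unsigned long long), // ps[16]
	};
	for (size_t off : offsets)
	{
		int disp = BiasedDisp(off);
		printf("offset 0x%03zx -> disp %4d, disp8 form: %s\n", off, disp,
		       FitsInDisp8(disp) ? "yes" : "no");
	}
	return 0;
}

An unbiased pointer would give the short encoding only to offsets 0x00..0x7f; the bias doubles the cheap window to 0x100 bytes, which is what the above_fits_in_first_0x100 marker and its static_assert in the PowerPC.h hunk below are checking.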
diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp index 10449eebb9..c5fcfb1256 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Integer.cpp @@ -321,7 +321,7 @@ void JitILBase::divwux(UGeckoInstruction inst) #if 0 int a = inst.RA, b = inst.RB, d = inst.RD; - gpr.FlushLockX(EDX); + gpr.FlushLockX(RSCRATCH2); gpr.Lock(a, b, d); if (d != a && d != b) { gpr.LoadToX64(d, false, true); } else { gpr.LoadToX64(d, true, true); } - MOV(32, R(EAX), gpr.R(a)); - XOR(32, R(EDX), R(EDX)); + MOV(32, R(RSCRATCH), gpr.R(a)); + XOR(32, R(RSCRATCH2), R(RSCRATCH2)); gpr.KillImmediate(b); DIV(32, gpr.R(b)); - MOV(32, gpr.R(d), R(EAX)); + MOV(32, gpr.R(d), R(RSCRATCH)); gpr.UnlockAll(); gpr.UnlockAllX(); diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStore.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStore.cpp index 3801ac7ba7..f889181dc9 100644 --- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_LoadStore.cpp @@ -137,19 +137,13 @@ void JitILBase::dcbz(UGeckoInstruction inst) return; } INSTRUCTION_START; - MOV(32, R(EAX), gpr.R(inst.RB)); + MOV(32, R(RSCRATCH), gpr.R(inst.RB)); if (inst.RA) - ADD(32, R(EAX), gpr.R(inst.RA)); - AND(32, R(EAX), Imm32(~31)); + ADD(32, R(RSCRATCH), gpr.R(inst.RA)); + AND(32, R(RSCRATCH), Imm32(~31)); PXOR(XMM0, R(XMM0)); -#if _M_X86_64 - MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0); - MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0); -#else - AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); - MOVAPS(MDisp(EAX, (u32)Memory::base), XMM0); - MOVAPS(MDisp(EAX, (u32)Memory::base + 16), XMM0); -#endif + MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 0), XMM0); + MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 16), XMM0); #endif } diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index 7dd59f1573..26e4aa75c4 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -4,6 +4,8 @@ #pragma once +#include <tuple> + #include "Common/BreakPoints.h" #include "Common/Common.h" @@ -30,11 +32,6 @@ struct GC_ALIGNED64(PowerPCState) { u32 gpr[32]; // General purpose registers. r1 = stack pointer. - // The paired singles are strange : PS0 is stored in the full 64 bits of each FPR - // but ps calculations are only done in 32-bit precision, and PS1 is only 32 bits. - // Since we want to use SIMD, SSE2 is the only viable alternative - 2x double. - u64 ps[32][2]; - u32 pc; // program counter u32 npc; @@ -64,6 +61,20 @@ struct GC_ALIGNED64(PowerPCState) // This variable should be inside of the CoreTiming namespace if we wanted to be correct. int downcount; +#if _M_X86_64 + // This member exists for the purpose of an assertion in x86 JitBase.cpp + // that its offset <= 0x100. To minimize code size on x86, we want as much + // useful stuff in the one-byte offset range as possible - which is why ps + // is sitting down here. It currently doesn't make a difference on other + // supported architectures. + std::tuple<> above_fits_in_first_0x100; +#endif + + // The paired singles are strange : PS0 is stored in the full 64 bits of each FPR + // but ps calculations are only done in 32-bit precision, and PS1 is only 32 bits. + // Since we want to use SIMD, SSE2 is the only viable alternative - 2x double. + GC_ALIGNED16(u64 ps[32][2]); + u32 sr[16]; // Segment registers.
// special purpose registers - controls quantizers, DMA, and lots of other misc extensions. @@ -84,6 +95,10 @@ struct GC_ALIGNED64(PowerPCState) InstructionCache iCache; }; +#if _M_X86_64 +static_assert(offsetof(PowerPC::PowerPCState, above_fits_in_first_0x100) <= 0x100, "top of PowerPCState too big"); +#endif + enum CPUState { CPU_RUNNING = 0,