diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp index bf109005ed..9271532cb5 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp @@ -81,12 +81,10 @@ edge over the current JIT mostly due to the fast memory optimization. TODO (in no particular order): Floating-point JIT (both paired and unpaired): currently falls back to the interpreter -Improve register allocator to deal with long live intervals. Optimize conditions for conditional branches. Inter-block dead register elimination, especially for CR0. Inter-block inlining. Track down a few correctness bugs. -Known zero bits: eliminate unneeded AND instructions for rlwinm/rlwimi Implement a select instruction 64-bit compat (it should only be a few tweaks to register allocation and the load/store code) @@ -95,7 +93,7 @@ Scheduling to reduce register pressure: PowerPC compilers like to push x86 processors, which are short on registers and extremely good at instruction reordering. Common subexpression elimination -Optimize load of sum using complex addressing +Optimize load of sum using complex addressing (partially implemented) Implement idle-skipping */ @@ -180,6 +178,40 @@ InstLoc IRBuilder::EmitTriOp(unsigned Opcode, InstLoc Op1, InstLoc Op2, } #endif +unsigned IRBuilder::ComputeKnownZeroBits(InstLoc I) { + switch (getOpcode(*I)) { + case Load8: + return 0xFFFFFF00; + case Or: + return ComputeKnownZeroBits(getOp1(I)) & + ComputeKnownZeroBits(getOp2(I)); + case And: + return ComputeKnownZeroBits(getOp1(I)) | + ComputeKnownZeroBits(getOp2(I)); + case Shl: + if (isImm(*getOp2(I))) { + unsigned samt = GetImmValue(getOp2(I)) & 31; + return (ComputeKnownZeroBits(getOp1(I)) << samt) | + ~(-1U << samt); + } + return 0; + case Shrl: + if (isImm(*getOp2(I))) { + unsigned samt = GetImmValue(getOp2(I)) & 31; + return (ComputeKnownZeroBits(getOp1(I)) >> samt) | + ~(-1U >> samt); + } + return 0; + case Rol: + if (isImm(*getOp2(I))) { + return _rotl(ComputeKnownZeroBits(getOp1(I)), + GetImmValue(getOp2(I))); + } + default: + return 0; + } +} + InstLoc IRBuilder::FoldZeroOp(unsigned Opcode, unsigned extra) { if (Opcode == LoadGReg) { // Reg load folding: if we already loaded the value, @@ -275,6 +307,9 @@ InstLoc IRBuilder::FoldAnd(InstLoc Op1, InstLoc Op2) { return FoldShrl(getOp1(Op1), EmitIntConst(shiftAmt2)); } } + if (!(~ComputeKnownZeroBits(Op1) & ~GetImmValue(Op2))) { + return Op1; + } } if (Op1 == Op2) return Op1; @@ -348,6 +383,35 @@ InstLoc IRBuilder::FoldRol(InstLoc Op1, InstLoc Op2) { return EmitBiOp(Rol, Op1, Op2); } +InstLoc IRBuilder::FoldBranchCond(InstLoc Op1, InstLoc Op2) { + if (getOpcode(*Op1) == And && + isImm(*getOp2(Op1)) && + getOpcode(*getOp1(Op1)) == ICmpCRSigned) { + unsigned branchValue = GetImmValue(getOp2(Op1)); + if (branchValue == 2) + return FoldBranchCond(EmitICmpEq(getOp1(getOp1(Op1)), + getOp2(getOp1(Op1))), Op2); + } + if (getOpcode(*Op1) == Xor && + isImm(*getOp2(Op1))) { + InstLoc XOp1 = getOp1(Op1); + unsigned branchValue = GetImmValue(getOp2(Op1)); + if (getOpcode(*XOp1) == And && + isImm(*getOp2(XOp1)) && + getOpcode(*getOp1(XOp1)) == ICmpCRSigned) { + unsigned innerBranchValue = + GetImmValue(getOp2(XOp1)); + if (branchValue == innerBranchValue) { + if (branchValue == 4) { + return FoldBranchCond(EmitICmpSle(getOp1(getOp1(XOp1)), + getOp2(getOp1(XOp1))), Op2); + } + } + } + } + return EmitBiOp(BranchCond, Op1, Op2); +} + InstLoc IRBuilder::FoldInterpreterFallback(InstLoc Op1, InstLoc Op2) { for (unsigned i = 
0; i < 32; i++) { GRegCache[i] = 0; @@ -371,6 +435,7 @@ InstLoc IRBuilder::FoldBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2) { case Shl: return FoldShl(Op1, Op2); case Shrl: return FoldShrl(Op1, Op2); case Rol: return FoldRol(Op1, Op2); + case BranchCond: return FoldBranchCond(Op1, Op2); case InterpreterFallback: return FoldInterpreterFallback(Op1, Op2); default: return EmitBiOp(Opcode, Op1, Op2); } @@ -473,9 +538,12 @@ static X64Reg regFindFreeReg(RegInfo& RI) { if (RI.regs[EBX] == 0) return EBX; if (RI.regs[EDX] == 0) return EDX; if (RI.regs[EAX] == 0) return EAX; - // ECX is scratch; never allocate it! - regSpill(RI, EDI); - return EDI; + // ECX is scratch, so we don't allocate it + static X64Reg regs[] = {EDI, ESI, EBP, EBX, EDX, EAX}; + static unsigned nextReg = 0; + X64Reg reg = regs[nextReg++ % 6]; + regSpill(RI, reg); + return reg; } static OpArg regLocForInst(RegInfo& RI, InstLoc I) { @@ -532,6 +600,15 @@ static void regSpillCallerSaved(RegInfo& RI) { regSpill(RI, EAX); } +static X64Reg regUReg(RegInfo& RI, InstLoc I) { + if (RI.IInfo[I - RI.FirstI] & 4 && + regLocForInst(RI, getOp1(I)).IsSimpleReg()) { + return regLocForInst(RI, getOp1(I)).GetSimpleReg(); + } + X64Reg reg = regFindFreeReg(RI); + return reg; +} + static X64Reg regBinLHSReg(RegInfo& RI, InstLoc I) { if (RI.IInfo[I - RI.FirstI] & 4) { return regEnsureInReg(RI, getOp1(I)); @@ -559,12 +636,25 @@ static void regEmitBinInst(RegInfo& RI, InstLoc I, } static void regEmitMemLoad(RegInfo& RI, InstLoc I, unsigned Size) { - X64Reg reg = regBinLHSReg(RI, I); + X64Reg reg; + unsigned offset; + + if (getOpcode(*getOp1(I)) == Add && isImm(*getOp2(getOp1(I)))) { + offset = RI.Build->GetImmValue(getOp2(getOp1(I))); + reg = regBinLHSReg(RI, getOp1(I)); + if (RI.IInfo[I - RI.FirstI] & 4) + regClearInst(RI, getOp1(getOp1(I))); + } else { + offset = 0; + reg = regBinLHSReg(RI, I); + if (RI.IInfo[I - RI.FirstI] & 4) + regClearInst(RI, getOp1(I)); + } if (RI.UseProfile) { unsigned curLoad = ProfiledLoads[RI.numProfiledLoads++]; if (!(curLoad & 0x0C000000)) { if (regReadUse(RI, I)) { - unsigned addr = (u32)Memory::base - (curLoad & 0xC0000000); + unsigned addr = (u32)Memory::base - (curLoad & 0xC0000000) + offset; RI.Jit->MOVZX(32, Size, reg, MDisp(reg, addr)); RI.Jit->BSWAP(Size, reg); RI.regs[reg] = I; @@ -572,6 +662,9 @@ static void regEmitMemLoad(RegInfo& RI, InstLoc I, unsigned Size) { return; } } + if (offset) { + RI.Jit->ADD(32, R(reg), Imm32(offset)); + } if (RI.MakeProfile) { RI.Jit->MOV(32, M(&ProfiledLoads[RI.numProfiledLoads++]), R(reg)); } @@ -638,7 +731,6 @@ static void regEmitShiftInst(RegInfo& RI, InstLoc I, RI.regs[reg] = I; return; } - // FIXME: prevent regBinLHSReg from finding ecx! RI.Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I))); (RI.Jit->*op)(32, R(reg), R(ECX)); RI.regs[reg] = I; @@ -695,10 +787,8 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) { RI.Build = ibuild; RI.UseProfile = UseProfile; RI.MakeProfile = !RI.UseProfile; + unsigned bs = Jit->js.blockStart; // Pass to compute liveness - // Note that despite this marking, we never materialize immediates; - // on x86, they almost always fold into the instruction, and it's at - // best a code-size reduction in the cases where they don't. 
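The FoldAnd change above leans on the new ComputeKnownZeroBits walk: an AND against an immediate mask is a no-op whenever every bit that can still be nonzero in Op1 lies inside the mask, which is exactly the pattern rlwinm/rlwimi leave behind after a narrow load. A minimal standalone model of the transfer functions (toy Node type and illustrative names, not the real InstLoc API):

    // Toy model of ComputeKnownZeroBits; shift/rotate amounts are stored
    // directly instead of as immediate operand nodes.
    #include <cassert>
    #include <cstdint>

    struct Node {
        enum Op { Load8, Or, And, Shl, Shrl, Rol, Other } op;
        uint32_t amt;       // shift/rotate amount, if any
        const Node* a;      // operands
        const Node* b;
    };

    static uint32_t rotl32(uint32_t v, unsigned s) {
        s &= 31;
        return s ? (v << s) | (v >> (32 - s)) : v;
    }

    // Returns a mask of bits guaranteed to be zero in the node's result.
    static uint32_t KnownZeroBits(const Node* n) {
        switch (n->op) {
        case Node::Load8: return 0xFFFFFF00; // byte load: top 24 bits clear
        case Node::Or:    return KnownZeroBits(n->a) & KnownZeroBits(n->b);
        case Node::And:   return KnownZeroBits(n->a) | KnownZeroBits(n->b);
        case Node::Shl:   return (KnownZeroBits(n->a) << (n->amt & 31)) |
                                 ~(~0u << (n->amt & 31));  // vacated low bits
        case Node::Shrl:  return (KnownZeroBits(n->a) >> (n->amt & 31)) |
                                 ~(~0u >> (n->amt & 31));  // vacated high bits
        case Node::Rol:   return rotl32(KnownZeroBits(n->a), n->amt);
        default:          return 0;
        }
    }

    int main() {
        // lbz followed by "rlwinm rD, rS, 2, 22, 29": rotate left 2, then
        // mask with 0x3FC. Every possibly-set bit is inside the mask, so
        // FoldAnd can return the rotate unchanged and the AND disappears.
        Node load = {Node::Load8, 0, nullptr, nullptr};
        Node rol  = {Node::Rol, 2, &load, nullptr};
        uint32_t mask = 0x3FC;
        assert((~KnownZeroBits(&rol) & ~mask) == 0);
        return 0;
    }

The Or/And rules are the standard dataflow transfer functions; the shift rules additionally count the vacated bit positions as known zeros, and a rotate by a constant simply rotates the mask.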
ibuild->StartBackPass(); for (unsigned index = RI.IInfo.size() - 1; index != -1U; --index) { InstLoc I = ibuild->ReadBackward(); @@ -719,6 +809,9 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) { case BlockEnd: case BlockStart: case InterpreterFallback: + case SystemCall: + case RFIExit: + case InterpreterBranch: // No liveness effects break; case Tramp: @@ -732,13 +825,18 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) { if (thisUsed) regMarkUse(RI, I, getOp1(I), 1); break; + case StoreCR: + case StoreCarry: case Load8: case Load16: case Load32: + if (getOpcode(*getOp1(I)) == Add && + isImm(*getOp2(getOp1(I)))) { + regMarkUse(RI, I, getOp1(getOp1(I)), 1); + break; + } case StoreGReg: - case StoreCR: case StoreLink: - case StoreCarry: case StoreCTR: case StoreMSR: regMarkUse(RI, I, getOp1(I), 1); @@ -757,6 +855,8 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) { case ICmpCRSigned: case ICmpEq: case ICmpUgt: + case ICmpSle: + case ICmpSgt: if (thisUsed) { regMarkUse(RI, I, getOp1(I), 1); if (!isImm(*getOp2(I))) @@ -773,12 +873,20 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) { if (!isImm(*getOp1(I))) regMarkUse(RI, I, getOp1(I), 1); break; - case BranchCond: - regMarkUse(RI, I, getOp1(I), 1); + case BranchCond: { + unsigned CondOpcode = getOpcode(*getOp1(I)); + if ((CondOpcode == ICmpEq || + CondOpcode == ICmpSle) && + isImm(*getOp2(getOp1(I)))) { + regMarkUse(RI, I, getOp1(getOp1(I)), 1); + } else { + regMarkUse(RI, I, getOp1(I), 1); + } if (!isImm(*getOp2(I))) regMarkUse(RI, I, getOp2(I), 2); break; } + } } ibuild->StartForwardPass(); @@ -902,7 +1010,7 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) { } case SExt16: { if (!thisUsed) break; - X64Reg reg = regFindFreeReg(RI); + X64Reg reg = regUReg(RI, I); Jit->MOVSX(32, 16, reg, regLocForInst(RI, getOp1(I))); RI.regs[reg] = I; break; @@ -987,6 +1095,15 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) { RI.regs[reg] = I; break; } + case ICmpSle: { + if (!thisUsed) break; + regEmitCmp(RI, I); + Jit->SETcc(CC_LE, R(ECX)); // Caution: SETCC uses 8-bit regs! 
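+ // (x86-32 only has byte forms for EAX/ECX/EDX/EBX; CL is safe because
+ // ECX is the designated scratch register and is never allocated.)
+ // The flag is then zero-extended into the destination register below.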
+ X64Reg reg = regFindFreeReg(RI); + Jit->MOVZX(32, 8, reg, R(ECX)); + RI.regs[reg] = I; + break; + } case ICmpCRUnsigned: { if (!thisUsed) break; regEmitCmp(RI, I); @@ -1035,16 +1152,72 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) { case BlockEnd: break; case BranchCond: { - Jit->CMP(32, regLocForInst(RI, getOp1(I)), Imm8(0)); - FixupBranch cont = Jit->J_CC(CC_NZ); - regWriteExit(RI, getOp2(I)); - Jit->SetJumpTarget(cont); + if (getOpcode(*getOp1(I)) == ICmpEq && + isImm(*getOp2(getOp1(I)))) { + Jit->CMP(32, regLocForInst(RI, getOp1(getOp1(I))), + Imm32(RI.Build->GetImmValue(getOp2(getOp1(I))))); + FixupBranch cont = Jit->J_CC(CC_Z); + regWriteExit(RI, getOp2(I)); + Jit->SetJumpTarget(cont); + if (RI.IInfo[I - RI.FirstI] & 4) + regClearInst(RI, getOp1(getOp1(I))); + } else if (getOpcode(*getOp1(I)) == ICmpSle && + isImm(*getOp2(getOp1(I)))) { + Jit->CMP(32, regLocForInst(RI, getOp1(getOp1(I))), + Imm32(RI.Build->GetImmValue(getOp2(getOp1(I))))); + FixupBranch cont = Jit->J_CC(CC_LE); + regWriteExit(RI, getOp2(I)); + Jit->SetJumpTarget(cont); + if (RI.IInfo[I - RI.FirstI] & 4) + regClearInst(RI, getOp1(getOp1(I))); + } else { + Jit->CMP(32, regLocForInst(RI, getOp1(I)), Imm8(0)); + FixupBranch cont = Jit->J_CC(CC_NZ); + regWriteExit(RI, getOp2(I)); + Jit->SetJumpTarget(cont); + if (RI.IInfo[I - RI.FirstI] & 4) + regClearInst(RI, getOp1(I)); + } + if (RI.IInfo[I - RI.FirstI] & 8) + regClearInst(RI, getOp2(I)); break; } case BranchUncond: { regWriteExit(RI, getOp1(I)); break; } + case SystemCall: { + unsigned InstLoc = ibuild->GetImmValue(getOp1(I)); + Jit->Cleanup(); + Jit->OR(32, M(&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_SYSCALL)); + Jit->MOV(32, M(&PC), Imm32(InstLoc + 4)); + Jit->JMP(asm_routines.testExceptions, true); + break; + } + case InterpreterBranch: { + Jit->MOV(32, R(EAX), M(&NPC)); + Jit->WriteExitDestInEAX(0); + break; + } + case RFIExit: { + // Bits SRR1[0, 5-9, 16-23, 25-27, 30-31] are placed + // into the corresponding bits of the MSR. + // MSR[13] is set to 0. + const u32 mask = 0x87C0FF73; + // MSR = (MSR & ~mask) | (SRR1 & mask); + Jit->MOV(32, R(EAX), M(&MSR)); + Jit->MOV(32, R(ECX), M(&SRR1)); + Jit->AND(32, R(EAX), Imm32(~mask)); + Jit->AND(32, R(ECX), Imm32(mask)); + Jit->OR(32, R(EAX), R(ECX)); + // MSR &= 0xFFFDFFFF; //TODO: VERIFY + Jit->AND(32, R(EAX), Imm32(0xFFFDFFFF)); + Jit->MOV(32, M(&MSR), R(EAX)); + // NPC = SRR0; + Jit->MOV(32, R(EAX), M(&SRR0)); + Jit->WriteRfiExitDestInEAX(); + break; + } case Tramp: { if (!thisUsed) break; // FIXME: Optimize! 
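The RFIExit case inlines the MSR update that rfi in Jit_Branch.cpp used to emit directly (its removal appears further down in this patch). The bit algebra is easy to sanity-check in isolation; a small self-contained sketch, using arbitrary test values:

    #include <cassert>
    #include <cstdint>

    // Mirrors the emitted MOV/AND/OR sequence: SRR1 bits selected by `mask`
    // replace the corresponding MSR bits, then the 0x00020000 bit is cleared.
    static uint32_t rfi_msr(uint32_t msr, uint32_t srr1) {
        const uint32_t mask = 0x87C0FF73;
        return ((msr & ~mask) | (srr1 & mask)) & 0xFFFDFFFF;
    }

    int main() {
        // Only non-mask MSR bits survive (minus the always-cleared bit)...
        assert(rfi_msr(0xFFFFFFFF, 0) == (~0x87C0FF73u & 0xFFFDFFFFu));
        // ...and mask-covered bits are imported from SRR1.
        assert(rfi_msr(0, 0xFFFFFFFF) == (0x87C0FF73u & 0xFFFDFFFFu));
        return 0;
    }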
@@ -1061,7 +1234,11 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) { PanicAlert("Unknown JIT instruction; aborting!"); exit(1); } - if (getOpcode(*I) != Tramp) { + if (getOpcode(*I) != Tramp && + getOpcode(*I) != BranchCond && + getOpcode(*I) != Load8 && + getOpcode(*I) != Load16 && + getOpcode(*I) != Load32) { if (RI.IInfo[I - RI.FirstI] & 4) regClearInst(RI, getOp1(I)); if (RI.IInfo[I - RI.FirstI] & 8) regClearInst(RI, getOp2(I)); @@ -1075,10 +1252,9 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) { } } - printf("Block: %x, numspills %d\n", Jit->js.blockStart, RI.numSpills); + if (RI.numSpills) + printf("Block: %x, numspills %d\n", Jit->js.blockStart, RI.numSpills); - Jit->MOV(32, R(EAX), M(&NPC)); - Jit->WriteRfiExitDestInEAX(); Jit->UD2(); } diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h index 2a3f3c67c6..a7a1a47563 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h @@ -73,6 +73,8 @@ namespace IREmitter { ICmpCRUnsigned, // CR for unsigned int compare ICmpEq, // One if equal, zero otherwise ICmpUgt, // One if op1 > op2, zero otherwise + ICmpSgt, // One if op1 > op2 (signed), zero otherwise + ICmpSle, // One if op1 <= op2 (signed); logical complement of ICmpSgt // Memory store operators Store8, Store16, @@ -87,6 +89,11 @@ namespace IREmitter { CInt16, CInt32, + // Funny PPC "branches" + SystemCall, + RFIExit, + InterpreterBranch, + // "Opcode" representing a register too far away to // reference directly; this is a size optimization Tramp, @@ -159,6 +166,7 @@ namespace IREmitter { InstLoc FoldShl(InstLoc Op1, InstLoc Op2); InstLoc FoldShrl(InstLoc Op1, InstLoc Op2); InstLoc FoldXor(InstLoc Op1, InstLoc Op2); + InstLoc FoldBranchCond(InstLoc Op1, InstLoc Op2); InstLoc FoldInterpreterFallback(InstLoc Op1, InstLoc Op2); @@ -167,6 +175,8 @@ namespace IREmitter { unsigned extra = 0); InstLoc FoldBiOp(unsigned OpCode, InstLoc Op1, InstLoc Op2); + unsigned ComputeKnownZeroBits(InstLoc I); + public: InstLoc EmitIntConst(unsigned value); InstLoc EmitStoreLink(InstLoc val) { @@ -241,6 +251,12 @@ namespace IREmitter { InstLoc EmitICmpUgt(InstLoc op1, InstLoc op2) { return FoldBiOp(ICmpUgt, op1, op2); } + InstLoc EmitICmpSgt(InstLoc op1, InstLoc op2) { + return FoldBiOp(ICmpSgt, op1, op2); + } + InstLoc EmitICmpSle(InstLoc op1, InstLoc op2) { + return FoldBiOp(ICmpSle, op1, op2); + } InstLoc EmitLoad8(InstLoc op1) { return FoldUOp(Load8, op1); } @@ -274,9 +290,18 @@ namespace IREmitter { InstLoc EmitInterpreterFallback(InstLoc op1, InstLoc op2) { return FoldBiOp(InterpreterFallback, op1, op2); } + InstLoc EmitInterpreterBranch() { + return FoldZeroOp(InterpreterBranch, 0); + } InstLoc EmitStoreCarry(InstLoc op1) { return FoldUOp(StoreCarry, op1); } + InstLoc EmitSystemCall(InstLoc pc) { + return FoldUOp(SystemCall, pc); + } + InstLoc EmitRFIExit() { + return FoldZeroOp(RFIExit, 0); + } void StartBackPass() { curReadPtr = &InstList[InstList.size()]; } void StartForwardPass() { curReadPtr = &InstList[0]; } diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Branch.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Branch.cpp index 9be9ee9ff9..09596548df 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Branch.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Branch.cpp @@ -43,36 +43,12 @@ using namespace Gen; void Jit64::sc(UGeckoInstruction inst) { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITBranchOff) - {Default(inst); return;} // turn off from debugger - - gpr.Flush(FLUSH_ALL); - 
fpr.Flush(FLUSH_ALL); - WriteExceptionExit(EXCEPTION_SYSCALL); + ibuild.EmitSystemCall(ibuild.EmitIntConst(js.compilerPC)); } void Jit64::rfi(UGeckoInstruction inst) { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITBranchOff) - {Default(inst); return;} // turn off from debugger - - gpr.Flush(FLUSH_ALL); - fpr.Flush(FLUSH_ALL); - //Bits SRR1[0, 5-9, 16-23, 25-27, 30-31] are placed into the corresponding bits of the MSR. - //MSR[13] is set to 0. - const u32 mask = 0x87C0FF73; - // MSR = (MSR & ~mask) | (SRR1 & mask); - MOV(32, R(EAX), M(&MSR)); - MOV(32, R(ECX), M(&SRR1)); - AND(32, R(EAX), Imm32(~mask)); - AND(32, R(ECX), Imm32(mask)); - OR(32, R(EAX), R(ECX)); - // MSR &= 0xFFFDFFFF; //TODO: VERIFY - AND(32, R(EAX), Imm32(0xFFFDFFFF)); - MOV(32, M(&MSR), R(EAX)); - // NPC = SRR0; - MOV(32, R(EAX), M(&SRR0)); - WriteRfiExitDestInEAX(); + ibuild.EmitRFIExit(); } void Jit64::bx(UGeckoInstruction inst) @@ -89,9 +65,6 @@ using namespace Gen; ibuild.EmitBranchUncond(ibuild.EmitIntConst(destination)); } - // TODO - optimize to hell and beyond - // TODO - make nice easy to optimize special cases for the most common - // variants of this instruction. void Jit64::bcx(UGeckoInstruction inst) { if (inst.LK) @@ -148,46 +121,11 @@ using namespace Gen; void Jit64::bcctrx(UGeckoInstruction inst) { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITBranchOff) - {Default(inst); return;} // turn off from debugger - - gpr.Flush(FLUSH_ALL); - fpr.Flush(FLUSH_ALL); - - // bool fastway = true; - - if ((inst.BO & 16) == 0) - { - PanicAlert("Bizarro bcctrx %08x, not supported.", inst.hex); - _assert_msg_(DYNA_REC, 0, "Bizarro bcctrx"); - /* - fastway = false; - MOV(32, M(&PC), Imm32(js.compilerPC+4)); - MOV(32, R(EAX), M(&CR)); - XOR(32, R(ECX), R(ECX)); - AND(32, R(EAX), Imm32(0x80000000 >> inst.BI)); - - CCFlags branch; - if(inst.BO & 8) - branch = CC_NZ; - else - branch = CC_Z; - */ - // TODO(ector): Why is this commented out? 
- //SETcc(branch, R(ECX)); - // check for EBX - //TEST(32, R(ECX), R(ECX)); - //linkEnd = J_CC(branch); - } - // NPC = CTR & 0xfffffffc; - MOV(32, R(EAX), M(&CTR)); - if (inst.LK) - MOV(32, M(&LR), Imm32(js.compilerPC + 4)); // LR = PC + 4; - AND(32, R(EAX), Imm32(0xFFFFFFFC)); - WriteExitDestInEAX(0); + Default(inst); + ibuild.EmitInterpreterBranch(); + return; } - void Jit64::bclrx(UGeckoInstruction inst) { if (inst.hex == 0x4e800020) { @@ -195,6 +133,7 @@ using namespace Gen; return; } Default(inst); + ibuild.EmitInterpreterBranch(); return; } diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Integer.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Integer.cpp index 04cf273211..9be273564c 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Integer.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_Integer.cpp @@ -25,7 +25,7 @@ #include "JitRegCache.h" #include "JitAsm.h" -// #define INSTRUCTION_START Default(inst); return; +//#define INSTRUCTION_START Default(inst); return; #define INSTRUCTION_START static void ComputeRC(IREmitter::IRBuilder& ibuild, @@ -37,6 +37,7 @@ void Jit64::reg_imm(UGeckoInstruction inst) { + INSTRUCTION_START int d = inst.RD, a = inst.RA, s = inst.RS; IREmitter::InstLoc val, test, c; switch (inst.OPCD) @@ -103,6 +104,7 @@ void Jit64::cmpXX(UGeckoInstruction inst) { + INSTRUCTION_START IREmitter::InstLoc lhs, rhs, res; lhs = ibuild.EmitLoadGReg(inst.RA); if (inst.OPCD == 31) { @@ -125,6 +127,7 @@ void Jit64::orx(UGeckoInstruction inst) { + INSTRUCTION_START IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB); val = ibuild.EmitOr(ibuild.EmitLoadGReg(inst.RS), val); ibuild.EmitStoreGReg(val, inst.RA); @@ -136,6 +139,7 @@ // m_GPR[_inst.RA] = m_GPR[_inst.RS] ^ m_GPR[_inst.RB]; void Jit64::xorx(UGeckoInstruction inst) { + INSTRUCTION_START IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB); val = ibuild.EmitXor(ibuild.EmitLoadGReg(inst.RS), val); ibuild.EmitStoreGReg(val, inst.RA); @@ -145,6 +149,7 @@ void Jit64::andx(UGeckoInstruction inst) { + INSTRUCTION_START IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB); val = ibuild.EmitAnd(ibuild.EmitLoadGReg(inst.RS), val); ibuild.EmitStoreGReg(val, inst.RA); @@ -154,6 +159,7 @@ void Jit64::extsbx(UGeckoInstruction inst) { + INSTRUCTION_START IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS); val = ibuild.EmitSExt8(val); ibuild.EmitStoreGReg(val, inst.RA); @@ -163,6 +169,7 @@ void Jit64::extshx(UGeckoInstruction inst) { + INSTRUCTION_START IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS); val = ibuild.EmitSExt16(val); ibuild.EmitStoreGReg(val, inst.RA); @@ -226,6 +233,7 @@ void Jit64::subfx(UGeckoInstruction inst) { + INSTRUCTION_START if (inst.OE) PanicAlert("OE: subfx"); IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB); val = ibuild.EmitSub(val, ibuild.EmitLoadGReg(inst.RA)); @@ -236,6 +244,7 @@ void Jit64::mulli(UGeckoInstruction inst) { + INSTRUCTION_START IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RA); val = ibuild.EmitMul(val, ibuild.EmitIntConst(inst.SIMM_16)); ibuild.EmitStoreGReg(val, inst.RD); @@ -243,6 +252,7 @@ void Jit64::mullwx(UGeckoInstruction inst) { + INSTRUCTION_START IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB); val = ibuild.EmitMul(ibuild.EmitLoadGReg(inst.RA), val); ibuild.EmitStoreGReg(val, inst.RD); @@ -316,6 +326,7 @@ void Jit64::addx(UGeckoInstruction inst) { + INSTRUCTION_START IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB); val = ibuild.EmitAdd(ibuild.EmitLoadGReg(inst.RA), val); ibuild.EmitStoreGReg(val, inst.RD); @@ -355,6 +366,7 @@ void 
Jit64::rlwinmx(UGeckoInstruction inst) { + INSTRUCTION_START unsigned mask = Helper_Mask(inst.MB, inst.ME); IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS); val = ibuild.EmitRol(val, ibuild.EmitIntConst(inst.SH)); @@ -367,6 +379,7 @@ void Jit64::rlwimix(UGeckoInstruction inst) { + INSTRUCTION_START unsigned mask = Helper_Mask(inst.MB, inst.ME); IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS); val = ibuild.EmitRol(val, ibuild.EmitIntConst(inst.SH)); @@ -412,6 +425,7 @@ void Jit64::negx(UGeckoInstruction inst) { + INSTRUCTION_START IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RA); val = ibuild.EmitSub(ibuild.EmitIntConst(0), val); ibuild.EmitStoreGReg(val, inst.RD); @@ -421,6 +435,7 @@ void Jit64::srwx(UGeckoInstruction inst) { + INSTRUCTION_START IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS), samt = ibuild.EmitLoadGReg(inst.RB), corr; @@ -438,6 +453,7 @@ void Jit64::slwx(UGeckoInstruction inst) { + INSTRUCTION_START IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS), samt = ibuild.EmitLoadGReg(inst.RB), corr; @@ -455,6 +471,7 @@ void Jit64::srawx(UGeckoInstruction inst) { + INSTRUCTION_START // FIXME: We can do a lot better on 64-bit IREmitter::InstLoc val, samt, mask, mask2, test; val = ibuild.EmitLoadGReg(inst.RS); @@ -476,6 +493,7 @@ void Jit64::srawix(UGeckoInstruction inst) { + INSTRUCTION_START IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS), test; val = ibuild.EmitSarl(val, ibuild.EmitIntConst(inst.SH)); ibuild.EmitStoreGReg(val, inst.RA); diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_LoadStore.cpp index 141942c71b..31d03a06d0 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_LoadStore.cpp @@ -36,163 +36,141 @@ #include "JitAsm.h" #include "JitRegCache.h" -// #define INSTRUCTION_START Default(inst); return; +//#define INSTRUCTION_START Default(inst); return; #define INSTRUCTION_START - void Jit64::lbzx(UGeckoInstruction inst) - { - IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB); - if (inst.RA) - addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); - ibuild.EmitStoreGReg(ibuild.EmitLoad8(addr), inst.RD); - } - - void Jit64::lwzx(UGeckoInstruction inst) - { - IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB); - if (inst.RA) - addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); - ibuild.EmitStoreGReg(ibuild.EmitLoad32(addr), inst.RD); - } - - void Jit64::lhax(UGeckoInstruction inst) - { - IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB); - if (inst.RA) - addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); - IREmitter::InstLoc val = ibuild.EmitLoad16(addr); - val = ibuild.EmitSExt16(val); - ibuild.EmitStoreGReg(val, inst.RD); - } - - void Jit64::lXz(UGeckoInstruction inst) - { - IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16); - if (inst.RA) - addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); - IREmitter::InstLoc val; - switch (inst.OPCD) - { - case 32: val = ibuild.EmitLoad32(addr); break; //lwz - case 40: val = ibuild.EmitLoad16(addr); break; //lhz - case 34: val = ibuild.EmitLoad8(addr); break; //lbz - default: PanicAlert("lXz: invalid access size"); - } - ibuild.EmitStoreGReg(val, inst.RD); - } - - void Jit64::lha(UGeckoInstruction inst) - { - IREmitter::InstLoc addr = - ibuild.EmitIntConst((s32)(s16)inst.SIMM_16); - if (inst.RA) - addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); - IREmitter::InstLoc val = ibuild.EmitLoad16(addr); - val = 
ibuild.EmitSExt16(val); - ibuild.EmitStoreGReg(val, inst.RD); - } - - void Jit64::lwzux(UGeckoInstruction inst) - { - IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB); - if (inst.RA) { - addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); - ibuild.EmitStoreGReg(addr, inst.RA); - } - ibuild.EmitStoreGReg(ibuild.EmitLoad32(addr), inst.RD); - } - - // Zero cache line. - void Jit64::dcbz(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - - MOV(32, R(EAX), gpr.R(inst.RB)); - if (inst.RA) - ADD(32, R(EAX), gpr.R(inst.RA)); - AND(32, R(EAX), Imm32(~31)); - XORPD(XMM0, R(XMM0)); -#ifdef _M_X64 - MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0); - MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0); -#else - AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); - MOVAPS(MDisp(EAX, (u32)Memory::base), XMM0); - MOVAPS(MDisp(EAX, (u32)Memory::base + 16), XMM0); -#endif - } - - void Jit64::stX(UGeckoInstruction inst) - { - IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16), - value = ibuild.EmitLoadGReg(inst.RS); - if (inst.RA) - addr = ibuild.EmitAdd(ibuild.EmitLoadGReg(inst.RA), addr); - if (inst.OPCD & 1) - ibuild.EmitStoreGReg(addr, inst.RA); - switch (inst.OPCD & ~1) - { - case 36: ibuild.EmitStore32(value, addr); break; //stw - case 44: ibuild.EmitStore16(value, addr); break; //sth - case 38: ibuild.EmitStore8(value, addr); break; //stb - default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return; - } - } - - void Jit64::stXx(UGeckoInstruction inst) - { - IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB), - value = ibuild.EmitLoadGReg(inst.RS); +void Jit64::lbzx(UGeckoInstruction inst) +{ + INSTRUCTION_START + IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB); + if (inst.RA) addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); - if (inst.SUBOP10 & 32) - ibuild.EmitStoreGReg(addr, inst.RA); - switch (inst.SUBOP10 & ~32) - { - case 151: ibuild.EmitStore32(value, addr); break; //stw - case 407: ibuild.EmitStore16(value, addr); break; //sth - case 215: ibuild.EmitStore8(value, addr); break; //stb - default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return; - } + ibuild.EmitStoreGReg(ibuild.EmitLoad8(addr), inst.RD); +} + +void Jit64::lwzx(UGeckoInstruction inst) +{ + INSTRUCTION_START + IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB); + if (inst.RA) + addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); + ibuild.EmitStoreGReg(ibuild.EmitLoad32(addr), inst.RD); +} + +void Jit64::lhax(UGeckoInstruction inst) +{ + INSTRUCTION_START + IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB); + if (inst.RA) + addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); + IREmitter::InstLoc val = ibuild.EmitLoad16(addr); + val = ibuild.EmitSExt16(val); + ibuild.EmitStoreGReg(val, inst.RD); +} + +void Jit64::lXz(UGeckoInstruction inst) +{ + INSTRUCTION_START + IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16); + if (inst.RA) + addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); + IREmitter::InstLoc val; + switch (inst.OPCD) + { + case 32: val = ibuild.EmitLoad32(addr); break; //lwz + case 40: val = ibuild.EmitLoad16(addr); break; //lhz + case 34: val = ibuild.EmitLoad8(addr); break; //lbz + default: PanicAlert("lXz: invalid access size"); } + ibuild.EmitStoreGReg(val, inst.RD); +} + +void Jit64::lha(UGeckoInstruction inst) +{ + INSTRUCTION_START + IREmitter::InstLoc addr = + 
ibuild.EmitIntConst((s32)(s16)inst.SIMM_16); + if (inst.RA) + addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); + IREmitter::InstLoc val = ibuild.EmitLoad16(addr); + val = ibuild.EmitSExt16(val); + ibuild.EmitStoreGReg(val, inst.RD); +} + +void Jit64::lwzux(UGeckoInstruction inst) +{ + INSTRUCTION_START + IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB); + if (inst.RA) { + addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); + ibuild.EmitStoreGReg(addr, inst.RA); + } + ibuild.EmitStoreGReg(ibuild.EmitLoad32(addr), inst.RD); +} + +// Zero cache line. +void Jit64::dcbz(UGeckoInstruction inst) +{ + if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff) + {Default(inst); return;} // turn off from debugger + INSTRUCTION_START; + MOV(32, R(EAX), gpr.R(inst.RB)); + if (inst.RA) + ADD(32, R(EAX), gpr.R(inst.RA)); + AND(32, R(EAX), Imm32(~31)); + XORPD(XMM0, R(XMM0)); +#ifdef _M_X64 + MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0); + MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0); +#else + AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); + MOVAPS(MDisp(EAX, (u32)Memory::base), XMM0); + MOVAPS(MDisp(EAX, (u32)Memory::base + 16), XMM0); +#endif +} + +void Jit64::stX(UGeckoInstruction inst) +{ + INSTRUCTION_START + IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16), + value = ibuild.EmitLoadGReg(inst.RS); + if (inst.RA) + addr = ibuild.EmitAdd(ibuild.EmitLoadGReg(inst.RA), addr); + if (inst.OPCD & 1) + ibuild.EmitStoreGReg(addr, inst.RA); + switch (inst.OPCD & ~1) + { + case 36: ibuild.EmitStore32(value, addr); break; //stw + case 44: ibuild.EmitStore16(value, addr); break; //sth + case 38: ibuild.EmitStore8(value, addr); break; //stb + default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return; + } +} + +void Jit64::stXx(UGeckoInstruction inst) +{ + INSTRUCTION_START + IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB), + value = ibuild.EmitLoadGReg(inst.RS); + addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA)); + if (inst.SUBOP10 & 32) + ibuild.EmitStoreGReg(addr, inst.RA); + switch (inst.SUBOP10 & ~32) + { + case 151: ibuild.EmitStore32(value, addr); break; //stw + case 407: ibuild.EmitStore16(value, addr); break; //sth + case 215: ibuild.EmitStore8(value, addr); break; //stb + default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return; + } +} // A few games use these heavily in video codecs. 
void Jit64::lmw(UGeckoInstruction inst) { -#ifdef _M_IX86 Default(inst); return; -#else - gpr.FlushLockX(ECX); - MOV(32, R(EAX), Imm32((u32)(s32)inst.SIMM_16)); - if (inst.RA) - ADD(32, R(EAX), gpr.R(inst.RA)); - for (int i = inst.RD; i < 32; i++) - { - MOV(32, R(ECX), MComplex(EBX, EAX, SCALE_1, (i - inst.RD) * 4)); - BSWAP(32, ECX); - gpr.LoadToX64(i, false, true); - MOV(32, gpr.R(i), R(ECX)); - } - gpr.UnlockAllX(); -#endif } void Jit64::stmw(UGeckoInstruction inst) { -#ifdef _M_IX86 Default(inst); return; -#else - gpr.FlushLockX(ECX); - MOV(32, R(EAX), Imm32((u32)(s32)inst.SIMM_16)); - if (inst.RA) - ADD(32, R(EAX), gpr.R(inst.RA)); - for (int i = inst.RD; i < 32; i++) - { - MOV(32, R(ECX), gpr.R(i)); - BSWAP(32, ECX); - MOV(32, MComplex(EBX, EAX, SCALE_1, (i - inst.RD) * 4), R(ECX)); - } - gpr.UnlockAllX(); -#endif } diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_SystemRegisters.cpp index 10dec47e99..5ee975a482 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_SystemRegisters.cpp @@ -30,11 +30,12 @@ #include "JitCache.h" #include "JitRegCache.h" +//#define INSTRUCTION_START Default(inst); return; #define INSTRUCTION_START -// #define INSTRUCTION_START Default(inst); return; void Jit64::mtspr(UGeckoInstruction inst) { + INSTRUCTION_START u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F); switch(iIndex) { case SPR_LR: @@ -44,7 +45,6 @@ ibuild.EmitStoreCTR(ibuild.EmitLoadGReg(inst.RD)); return; default: - printf("mtspr case %d", iIndex); Default(inst); return; } @@ -52,6 +52,7 @@ void Jit64::mfspr(UGeckoInstruction inst) { + INSTRUCTION_START u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F); switch (iIndex) { @@ -62,7 +63,6 @@ ibuild.EmitStoreGReg(ibuild.EmitLoadCTR(), inst.RD); return; default: - printf("mfspr case %d", iIndex); Default(inst); return; } @@ -82,6 +82,7 @@ void Jit64::mfmsr(UGeckoInstruction inst) { + INSTRUCTION_START ibuild.EmitStoreGReg(ibuild.EmitLoadMSR(), inst.RD); }
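A note on the INSTRUCTION_START additions across the instruction files: the commented-out define is a debugging switch. Re-enabling it makes every handler that begins with INSTRUCTION_START defer to the interpreter immediately, which lets a miscompiled instruction be bisected down to a single handler. A toy sketch of the idiom (stand-in types and names, not the real Jit64 interface):

    #include <cstdio>

    struct Inst { unsigned hex; };

    static void Default(Inst inst) { std::printf("interpret %08x\n", inst.hex); }

    // Flip the comment to route every handler below through the interpreter:
    //#define INSTRUCTION_START Default(inst); return;
    #define INSTRUCTION_START

    static void addx(Inst inst)
    {
        INSTRUCTION_START
        std::printf("jit addx %08x\n", inst.hex); // stands in for ibuild.Emit*() calls
    }

    int main()
    {
        addx(Inst{0x7C221A14});
        return 0;
    }

With the alternate define active, the macro expands to "Default(inst); return;" at the top of each handler, so the JIT path is skipped without touching the handler bodies.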