More JIT WIP work.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1727 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
2e58a5bef0
commit
f419223908
|
@ -81,12 +81,10 @@ edge over the current JIT mostly due to the fast memory optimization.
|
|||
TODO (in no particular order):
|
||||
Floating-point JIT (both paired and unpaired): currently falls back
|
||||
to the interpreter
|
||||
Improve register allocator to deal with long live intervals.
|
||||
Optimize conditions for conditional branches.
|
||||
Inter-block dead register elimination, especially for CR0.
|
||||
Inter-block inlining.
|
||||
Track down a few correctness bugs.
|
||||
Known zero bits: eliminate unneeded AND instructions for rlwinm/rlwimi
|
||||
Implement a select instruction
|
||||
64-bit compat (it should only be a few tweaks to register allocation and
|
||||
the load/store code)
|
||||
|
@ -95,7 +93,7 @@ Scheduling to reduce register pressure: PowerPC compilers like to push
|
|||
x86 processors, which are short on registers and extremely good at
|
||||
instruction reordering.
|
||||
Common subexpression elimination
|
||||
Optimize load of sum using complex addressing
|
||||
Optimize load of sum using complex addressing (partially implemented)
|
||||
Implement idle-skipping
|
||||
|
||||
*/
|
||||
|
@ -180,6 +178,40 @@ InstLoc IRBuilder::EmitTriOp(unsigned Opcode, InstLoc Op1, InstLoc Op2,
|
|||
}
|
||||
#endif
|
||||
|
||||
// Conservatively computes which bits of the value produced by instruction I
// are known to be zero. A set bit in the returned mask means "this bit of the
// result is always 0"; a return value of 0 means nothing is known.
//
// Only Load8, Or, And, and Shl/Shrl/Rol by an immediate amount are analysed;
// every other opcode (and any shift/rotate by a non-immediate) yields 0.
unsigned IRBuilder::ComputeKnownZeroBits(InstLoc I) {
	const unsigned opcode = getOpcode(*I);

	if (opcode == Load8) {
		// Everything above the low byte is reported as zero.
		return 0xFFFFFF00;
	}

	if (opcode == Or || opcode == And) {
		const unsigned lhsZero = ComputeKnownZeroBits(getOp1(I));
		const unsigned rhsZero = ComputeKnownZeroBits(getOp2(I));
		// Or:  a result bit is zero only if it is zero in both inputs.
		// And: a result bit is zero if it is zero in either input.
		return (opcode == Or) ? (lhsZero & rhsZero) : (lhsZero | rhsZero);
	}

	const bool isShiftOrRotate =
		opcode == Shl || opcode == Shrl || opcode == Rol;
	if (!isShiftOrRotate || !isImm(*getOp2(I))) {
		// Unknown opcode, or the amount is not a compile-time constant.
		return 0;
	}

	const unsigned amount = GetImmValue(getOp2(I));
	const unsigned srcZero = ComputeKnownZeroBits(getOp1(I));

	if (opcode == Rol) {
		// Rotation just moves the known-zero bits around.
		return _rotl(srcZero, amount);
	}

	const unsigned samt = amount & 31;
	if (opcode == Shl) {
		// Known zeros move left; the vacated low samt bits are zero too.
		return (srcZero << samt) | ~(-1U << samt);
	}

	// Shrl: known zeros move right; the vacated high samt bits are zero too.
	return (srcZero >> samt) | ~(-1U >> samt);
}
|
||||
|
||||
InstLoc IRBuilder::FoldZeroOp(unsigned Opcode, unsigned extra) {
|
||||
if (Opcode == LoadGReg) {
|
||||
// Reg load folding: if we already loaded the value,
|
||||
|
@ -275,6 +307,9 @@ InstLoc IRBuilder::FoldAnd(InstLoc Op1, InstLoc Op2) {
|
|||
return FoldShrl(getOp1(Op1), EmitIntConst(shiftAmt2));
|
||||
}
|
||||
}
|
||||
if (!(~ComputeKnownZeroBits(Op1) & ~GetImmValue(Op2))) {
|
||||
return Op1;
|
||||
}
|
||||
}
|
||||
if (Op1 == Op2) return Op1;
|
||||
|
||||
|
@ -348,6 +383,35 @@ InstLoc IRBuilder::FoldRol(InstLoc Op1, InstLoc Op2) {
|
|||
return EmitBiOp(Rol, Op1, Op2);
|
||||
}
|
||||
|
||||
// Simplifies the condition of a conditional branch before emitting it.
//
// Op1 is the branch condition and Op2 is passed through untouched as the
// second operand of the BranchCond. Two patterns built on top of an
// ICmpCRSigned result are rewritten into direct compares:
//
//   And(ICmpCRSigned(a, b), 2)          ->  ICmpEq(a, b)
//   Xor(And(ICmpCRSigned(a, b), 4), 4)  ->  ICmpSle(a, b)
//
// The rewritten condition is fed back through FoldBranchCond; anything that
// does not match is emitted as a plain BranchCond.
InstLoc IRBuilder::FoldBranchCond(InstLoc Op1, InstLoc Op2) {
	const unsigned condOpcode = getOpcode(*Op1);

	// Masking the compare result with 2: rewritten to an equality compare.
	if (condOpcode == And && isImm(*getOp2(Op1))) {
		InstLoc cmp = getOp1(Op1);
		if (getOpcode(*cmp) == ICmpCRSigned &&
		    GetImmValue(getOp2(Op1)) == 2) {
			InstLoc eq = EmitICmpEq(getOp1(cmp), getOp2(cmp));
			return FoldBranchCond(eq, Op2);
		}
	}

	// Masking with 4 and then flipping that same bit: rewritten to a
	// signed "less than or equal" compare.
	if (condOpcode == Xor && isImm(*getOp2(Op1))) {
		InstLoc masked = getOp1(Op1);
		const unsigned xorMask = GetImmValue(getOp2(Op1));
		if (getOpcode(*masked) == And && isImm(*getOp2(masked))) {
			InstLoc cmp = getOp1(masked);
			if (getOpcode(*cmp) == ICmpCRSigned &&
			    GetImmValue(getOp2(masked)) == xorMask &&
			    xorMask == 4) {
				InstLoc sle = EmitICmpSle(getOp1(cmp), getOp2(cmp));
				return FoldBranchCond(sle, Op2);
			}
		}
	}

	return EmitBiOp(BranchCond, Op1, Op2);
}
|
||||
|
||||
InstLoc IRBuilder::FoldInterpreterFallback(InstLoc Op1, InstLoc Op2) {
|
||||
for (unsigned i = 0; i < 32; i++) {
|
||||
GRegCache[i] = 0;
|
||||
|
@ -371,6 +435,7 @@ InstLoc IRBuilder::FoldBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2) {
|
|||
case Shl: return FoldShl(Op1, Op2);
|
||||
case Shrl: return FoldShrl(Op1, Op2);
|
||||
case Rol: return FoldRol(Op1, Op2);
|
||||
case BranchCond: return FoldBranchCond(Op1, Op2);
|
||||
case InterpreterFallback: return FoldInterpreterFallback(Op1, Op2);
|
||||
default: return EmitBiOp(Opcode, Op1, Op2);
|
||||
}
|
||||
|
@ -473,9 +538,12 @@ static X64Reg regFindFreeReg(RegInfo& RI) {
|
|||
if (RI.regs[EBX] == 0) return EBX;
|
||||
if (RI.regs[EDX] == 0) return EDX;
|
||||
if (RI.regs[EAX] == 0) return EAX;
|
||||
// ECX is scratch; never allocate it!
|
||||
regSpill(RI, EDI);
|
||||
return EDI;
|
||||
// ECX is scratch, so we don't allocate it
|
||||
static X64Reg regs[] = {EDI, ESI, EBP, EBX, EDX, EAX};
|
||||
static unsigned nextReg = 0;
|
||||
X64Reg reg = regs[nextReg++ % 6];
|
||||
regSpill(RI, reg);
|
||||
return reg;
|
||||
}
|
||||
|
||||
static OpArg regLocForInst(RegInfo& RI, InstLoc I) {
|
||||
|
@ -532,6 +600,15 @@ static void regSpillCallerSaved(RegInfo& RI) {
|
|||
regSpill(RI, EAX);
|
||||
}
|
||||
|
||||
// Picks the destination register for a unary instruction I.
//
// When bit 4 of I's liveness info is set and operand 1 currently lives in a
// plain register, that register is returned so the result can be produced in
// place. Otherwise a register is obtained from regFindFreeReg.
// NOTE(review): bit 4 appears to mean "operand 1 is not needed after I"
// (other code clears operand 1's register on this bit) - confirm.
static X64Reg regUReg(RegInfo& RI, InstLoc I) {
	const bool op1Flagged = (RI.IInfo[I - RI.FirstI] & 4) != 0;

	if (!op1Flagged || !regLocForInst(RI, getOp1(I)).IsSimpleReg()) {
		// Operand 1 cannot be reused; take some other register.
		return regFindFreeReg(RI);
	}

	return regLocForInst(RI, getOp1(I)).GetSimpleReg();
}
|
||||
|
||||
static X64Reg regBinLHSReg(RegInfo& RI, InstLoc I) {
|
||||
if (RI.IInfo[I - RI.FirstI] & 4) {
|
||||
return regEnsureInReg(RI, getOp1(I));
|
||||
|
@ -559,12 +636,25 @@ static void regEmitBinInst(RegInfo& RI, InstLoc I,
|
|||
}
|
||||
|
||||
static void regEmitMemLoad(RegInfo& RI, InstLoc I, unsigned Size) {
|
||||
X64Reg reg = regBinLHSReg(RI, I);
|
||||
X64Reg reg;
|
||||
unsigned offset;
|
||||
|
||||
if (getOpcode(*getOp1(I)) == Add && isImm(*getOp2(getOp1(I)))) {
|
||||
offset = RI.Build->GetImmValue(getOp2(getOp1(I)));
|
||||
reg = regBinLHSReg(RI, getOp1(I));
|
||||
if (RI.IInfo[I - RI.FirstI] & 4)
|
||||
regClearInst(RI, getOp1(getOp1(I)));
|
||||
} else {
|
||||
offset = 0;
|
||||
reg = regBinLHSReg(RI, I);
|
||||
if (RI.IInfo[I - RI.FirstI] & 4)
|
||||
regClearInst(RI, getOp1(I));
|
||||
}
|
||||
if (RI.UseProfile) {
|
||||
unsigned curLoad = ProfiledLoads[RI.numProfiledLoads++];
|
||||
if (!(curLoad & 0x0C000000)) {
|
||||
if (regReadUse(RI, I)) {
|
||||
unsigned addr = (u32)Memory::base - (curLoad & 0xC0000000);
|
||||
unsigned addr = (u32)Memory::base - (curLoad & 0xC0000000) + offset;
|
||||
RI.Jit->MOVZX(32, Size, reg, MDisp(reg, addr));
|
||||
RI.Jit->BSWAP(Size, reg);
|
||||
RI.regs[reg] = I;
|
||||
|
@ -572,6 +662,9 @@ static void regEmitMemLoad(RegInfo& RI, InstLoc I, unsigned Size) {
|
|||
return;
|
||||
}
|
||||
}
|
||||
if (offset) {
|
||||
RI.Jit->ADD(32, R(reg), Imm32(offset));
|
||||
}
|
||||
if (RI.MakeProfile) {
|
||||
RI.Jit->MOV(32, M(&ProfiledLoads[RI.numProfiledLoads++]), R(reg));
|
||||
}
|
||||
|
@ -638,7 +731,6 @@ static void regEmitShiftInst(RegInfo& RI, InstLoc I,
|
|||
RI.regs[reg] = I;
|
||||
return;
|
||||
}
|
||||
// FIXME: prevent regBinLHSReg from finding ecx!
|
||||
RI.Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I)));
|
||||
(RI.Jit->*op)(32, R(reg), R(ECX));
|
||||
RI.regs[reg] = I;
|
||||
|
@ -695,10 +787,8 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
|
|||
RI.Build = ibuild;
|
||||
RI.UseProfile = UseProfile;
|
||||
RI.MakeProfile = !RI.UseProfile;
|
||||
unsigned bs = Jit->js.blockStart;
|
||||
// Pass to compute liveness
|
||||
// Note that despite this marking, we never materialize immediates;
|
||||
// on x86, they almost always fold into the instruction, and it's at
|
||||
// best a code-size reduction in the cases where they don't.
|
||||
ibuild->StartBackPass();
|
||||
for (unsigned index = RI.IInfo.size() - 1; index != -1U; --index) {
|
||||
InstLoc I = ibuild->ReadBackward();
|
||||
|
@ -719,6 +809,9 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
|
|||
case BlockEnd:
|
||||
case BlockStart:
|
||||
case InterpreterFallback:
|
||||
case SystemCall:
|
||||
case RFIExit:
|
||||
case InterpreterBranch:
|
||||
// No liveness effects
|
||||
break;
|
||||
case Tramp:
|
||||
|
@ -732,13 +825,18 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
|
|||
if (thisUsed)
|
||||
regMarkUse(RI, I, getOp1(I), 1);
|
||||
break;
|
||||
case StoreCR:
|
||||
case StoreCarry:
|
||||
case Load8:
|
||||
case Load16:
|
||||
case Load32:
|
||||
if (getOpcode(*getOp1(I)) == Add &&
|
||||
isImm(*getOp2(getOp1(I)))) {
|
||||
regMarkUse(RI, I, getOp1(getOp1(I)), 1);
|
||||
break;
|
||||
}
|
||||
case StoreGReg:
|
||||
case StoreCR:
|
||||
case StoreLink:
|
||||
case StoreCarry:
|
||||
case StoreCTR:
|
||||
case StoreMSR:
|
||||
regMarkUse(RI, I, getOp1(I), 1);
|
||||
|
@ -757,6 +855,8 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
|
|||
case ICmpCRSigned:
|
||||
case ICmpEq:
|
||||
case ICmpUgt:
|
||||
case ICmpSle:
|
||||
case ICmpSgt:
|
||||
if (thisUsed) {
|
||||
regMarkUse(RI, I, getOp1(I), 1);
|
||||
if (!isImm(*getOp2(I)))
|
||||
|
@ -773,13 +873,21 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
|
|||
if (!isImm(*getOp1(I)))
|
||||
regMarkUse(RI, I, getOp1(I), 1);
|
||||
break;
|
||||
case BranchCond:
|
||||
case BranchCond: {
|
||||
unsigned CondOpcode = getOpcode(*getOp1(I));
|
||||
if ((CondOpcode == ICmpEq ||
|
||||
CondOpcode == ICmpSle) &&
|
||||
isImm(*getOp2(getOp1(I)))) {
|
||||
regMarkUse(RI, I, getOp1(getOp1(I)), 1);
|
||||
} else {
|
||||
regMarkUse(RI, I, getOp1(I), 1);
|
||||
}
|
||||
if (!isImm(*getOp2(I)))
|
||||
regMarkUse(RI, I, getOp2(I), 2);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ibuild->StartForwardPass();
|
||||
for (unsigned i = 0; i != RI.IInfo.size(); i++) {
|
||||
|
@ -902,7 +1010,7 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
|
|||
}
|
||||
case SExt16: {
|
||||
if (!thisUsed) break;
|
||||
X64Reg reg = regFindFreeReg(RI);
|
||||
X64Reg reg = regUReg(RI, I);
|
||||
Jit->MOVSX(32, 16, reg, regLocForInst(RI, getOp1(I)));
|
||||
RI.regs[reg] = I;
|
||||
break;
|
||||
|
@ -987,6 +1095,15 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
|
|||
RI.regs[reg] = I;
|
||||
break;
|
||||
}
|
||||
case ICmpSle: {
|
||||
if (!thisUsed) break;
|
||||
regEmitCmp(RI, I);
|
||||
Jit->SETcc(CC_LE, R(ECX)); // Caution: SETCC uses 8-bit regs!
|
||||
X64Reg reg = regFindFreeReg(RI);
|
||||
Jit->MOVZX(32, 8, reg, R(ECX));
|
||||
RI.regs[reg] = I;
|
||||
break;
|
||||
}
|
||||
case ICmpCRUnsigned: {
|
||||
if (!thisUsed) break;
|
||||
regEmitCmp(RI, I);
|
||||
|
@ -1035,16 +1152,72 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
|
|||
case BlockEnd:
|
||||
break;
|
||||
case BranchCond: {
|
||||
if (getOpcode(*getOp1(I)) == ICmpEq &&
|
||||
isImm(*getOp2(getOp1(I)))) {
|
||||
Jit->CMP(32, regLocForInst(RI, getOp1(getOp1(I))),
|
||||
Imm32(RI.Build->GetImmValue(getOp2(getOp1(I)))));
|
||||
FixupBranch cont = Jit->J_CC(CC_Z);
|
||||
regWriteExit(RI, getOp2(I));
|
||||
Jit->SetJumpTarget(cont);
|
||||
if (RI.IInfo[I - RI.FirstI] & 4)
|
||||
regClearInst(RI, getOp1(getOp1(I)));
|
||||
} else if (getOpcode(*getOp1(I)) == ICmpSle &&
|
||||
isImm(*getOp2(getOp1(I)))) {
|
||||
Jit->CMP(32, regLocForInst(RI, getOp1(getOp1(I))),
|
||||
Imm32(RI.Build->GetImmValue(getOp2(getOp1(I)))));
|
||||
FixupBranch cont = Jit->J_CC(CC_LE);
|
||||
regWriteExit(RI, getOp2(I));
|
||||
Jit->SetJumpTarget(cont);
|
||||
if (RI.IInfo[I - RI.FirstI] & 4)
|
||||
regClearInst(RI, getOp1(getOp1(I)));
|
||||
} else {
|
||||
Jit->CMP(32, regLocForInst(RI, getOp1(I)), Imm8(0));
|
||||
FixupBranch cont = Jit->J_CC(CC_NZ);
|
||||
regWriteExit(RI, getOp2(I));
|
||||
Jit->SetJumpTarget(cont);
|
||||
if (RI.IInfo[I - RI.FirstI] & 4)
|
||||
regClearInst(RI, getOp1(I));
|
||||
}
|
||||
if (RI.IInfo[I - RI.FirstI] & 8)
|
||||
regClearInst(RI, getOp2(I));
|
||||
break;
|
||||
}
|
||||
case BranchUncond: {
|
||||
regWriteExit(RI, getOp1(I));
|
||||
break;
|
||||
}
|
||||
case SystemCall: {
|
||||
unsigned InstLoc = ibuild->GetImmValue(getOp1(I));
|
||||
Jit->Cleanup();
|
||||
Jit->OR(32, M(&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_SYSCALL));
|
||||
Jit->MOV(32, M(&PC), Imm32(InstLoc + 4));
|
||||
Jit->JMP(asm_routines.testExceptions, true);
|
||||
break;
|
||||
}
|
||||
case InterpreterBranch: {
|
||||
Jit->MOV(32, R(EAX), M(&NPC));
|
||||
Jit->WriteExitDestInEAX(0);
|
||||
break;
|
||||
}
|
||||
case RFIExit: {
|
||||
// Bits SRR1[0, 5-9, 16-23, 25-27, 30-31] are placed
|
||||
// into the corresponding bits of the MSR.
|
||||
// MSR[13] is set to 0.
|
||||
const u32 mask = 0x87C0FF73;
|
||||
// MSR = (MSR & ~mask) | (SRR1 & mask);
|
||||
Jit->MOV(32, R(EAX), M(&MSR));
|
||||
Jit->MOV(32, R(ECX), M(&SRR1));
|
||||
Jit->AND(32, R(EAX), Imm32(~mask));
|
||||
Jit->AND(32, R(ECX), Imm32(mask));
|
||||
Jit->OR(32, R(EAX), R(ECX));
|
||||
// MSR &= 0xFFFDFFFF; //TODO: VERIFY
|
||||
Jit->AND(32, R(EAX), Imm32(0xFFFDFFFF));
|
||||
Jit->MOV(32, M(&MSR), R(EAX));
|
||||
// NPC = SRR0;
|
||||
Jit->MOV(32, R(EAX), M(&SRR0));
|
||||
Jit->WriteRfiExitDestInEAX();
|
||||
break;
|
||||
}
|
||||
case Tramp: {
|
||||
if (!thisUsed) break;
|
||||
// FIXME: Optimize!
|
||||
|
@ -1061,7 +1234,11 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
|
|||
PanicAlert("Unknown JIT instruction; aborting!");
|
||||
exit(1);
|
||||
}
|
||||
if (getOpcode(*I) != Tramp) {
|
||||
if (getOpcode(*I) != Tramp &&
|
||||
getOpcode(*I) != BranchCond &&
|
||||
getOpcode(*I) != Load8 &&
|
||||
getOpcode(*I) != Load16 &&
|
||||
getOpcode(*I) != Load32) {
|
||||
if (RI.IInfo[I - RI.FirstI] & 4)
|
||||
regClearInst(RI, getOp1(I));
|
||||
if (RI.IInfo[I - RI.FirstI] & 8)
|
||||
|
@ -1075,10 +1252,9 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
|
|||
}
|
||||
}
|
||||
|
||||
if (RI.numSpills)
|
||||
printf("Block: %x, numspills %d\n", Jit->js.blockStart, RI.numSpills);
|
||||
|
||||
Jit->MOV(32, R(EAX), M(&NPC));
|
||||
Jit->WriteRfiExitDestInEAX();
|
||||
Jit->UD2();
|
||||
}
|
||||
|
||||
|
|
|
@ -73,6 +73,8 @@ namespace IREmitter {
|
|||
ICmpCRUnsigned, // CR for unsigned int compare
|
||||
ICmpEq, // One if equal, zero otherwise
|
||||
ICmpUgt, // One if op1 > op2, zero otherwise
|
||||
ICmpSgt, // One if op1 > op2, zero otherwise
|
||||
ICmpSle, // Opposite of sgt
|
||||
// Memory store operators
|
||||
Store8,
|
||||
Store16,
|
||||
|
@ -87,6 +89,11 @@ namespace IREmitter {
|
|||
CInt16,
|
||||
CInt32,
|
||||
|
||||
// Funny PPC "branches"
|
||||
SystemCall,
|
||||
RFIExit,
|
||||
InterpreterBranch,
|
||||
|
||||
// "Opcode" representing a register too far away to
|
||||
// reference directly; this is a size optimization
|
||||
Tramp,
|
||||
|
@ -159,6 +166,7 @@ namespace IREmitter {
|
|||
InstLoc FoldShl(InstLoc Op1, InstLoc Op2);
|
||||
InstLoc FoldShrl(InstLoc Op1, InstLoc Op2);
|
||||
InstLoc FoldXor(InstLoc Op1, InstLoc Op2);
|
||||
InstLoc FoldBranchCond(InstLoc Op1, InstLoc Op2);
|
||||
|
||||
InstLoc FoldInterpreterFallback(InstLoc Op1, InstLoc Op2);
|
||||
|
||||
|
@ -167,6 +175,8 @@ namespace IREmitter {
|
|||
unsigned extra = 0);
|
||||
InstLoc FoldBiOp(unsigned OpCode, InstLoc Op1, InstLoc Op2);
|
||||
|
||||
unsigned ComputeKnownZeroBits(InstLoc I);
|
||||
|
||||
public:
|
||||
InstLoc EmitIntConst(unsigned value);
|
||||
InstLoc EmitStoreLink(InstLoc val) {
|
||||
|
@ -241,6 +251,12 @@ namespace IREmitter {
|
|||
InstLoc EmitICmpUgt(InstLoc op1, InstLoc op2) {
|
||||
return FoldBiOp(ICmpUgt, op1, op2);
|
||||
}
|
||||
InstLoc EmitICmpSgt(InstLoc op1, InstLoc op2) {
|
||||
return FoldBiOp(ICmpSgt, op1, op2);
|
||||
}
|
||||
InstLoc EmitICmpSle(InstLoc op1, InstLoc op2) {
|
||||
return FoldBiOp(ICmpSle, op1, op2);
|
||||
}
|
||||
InstLoc EmitLoad8(InstLoc op1) {
|
||||
return FoldUOp(Load8, op1);
|
||||
}
|
||||
|
@ -274,9 +290,18 @@ namespace IREmitter {
|
|||
InstLoc EmitInterpreterFallback(InstLoc op1, InstLoc op2) {
|
||||
return FoldBiOp(InterpreterFallback, op1, op2);
|
||||
}
|
||||
InstLoc EmitInterpreterBranch() {
|
||||
return FoldZeroOp(InterpreterBranch, 0);
|
||||
}
|
||||
InstLoc EmitStoreCarry(InstLoc op1) {
|
||||
return FoldUOp(StoreCarry, op1);
|
||||
}
|
||||
InstLoc EmitSystemCall(InstLoc pc) {
|
||||
return FoldUOp(SystemCall, pc);
|
||||
}
|
||||
InstLoc EmitRFIExit() {
|
||||
return FoldZeroOp(RFIExit, 0);
|
||||
}
|
||||
|
||||
// Positions the read cursor one past the last instruction so the list can be
// walked backwards. The end pointer is formed with pointer arithmetic instead
// of indexing element size(): that index is out of range for operator[]
// (undefined behaviour on std::vector, and an assertion in checked STL builds).
// NOTE(review): assumes InstList is a non-empty, contiguous std::vector-like
// container - confirm against its declaration.
void StartBackPass() { curReadPtr = &InstList[0] + InstList.size(); }
|
||||
void StartForwardPass() { curReadPtr = &InstList[0]; }
|
||||
|
|
|
@ -43,36 +43,12 @@ using namespace Gen;
|
|||
|
||||
void Jit64::sc(UGeckoInstruction inst)
|
||||
{
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITBranchOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
|
||||
gpr.Flush(FLUSH_ALL);
|
||||
fpr.Flush(FLUSH_ALL);
|
||||
WriteExceptionExit(EXCEPTION_SYSCALL);
|
||||
ibuild.EmitSystemCall(ibuild.EmitIntConst(js.compilerPC));
|
||||
}
|
||||
|
||||
void Jit64::rfi(UGeckoInstruction inst)
|
||||
{
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITBranchOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
|
||||
gpr.Flush(FLUSH_ALL);
|
||||
fpr.Flush(FLUSH_ALL);
|
||||
//Bits SRR1[0, 5-9, 16-23, 25-27, 30-31] are placed into the corresponding bits of the MSR.
|
||||
//MSR[13] is set to 0.
|
||||
const u32 mask = 0x87C0FF73;
|
||||
// MSR = (MSR & ~mask) | (SRR1 & mask);
|
||||
MOV(32, R(EAX), M(&MSR));
|
||||
MOV(32, R(ECX), M(&SRR1));
|
||||
AND(32, R(EAX), Imm32(~mask));
|
||||
AND(32, R(ECX), Imm32(mask));
|
||||
OR(32, R(EAX), R(ECX));
|
||||
// MSR &= 0xFFFDFFFF; //TODO: VERIFY
|
||||
AND(32, R(EAX), Imm32(0xFFFDFFFF));
|
||||
MOV(32, M(&MSR), R(EAX));
|
||||
// NPC = SRR0;
|
||||
MOV(32, R(EAX), M(&SRR0));
|
||||
WriteRfiExitDestInEAX();
|
||||
ibuild.EmitRFIExit();
|
||||
}
|
||||
|
||||
void Jit64::bx(UGeckoInstruction inst)
|
||||
|
@ -89,9 +65,6 @@ using namespace Gen;
|
|||
ibuild.EmitBranchUncond(ibuild.EmitIntConst(destination));
|
||||
}
|
||||
|
||||
// TODO - optimize to hell and beyond
|
||||
// TODO - make nice easy to optimize special cases for the most common
|
||||
// variants of this instruction.
|
||||
void Jit64::bcx(UGeckoInstruction inst)
|
||||
{
|
||||
if (inst.LK)
|
||||
|
@ -148,45 +121,10 @@ using namespace Gen;
|
|||
|
||||
void Jit64::bcctrx(UGeckoInstruction inst)
|
||||
{
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITBranchOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
|
||||
gpr.Flush(FLUSH_ALL);
|
||||
fpr.Flush(FLUSH_ALL);
|
||||
|
||||
// bool fastway = true;
|
||||
|
||||
if ((inst.BO & 16) == 0)
|
||||
{
|
||||
PanicAlert("Bizarro bcctrx %08x, not supported.", inst.hex);
|
||||
_assert_msg_(DYNA_REC, 0, "Bizarro bcctrx");
|
||||
/*
|
||||
fastway = false;
|
||||
MOV(32, M(&PC), Imm32(js.compilerPC+4));
|
||||
MOV(32, R(EAX), M(&CR));
|
||||
XOR(32, R(ECX), R(ECX));
|
||||
AND(32, R(EAX), Imm32(0x80000000 >> inst.BI));
|
||||
|
||||
CCFlags branch;
|
||||
if(inst.BO & 8)
|
||||
branch = CC_NZ;
|
||||
else
|
||||
branch = CC_Z;
|
||||
*/
|
||||
// TODO(ector): Why is this commented out?
|
||||
//SETcc(branch, R(ECX));
|
||||
// check for EBX
|
||||
//TEST(32, R(ECX), R(ECX));
|
||||
//linkEnd = J_CC(branch);
|
||||
Default(inst);
|
||||
ibuild.EmitInterpreterBranch();
|
||||
return;
|
||||
}
|
||||
// NPC = CTR & 0xfffffffc;
|
||||
MOV(32, R(EAX), M(&CTR));
|
||||
if (inst.LK)
|
||||
MOV(32, M(&LR), Imm32(js.compilerPC + 4)); // LR = PC + 4;
|
||||
AND(32, R(EAX), Imm32(0xFFFFFFFC));
|
||||
WriteExitDestInEAX(0);
|
||||
}
|
||||
|
||||
|
||||
void Jit64::bclrx(UGeckoInstruction inst)
|
||||
{
|
||||
|
@ -195,6 +133,7 @@ using namespace Gen;
|
|||
return;
|
||||
}
|
||||
Default(inst);
|
||||
ibuild.EmitInterpreterBranch();
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -37,6 +37,7 @@
|
|||
|
||||
void Jit64::reg_imm(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
int d = inst.RD, a = inst.RA, s = inst.RS;
|
||||
IREmitter::InstLoc val, test, c;
|
||||
switch (inst.OPCD)
|
||||
|
@ -103,6 +104,7 @@
|
|||
|
||||
void Jit64::cmpXX(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
IREmitter::InstLoc lhs, rhs, res;
|
||||
lhs = ibuild.EmitLoadGReg(inst.RA);
|
||||
if (inst.OPCD == 31) {
|
||||
|
@ -125,6 +127,7 @@
|
|||
|
||||
void Jit64::orx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB);
|
||||
val = ibuild.EmitOr(ibuild.EmitLoadGReg(inst.RS), val);
|
||||
ibuild.EmitStoreGReg(val, inst.RA);
|
||||
|
@ -136,6 +139,7 @@
|
|||
// m_GPR[_inst.RA] = m_GPR[_inst.RS] ^ m_GPR[_inst.RB];
|
||||
void Jit64::xorx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB);
|
||||
val = ibuild.EmitXor(ibuild.EmitLoadGReg(inst.RS), val);
|
||||
ibuild.EmitStoreGReg(val, inst.RA);
|
||||
|
@ -145,6 +149,7 @@
|
|||
|
||||
void Jit64::andx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB);
|
||||
val = ibuild.EmitAnd(ibuild.EmitLoadGReg(inst.RS), val);
|
||||
ibuild.EmitStoreGReg(val, inst.RA);
|
||||
|
@ -154,6 +159,7 @@
|
|||
|
||||
void Jit64::extsbx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS);
|
||||
val = ibuild.EmitSExt8(val);
|
||||
ibuild.EmitStoreGReg(val, inst.RA);
|
||||
|
@ -163,6 +169,7 @@
|
|||
|
||||
void Jit64::extshx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS);
|
||||
val = ibuild.EmitSExt16(val);
|
||||
ibuild.EmitStoreGReg(val, inst.RA);
|
||||
|
@ -226,6 +233,7 @@
|
|||
|
||||
void Jit64::subfx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
if (inst.OE) PanicAlert("OE: subfx");
|
||||
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB);
|
||||
val = ibuild.EmitSub(val, ibuild.EmitLoadGReg(inst.RA));
|
||||
|
@ -236,6 +244,7 @@
|
|||
|
||||
void Jit64::mulli(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RA);
|
||||
val = ibuild.EmitMul(val, ibuild.EmitIntConst(inst.SIMM_16));
|
||||
ibuild.EmitStoreGReg(val, inst.RD);
|
||||
|
@ -243,6 +252,7 @@
|
|||
|
||||
void Jit64::mullwx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB);
|
||||
val = ibuild.EmitMul(ibuild.EmitLoadGReg(inst.RA), val);
|
||||
ibuild.EmitStoreGReg(val, inst.RD);
|
||||
|
@ -316,6 +326,7 @@
|
|||
|
||||
void Jit64::addx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB);
|
||||
val = ibuild.EmitAdd(ibuild.EmitLoadGReg(inst.RA), val);
|
||||
ibuild.EmitStoreGReg(val, inst.RD);
|
||||
|
@ -355,6 +366,7 @@
|
|||
|
||||
void Jit64::rlwinmx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
unsigned mask = Helper_Mask(inst.MB, inst.ME);
|
||||
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS);
|
||||
val = ibuild.EmitRol(val, ibuild.EmitIntConst(inst.SH));
|
||||
|
@ -367,6 +379,7 @@
|
|||
|
||||
void Jit64::rlwimix(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
unsigned mask = Helper_Mask(inst.MB, inst.ME);
|
||||
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS);
|
||||
val = ibuild.EmitRol(val, ibuild.EmitIntConst(inst.SH));
|
||||
|
@ -412,6 +425,7 @@
|
|||
|
||||
void Jit64::negx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RA);
|
||||
val = ibuild.EmitSub(ibuild.EmitIntConst(0), val);
|
||||
ibuild.EmitStoreGReg(val, inst.RD);
|
||||
|
@ -421,6 +435,7 @@
|
|||
|
||||
void Jit64::srwx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS),
|
||||
samt = ibuild.EmitLoadGReg(inst.RB),
|
||||
corr;
|
||||
|
@ -438,6 +453,7 @@
|
|||
|
||||
void Jit64::slwx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS),
|
||||
samt = ibuild.EmitLoadGReg(inst.RB),
|
||||
corr;
|
||||
|
@ -455,6 +471,7 @@
|
|||
|
||||
void Jit64::srawx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
// FIXME: We can do a lot better on 64-bit
|
||||
IREmitter::InstLoc val, samt, mask, mask2, test;
|
||||
val = ibuild.EmitLoadGReg(inst.RS);
|
||||
|
@ -476,6 +493,7 @@
|
|||
|
||||
void Jit64::srawix(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS), test;
|
||||
val = ibuild.EmitSarl(val, ibuild.EmitIntConst(inst.SH));
|
||||
ibuild.EmitStoreGReg(val, inst.RA);
|
||||
|
|
|
@ -41,6 +41,7 @@
|
|||
|
||||
void Jit64::lbzx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB);
|
||||
if (inst.RA)
|
||||
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
|
||||
|
@ -49,6 +50,7 @@
|
|||
|
||||
void Jit64::lwzx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB);
|
||||
if (inst.RA)
|
||||
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
|
||||
|
@ -57,6 +59,7 @@
|
|||
|
||||
void Jit64::lhax(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB);
|
||||
if (inst.RA)
|
||||
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
|
||||
|
@ -67,6 +70,7 @@
|
|||
|
||||
void Jit64::lXz(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16);
|
||||
if (inst.RA)
|
||||
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
|
||||
|
@ -83,6 +87,7 @@
|
|||
|
||||
void Jit64::lha(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
IREmitter::InstLoc addr =
|
||||
ibuild.EmitIntConst((s32)(s16)inst.SIMM_16);
|
||||
if (inst.RA)
|
||||
|
@ -94,6 +99,7 @@
|
|||
|
||||
void Jit64::lwzux(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB);
|
||||
if (inst.RA) {
|
||||
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
|
||||
|
@ -108,7 +114,6 @@
|
|||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
INSTRUCTION_START;
|
||||
|
||||
MOV(32, R(EAX), gpr.R(inst.RB));
|
||||
if (inst.RA)
|
||||
ADD(32, R(EAX), gpr.R(inst.RA));
|
||||
|
@ -126,6 +131,7 @@
|
|||
|
||||
void Jit64::stX(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16),
|
||||
value = ibuild.EmitLoadGReg(inst.RS);
|
||||
if (inst.RA)
|
||||
|
@ -143,6 +149,7 @@
|
|||
|
||||
void Jit64::stXx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB),
|
||||
value = ibuild.EmitLoadGReg(inst.RS);
|
||||
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
|
||||
|
@ -160,39 +167,10 @@
|
|||
// A few games use these heavily in video codecs.
|
||||
void Jit64::lmw(UGeckoInstruction inst)
|
||||
{
|
||||
#ifdef _M_IX86
|
||||
Default(inst); return;
|
||||
#else
|
||||
gpr.FlushLockX(ECX);
|
||||
MOV(32, R(EAX), Imm32((u32)(s32)inst.SIMM_16));
|
||||
if (inst.RA)
|
||||
ADD(32, R(EAX), gpr.R(inst.RA));
|
||||
for (int i = inst.RD; i < 32; i++)
|
||||
{
|
||||
MOV(32, R(ECX), MComplex(EBX, EAX, SCALE_1, (i - inst.RD) * 4));
|
||||
BSWAP(32, ECX);
|
||||
gpr.LoadToX64(i, false, true);
|
||||
MOV(32, gpr.R(i), R(ECX));
|
||||
}
|
||||
gpr.UnlockAllX();
|
||||
#endif
|
||||
}
|
||||
|
||||
void Jit64::stmw(UGeckoInstruction inst)
|
||||
{
|
||||
#ifdef _M_IX86
|
||||
Default(inst); return;
|
||||
#else
|
||||
gpr.FlushLockX(ECX);
|
||||
MOV(32, R(EAX), Imm32((u32)(s32)inst.SIMM_16));
|
||||
if (inst.RA)
|
||||
ADD(32, R(EAX), gpr.R(inst.RA));
|
||||
for (int i = inst.RD; i < 32; i++)
|
||||
{
|
||||
MOV(32, R(ECX), gpr.R(i));
|
||||
BSWAP(32, ECX);
|
||||
MOV(32, MComplex(EBX, EAX, SCALE_1, (i - inst.RD) * 4), R(ECX));
|
||||
}
|
||||
gpr.UnlockAllX();
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -30,11 +30,12 @@
|
|||
#include "JitCache.h"
|
||||
#include "JitRegCache.h"
|
||||
|
||||
#define INSTRUCTION_START
|
||||
//#define INSTRUCTION_START Default(inst); return;
|
||||
#define INSTRUCTION_START
|
||||
|
||||
void Jit64::mtspr(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
|
||||
switch(iIndex) {
|
||||
case SPR_LR:
|
||||
|
@ -44,7 +45,6 @@
|
|||
ibuild.EmitStoreCTR(ibuild.EmitLoadGReg(inst.RD));
|
||||
return;
|
||||
default:
|
||||
printf("mtspr case %d", iIndex);
|
||||
Default(inst);
|
||||
return;
|
||||
}
|
||||
|
@ -52,6 +52,7 @@
|
|||
|
||||
void Jit64::mfspr(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
|
||||
switch (iIndex)
|
||||
{
|
||||
|
@ -62,7 +63,6 @@
|
|||
ibuild.EmitStoreGReg(ibuild.EmitLoadCTR(), inst.RD);
|
||||
return;
|
||||
default:
|
||||
printf("mfspr case %d", iIndex);
|
||||
Default(inst);
|
||||
return;
|
||||
}
|
||||
|
@ -82,6 +82,7 @@
|
|||
|
||||
void Jit64::mfmsr(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
ibuild.EmitStoreGReg(ibuild.EmitLoadMSR(), inst.RD);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue