More JIT WIP work.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1727 8ced0084-cf51-0410-be5f-012b33b47a6e
Author: magumagu9
Date: 2008-12-31 21:06:00 +00:00
Parent: 2e58a5bef0
Commit: f419223908
6 changed files with 378 additions and 241 deletions


@ -81,12 +81,10 @@ edge over the current JIT mostly due to the fast memory optimization.
TODO (in no particular order):
Floating-point JIT (both paired and unpaired): currently falls back
to the interpreter
Improve register allocator to deal with long live intervals.
Optimize conditions for conditional branches.
Inter-block dead register elimination, especially for CR0.
Inter-block inlining.
Track down a few correctness bugs.
Known zero bits: eliminate unneeded AND instructions for rlwinm/rlwimi
Implement a select instruction
64-bit compat (it should only be a few tweaks to register allocation and
the load/store code)
@ -95,7 +93,7 @@ Scheduling to reduce register pressure: PowerPC compilers like to push
x86 processors, which are short on registers and extremely good at
instruction reordering.
Common subexpression elimination
Optimize load of sum using complex addressing
Optimize load of sum using complex addressing (partially implemented)
Implement idle-skipping
*/
@ -180,6 +178,40 @@ InstLoc IRBuilder::EmitTriOp(unsigned Opcode, InstLoc Op1, InstLoc Op2,
}
#endif
unsigned IRBuilder::ComputeKnownZeroBits(InstLoc I) {
switch (getOpcode(*I)) {
case Load8:
return 0xFFFFFF00;
case Or:
return ComputeKnownZeroBits(getOp1(I)) &
ComputeKnownZeroBits(getOp2(I));
case And:
return ComputeKnownZeroBits(getOp1(I)) |
ComputeKnownZeroBits(getOp2(I));
case Shl:
if (isImm(*getOp2(I))) {
unsigned samt = GetImmValue(getOp2(I)) & 31;
return (ComputeKnownZeroBits(getOp1(I)) << samt) |
~(-1U << samt);
}
return 0;
case Shrl:
if (isImm(*getOp2(I))) {
unsigned samt = GetImmValue(getOp2(I)) & 31;
return (ComputeKnownZeroBits(getOp1(I)) >> samt) |
~(-1U >> samt);
}
return 0;
case Rol:
if (isImm(*getOp2(I))) {
return _rotl(ComputeKnownZeroBits(getOp1(I)),
GetImmValue(getOp2(I)));
}
// Fall through: a non-constant rotate amount yields no known bits.
default:
return 0;
}
}
InstLoc IRBuilder::FoldZeroOp(unsigned Opcode, unsigned extra) {
if (Opcode == LoadGReg) {
// Reg load folding: if we already loaded the value,
@ -275,6 +307,9 @@ InstLoc IRBuilder::FoldAnd(InstLoc Op1, InstLoc Op2) {
return FoldShrl(getOp1(Op1), EmitIntConst(shiftAmt2));
}
}
if (!(~ComputeKnownZeroBits(Op1) & ~GetImmValue(Op2))) {
return Op1;
}
}
if (Op1 == Op2) return Op1;
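The known-zero-bits analysis above is what drives the new FoldAnd early-out: a mask can be dropped when every bit it clears is already provably zero. A minimal self-contained sketch of the same recurrence on plain integers (illustrative names, not Dolphin code):

```cpp
#include <cassert>

// A bit set in a "known zero" mask is provably zero in the value.
unsigned KnownZeroOr(unsigned zx, unsigned zy)  { return zx & zy; } // zero only if zero in both
unsigned KnownZeroAnd(unsigned zx, unsigned zy) { return zx | zy; } // zero if zero in either
unsigned KnownZeroShl(unsigned zx, unsigned s)  { return (zx << s) | ~(-1U << s); }

int main() {
    unsigned load8 = 0xFFFFFF00; // Load8 zero-extends: top 24 bits known zero
    // FoldAnd's test: the AND folds away when ~knownZero(op1) & ~imm == 0,
    // i.e. the immediate only clears bits that are already zero.
    assert((~load8 & ~0xFFu) == 0);               // "andi rX, rY, 0xFF" after lbz is a no-op
    assert(KnownZeroShl(load8, 8) == 0xFFFF00FF); // byte << 8: only bits 8-15 may be set
    return 0;
}
```

This is the rlwinm/rlwimi item from the TODO list: the rotate-and-mask sequences those instructions expand into often AND with masks this analysis can prove redundant.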
@ -348,6 +383,35 @@ InstLoc IRBuilder::FoldRol(InstLoc Op1, InstLoc Op2) {
return EmitBiOp(Rol, Op1, Op2);
}
InstLoc IRBuilder::FoldBranchCond(InstLoc Op1, InstLoc Op2) {
if (getOpcode(*Op1) == And &&
isImm(*getOp2(Op1)) &&
getOpcode(*getOp1(Op1)) == ICmpCRSigned) {
unsigned branchValue = GetImmValue(getOp2(Op1));
if (branchValue == 2)
return FoldBranchCond(EmitICmpEq(getOp1(getOp1(Op1)),
getOp2(getOp1(Op1))), Op2);
}
if (getOpcode(*Op1) == Xor &&
isImm(*getOp2(Op1))) {
InstLoc XOp1 = getOp1(Op1);
unsigned branchValue = GetImmValue(getOp2(Op1));
if (getOpcode(*XOp1) == And &&
isImm(*getOp2(XOp1)) &&
getOpcode(*getOp1(XOp1)) == ICmpCRSigned) {
unsigned innerBranchValue =
GetImmValue(getOp2(XOp1));
if (branchValue == innerBranchValue) {
if (branchValue == 4) {
return FoldBranchCond(EmitICmpSle(getOp1(getOp1(XOp1)),
getOp2(getOp1(XOp1))), Op2);
}
}
}
}
return EmitBiOp(BranchCond, Op1, Op2);
}
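FoldBranchCond pattern-matches conditions derived from CR values. Judging by the two folds, ICmpCRSigned produces a PPC CR-style nibble (LT=8, GT=4, EQ=2), so `And(cr, 2)` is the beq test and `Xor(And(cr, 4), 4)` is "GT bit clear", i.e. signed less-or-equal; both reduce to a direct compare the backend can fuse into CMP+Jcc. A quick check of that bit algebra (the encoding is inferred from the folds, not stated in this diff):

```cpp
#include <cassert>

// Assumed ICmpCRSigned result: PPC CR field layout, LT=8, GT=4, EQ=2.
unsigned CmpCRSigned(int a, int b) { return a < b ? 8u : a > b ? 4u : 2u; }

int main() {
    for (int a = -2; a <= 2; ++a)
        for (int b = -2; b <= 2; ++b) {
            unsigned cr = CmpCRSigned(a, b);
            assert(((cr & 2) != 0) == (a == b));        // And(cr,2)        -> ICmpEq
            assert((((cr & 4) ^ 4) != 0) == (a <= b));  // Xor(And(cr,4),4) -> ICmpSle
        }
    return 0;
}
```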
InstLoc IRBuilder::FoldInterpreterFallback(InstLoc Op1, InstLoc Op2) {
for (unsigned i = 0; i < 32; i++) {
GRegCache[i] = 0;
@ -371,6 +435,7 @@ InstLoc IRBuilder::FoldBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2) {
case Shl: return FoldShl(Op1, Op2);
case Shrl: return FoldShrl(Op1, Op2);
case Rol: return FoldRol(Op1, Op2);
case BranchCond: return FoldBranchCond(Op1, Op2);
case InterpreterFallback: return FoldInterpreterFallback(Op1, Op2);
default: return EmitBiOp(Opcode, Op1, Op2);
}
@ -473,9 +538,12 @@ static X64Reg regFindFreeReg(RegInfo& RI) {
if (RI.regs[EBX] == 0) return EBX;
if (RI.regs[EDX] == 0) return EDX;
if (RI.regs[EAX] == 0) return EAX;
// ECX is scratch; never allocate it!
regSpill(RI, EDI);
return EDI;
// ECX is scratch, so we don't allocate it
static X64Reg regs[] = {EDI, ESI, EBP, EBX, EDX, EAX};
static unsigned nextReg = 0;
X64Reg reg = regs[nextReg++ % 6];
regSpill(RI, reg);
return reg;
}
static OpArg regLocForInst(RegInfo& RI, InstLoc I) {
@ -532,6 +600,15 @@ static void regSpillCallerSaved(RegInfo& RI) {
regSpill(RI, EAX);
}
static X64Reg regUReg(RegInfo& RI, InstLoc I) {
if (RI.IInfo[I - RI.FirstI] & 4 &&
regLocForInst(RI, getOp1(I)).IsSimpleReg()) {
return regLocForInst(RI, getOp1(I)).GetSimpleReg();
}
X64Reg reg = regFindFreeReg(RI);
return reg;
}
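Two allocator tweaks here: regFindFreeReg now rotates its spill victim across the six allocatable registers instead of always evicting EDI (which could repeatedly evict the same hot value), and the new regUReg lets a unary op reuse its operand's register when the IInfo & 4 flag says the operand dies at this instruction. A toy model of both ideas (not the real RegInfo API):

```cpp
// Toy register file: 6 slots, ECX excluded as scratch. 0 means free.
struct ToyAlloc {
    int valueInReg[6] = {};
    unsigned nextVictim = 0;

    int FindFreeReg() {
        for (int r = 0; r < 6; ++r)
            if (valueInReg[r] == 0) return r;
        return (int)(nextVictim++ % 6); // round-robin spill victim
    }
    // Coalescing: if the source dies here, the result takes its register.
    int RegForUnaryOp(int srcReg, bool srcDiesHere) {
        return srcDiesHere ? srcReg : FindFreeReg();
    }
};
```

Note the `static` cursor in regFindFreeReg persists across blocks; that reads as a deliberately cheap heuristic rather than per-block state.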
static X64Reg regBinLHSReg(RegInfo& RI, InstLoc I) {
if (RI.IInfo[I - RI.FirstI] & 4) {
return regEnsureInReg(RI, getOp1(I));
@ -559,12 +636,25 @@ static void regEmitBinInst(RegInfo& RI, InstLoc I,
}
static void regEmitMemLoad(RegInfo& RI, InstLoc I, unsigned Size) {
X64Reg reg = regBinLHSReg(RI, I);
X64Reg reg;
unsigned offset;
if (getOpcode(*getOp1(I)) == Add && isImm(*getOp2(getOp1(I)))) {
offset = RI.Build->GetImmValue(getOp2(getOp1(I)));
reg = regBinLHSReg(RI, getOp1(I));
if (RI.IInfo[I - RI.FirstI] & 4)
regClearInst(RI, getOp1(getOp1(I)));
} else {
offset = 0;
reg = regBinLHSReg(RI, I);
if (RI.IInfo[I - RI.FirstI] & 4)
regClearInst(RI, getOp1(I));
}
if (RI.UseProfile) {
unsigned curLoad = ProfiledLoads[RI.numProfiledLoads++];
if (!(curLoad & 0x0C000000)) {
if (regReadUse(RI, I)) {
unsigned addr = (u32)Memory::base - (curLoad & 0xC0000000);
unsigned addr = (u32)Memory::base - (curLoad & 0xC0000000) + offset;
RI.Jit->MOVZX(32, Size, reg, MDisp(reg, addr));
RI.Jit->BSWAP(Size, reg);
RI.regs[reg] = I;
@ -572,6 +662,9 @@ static void regEmitMemLoad(RegInfo& RI, InstLoc I, unsigned Size) {
return;
}
}
if (offset) {
RI.Jit->ADD(32, R(reg), Imm32(offset));
}
if (RI.MakeProfile) {
RI.Jit->MOV(32, M(&ProfiledLoads[RI.numProfiledLoads++]), R(reg));
}
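This is the "(partially implemented)" complex-addressing item from the TODO: when the load address is `Add(x, imm)`, the immediate is folded into the x86 displacement on the profiled fast path (the `+ offset` added to `addr` above), so `lwz rD, d(rA)` costs a single MOVZX from `[reg + base - region + d]` plus a BSWAP; only the unprofiled slow path pays an explicit ADD. A C model of what one such fast-path load computes (illustrative, assuming a flat host mapping of guest memory):

```cpp
#include <cstdint>

// One profiled load: host address = base + guest address + offset, read
// big-endian (the MOVZX + BSWAP pair in the emitted code).
uint32_t LoadWordBE(const uint8_t* memBase, uint32_t guestAddr, uint32_t offset) {
    const uint8_t* p = memBase + guestAddr + offset; // folded into one x86 addressing mode
    return (uint32_t)p[0] << 24 | (uint32_t)p[1] << 16
         | (uint32_t)p[2] << 8  | (uint32_t)p[3];
}
```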
@ -638,7 +731,6 @@ static void regEmitShiftInst(RegInfo& RI, InstLoc I,
RI.regs[reg] = I;
return;
}
// FIXME: prevent regBinLHSReg from finding ecx!
RI.Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I)));
(RI.Jit->*op)(32, R(reg), R(ECX));
RI.regs[reg] = I;
@ -695,10 +787,8 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
RI.Build = ibuild;
RI.UseProfile = UseProfile;
RI.MakeProfile = !RI.UseProfile;
unsigned bs = Jit->js.blockStart;
// Pass to compute liveness
// Note that despite this marking, we never materialize immediates;
// on x86, they almost always fold into the instruction, and it's at
// best a code-size reduction in the cases where they don't.
ibuild->StartBackPass();
for (unsigned index = RI.IInfo.size() - 1; index != -1U; --index) {
InstLoc I = ibuild->ReadBackward();
@ -719,6 +809,9 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
case BlockEnd:
case BlockStart:
case InterpreterFallback:
case SystemCall:
case RFIExit:
case InterpreterBranch:
// No liveness effects
break;
case Tramp:
@ -732,13 +825,18 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
if (thisUsed)
regMarkUse(RI, I, getOp1(I), 1);
break;
case StoreCR:
case StoreCarry:
case Load8:
case Load16:
case Load32:
if (getOpcode(*getOp1(I)) == Add &&
isImm(*getOp2(getOp1(I)))) {
regMarkUse(RI, I, getOp1(getOp1(I)), 1);
break;
}
case StoreGReg:
case StoreCR:
case StoreLink:
case StoreCarry:
case StoreCTR:
case StoreMSR:
regMarkUse(RI, I, getOp1(I), 1);
@ -757,6 +855,8 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
case ICmpCRSigned:
case ICmpEq:
case ICmpUgt:
case ICmpSle:
case ICmpSgt:
if (thisUsed) {
regMarkUse(RI, I, getOp1(I), 1);
if (!isImm(*getOp2(I)))
@ -773,12 +873,20 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
if (!isImm(*getOp1(I)))
regMarkUse(RI, I, getOp1(I), 1);
break;
case BranchCond:
regMarkUse(RI, I, getOp1(I), 1);
case BranchCond: {
unsigned CondOpcode = getOpcode(*getOp1(I));
if ((CondOpcode == ICmpEq ||
CondOpcode == ICmpSle) &&
isImm(*getOp2(getOp1(I)))) {
regMarkUse(RI, I, getOp1(getOp1(I)), 1);
} else {
regMarkUse(RI, I, getOp1(I), 1);
}
if (!isImm(*getOp2(I)))
regMarkUse(RI, I, getOp2(I), 2);
break;
}
}
}
ibuild->StartForwardPass();
@ -902,7 +1010,7 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
}
case SExt16: {
if (!thisUsed) break;
X64Reg reg = regFindFreeReg(RI);
X64Reg reg = regUReg(RI, I);
Jit->MOVSX(32, 16, reg, regLocForInst(RI, getOp1(I)));
RI.regs[reg] = I;
break;
@ -987,6 +1095,15 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
RI.regs[reg] = I;
break;
}
case ICmpSle: {
if (!thisUsed) break;
regEmitCmp(RI, I);
Jit->SETcc(CC_LE, R(ECX)); // Caution: SETCC uses 8-bit regs!
X64Reg reg = regFindFreeReg(RI);
Jit->MOVZX(32, 8, reg, R(ECX));
RI.regs[reg] = I;
break;
}
case ICmpCRUnsigned: {
if (!thisUsed) break;
regEmitCmp(RI, I);
@ -1035,16 +1152,72 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
case BlockEnd:
break;
case BranchCond: {
Jit->CMP(32, regLocForInst(RI, getOp1(I)), Imm8(0));
FixupBranch cont = Jit->J_CC(CC_NZ);
regWriteExit(RI, getOp2(I));
Jit->SetJumpTarget(cont);
if (getOpcode(*getOp1(I)) == ICmpEq &&
isImm(*getOp2(getOp1(I)))) {
Jit->CMP(32, regLocForInst(RI, getOp1(getOp1(I))),
Imm32(RI.Build->GetImmValue(getOp2(getOp1(I)))));
FixupBranch cont = Jit->J_CC(CC_Z);
regWriteExit(RI, getOp2(I));
Jit->SetJumpTarget(cont);
if (RI.IInfo[I - RI.FirstI] & 4)
regClearInst(RI, getOp1(getOp1(I)));
} else if (getOpcode(*getOp1(I)) == ICmpSle &&
isImm(*getOp2(getOp1(I)))) {
Jit->CMP(32, regLocForInst(RI, getOp1(getOp1(I))),
Imm32(RI.Build->GetImmValue(getOp2(getOp1(I)))));
FixupBranch cont = Jit->J_CC(CC_LE);
regWriteExit(RI, getOp2(I));
Jit->SetJumpTarget(cont);
if (RI.IInfo[I - RI.FirstI] & 4)
regClearInst(RI, getOp1(getOp1(I)));
} else {
Jit->CMP(32, regLocForInst(RI, getOp1(I)), Imm8(0));
FixupBranch cont = Jit->J_CC(CC_NZ);
regWriteExit(RI, getOp2(I));
Jit->SetJumpTarget(cont);
if (RI.IInfo[I - RI.FirstI] & 4)
regClearInst(RI, getOp1(I));
}
if (RI.IInfo[I - RI.FirstI] & 8)
regClearInst(RI, getOp2(I));
break;
}
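Together with the liveness change above (which marks the compare's operands live instead of the compare itself), this fuses ICmpEq/ICmpSle conditions straight into a CMP+Jcc pair, so the 0/1 condition value is never materialized through SETcc/MOVZX. Note the exit is taken when the condition value would be zero: the generic path emits `CMP cond, 0` / `JNZ cont`, so the fused forms jump past the exit on CC_Z and CC_LE respectively. Modeled in C (assuming that branch-on-zero reading of the emitter is right):

```cpp
#include <cstdint>

// BranchCond(cond, dest): the emitted code falls into the exit stub
// (regWriteExit) exactly when cond == 0.
bool TakesExit_ICmpEq(uint32_t a, uint32_t k) { return a != k; } // cmp a,k / jz cont
bool TakesExit_ICmpSle(int32_t a, int32_t k)  { return a > k; }  // cmp a,k / jle cont
```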
case BranchUncond: {
regWriteExit(RI, getOp1(I));
break;
}
case SystemCall: {
unsigned InstLoc = ibuild->GetImmValue(getOp1(I));
Jit->Cleanup();
Jit->OR(32, M(&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_SYSCALL));
Jit->MOV(32, M(&PC), Imm32(InstLoc + 4));
Jit->JMP(asm_routines.testExceptions, true);
break;
}
case InterpreterBranch: {
Jit->MOV(32, R(EAX), M(&NPC));
Jit->WriteExitDestInEAX(0);
break;
}
case RFIExit: {
// Bits SRR1[0, 5-9, 16-23, 25-27, 30-31] are placed
// into the corresponding bits of the MSR.
// MSR[13] is set to 0.
const u32 mask = 0x87C0FF73;
// MSR = (MSR & ~mask) | (SRR1 & mask);
Jit->MOV(32, R(EAX), M(&MSR));
Jit->MOV(32, R(ECX), M(&SRR1));
Jit->AND(32, R(EAX), Imm32(~mask));
Jit->AND(32, R(ECX), Imm32(mask));
Jit->OR(32, R(EAX), R(ECX));
// MSR &= 0xFFFDFFFF; //TODO: VERIFY
Jit->AND(32, R(EAX), Imm32(0xFFFDFFFF));
Jit->MOV(32, M(&MSR), R(EAX));
// NPC = SRR0;
Jit->MOV(32, R(EAX), M(&SRR0));
Jit->WriteRfiExitDestInEAX();
break;
}
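The sc/rfi handlers in the branch file (below) now just emit these opcodes, and the x86 they used to emit inline moves here. The MSR update in RFIExit, as plain C; the mask and the final AND are copied from the code above, including its `TODO: VERIFY` step:

```cpp
#include <cstdint>

uint32_t RfiNewMSR(uint32_t msr, uint32_t srr1) {
    const uint32_t mask = 0x87C0FF73;    // SRR1 bits copied into the MSR
    msr = (msr & ~mask) | (srr1 & mask);
    return msr & 0xFFFDFFFF;             // mirrors "MSR &= 0xFFFDFFFF; //TODO: VERIFY"
}
// NPC is then loaded from SRR0 and the block leaves through the RFI exit.
```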
case Tramp: {
if (!thisUsed) break;
// FIXME: Optimize!
@ -1061,7 +1234,11 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
PanicAlert("Unknown JIT instruction; aborting!");
exit(1);
}
if (getOpcode(*I) != Tramp) {
if (getOpcode(*I) != Tramp &&
getOpcode(*I) != BranchCond &&
getOpcode(*I) != Load8 &&
getOpcode(*I) != Load16 &&
getOpcode(*I) != Load32) {
if (RI.IInfo[I - RI.FirstI] & 4)
regClearInst(RI, getOp1(I));
if (RI.IInfo[I - RI.FirstI] & 8)
@ -1075,10 +1252,9 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
}
}
printf("Block: %x, numspills %d\n", Jit->js.blockStart, RI.numSpills);
if (RI.numSpills)
printf("Block: %x, numspills %d\n", Jit->js.blockStart, RI.numSpills);
Jit->MOV(32, R(EAX), M(&NPC));
Jit->WriteRfiExitDestInEAX();
Jit->UD2();
}


@ -73,6 +73,8 @@ namespace IREmitter {
ICmpCRUnsigned, // CR for unsigned int compare
ICmpEq, // One if equal, zero otherwise
ICmpUgt, // One if op1 > op2, zero otherwise
ICmpSgt, // One if op1 > op2 (signed), zero otherwise
ICmpSle, // One if op1 <= op2 (signed); opposite of Sgt
// Memory store operators
Store8,
Store16,
@ -87,6 +89,11 @@ namespace IREmitter {
CInt16,
CInt32,
// Funny PPC "branches"
SystemCall,
RFIExit,
InterpreterBranch,
// "Opcode" representing a register too far away to
// reference directly; this is a size optimization
Tramp,
@ -159,6 +166,7 @@ namespace IREmitter {
InstLoc FoldShl(InstLoc Op1, InstLoc Op2);
InstLoc FoldShrl(InstLoc Op1, InstLoc Op2);
InstLoc FoldXor(InstLoc Op1, InstLoc Op2);
InstLoc FoldBranchCond(InstLoc Op1, InstLoc Op2);
InstLoc FoldInterpreterFallback(InstLoc Op1, InstLoc Op2);
@ -167,6 +175,8 @@ namespace IREmitter {
unsigned extra = 0);
InstLoc FoldBiOp(unsigned OpCode, InstLoc Op1, InstLoc Op2);
unsigned ComputeKnownZeroBits(InstLoc I);
public:
InstLoc EmitIntConst(unsigned value);
InstLoc EmitStoreLink(InstLoc val) {
@ -241,6 +251,12 @@ namespace IREmitter {
InstLoc EmitICmpUgt(InstLoc op1, InstLoc op2) {
return FoldBiOp(ICmpUgt, op1, op2);
}
InstLoc EmitICmpSgt(InstLoc op1, InstLoc op2) {
return FoldBiOp(ICmpSgt, op1, op2);
}
InstLoc EmitICmpSle(InstLoc op1, InstLoc op2) {
return FoldBiOp(ICmpSle, op1, op2);
}
InstLoc EmitLoad8(InstLoc op1) {
return FoldUOp(Load8, op1);
}
@ -274,9 +290,18 @@ namespace IREmitter {
InstLoc EmitInterpreterFallback(InstLoc op1, InstLoc op2) {
return FoldBiOp(InterpreterFallback, op1, op2);
}
InstLoc EmitInterpreterBranch() {
return FoldZeroOp(InterpreterBranch, 0);
}
InstLoc EmitStoreCarry(InstLoc op1) {
return FoldUOp(StoreCarry, op1);
}
InstLoc EmitSystemCall(InstLoc pc) {
return FoldUOp(SystemCall, pc);
}
InstLoc EmitRFIExit() {
return FoldZeroOp(RFIExit, 0);
}
void StartBackPass() { curReadPtr = &InstList[InstList.size()]; }
void StartForwardPass() { curReadPtr = &InstList[0]; }


@ -43,36 +43,12 @@ using namespace Gen;
void Jit64::sc(UGeckoInstruction inst)
{
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITBranchOff)
{Default(inst); return;} // turn off from debugger
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
WriteExceptionExit(EXCEPTION_SYSCALL);
ibuild.EmitSystemCall(ibuild.EmitIntConst(js.compilerPC));
}
void Jit64::rfi(UGeckoInstruction inst)
{
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITBranchOff)
{Default(inst); return;} // turn off from debugger
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
//Bits SRR1[0, 5-9, 16-23, 25-27, 30-31] are placed into the corresponding bits of the MSR.
//MSR[13] is set to 0.
const u32 mask = 0x87C0FF73;
// MSR = (MSR & ~mask) | (SRR1 & mask);
MOV(32, R(EAX), M(&MSR));
MOV(32, R(ECX), M(&SRR1));
AND(32, R(EAX), Imm32(~mask));
AND(32, R(ECX), Imm32(mask));
OR(32, R(EAX), R(ECX));
// MSR &= 0xFFFDFFFF; //TODO: VERIFY
AND(32, R(EAX), Imm32(0xFFFDFFFF));
MOV(32, M(&MSR), R(EAX));
// NPC = SRR0;
MOV(32, R(EAX), M(&SRR0));
WriteRfiExitDestInEAX();
ibuild.EmitRFIExit();
}
void Jit64::bx(UGeckoInstruction inst)
@ -89,9 +65,6 @@ using namespace Gen;
ibuild.EmitBranchUncond(ibuild.EmitIntConst(destination));
}
// TODO - optimize to hell and beyond
// TODO - make nice easy to optimize special cases for the most common
// variants of this instruction.
void Jit64::bcx(UGeckoInstruction inst)
{
if (inst.LK)
@ -148,46 +121,11 @@ using namespace Gen;
void Jit64::bcctrx(UGeckoInstruction inst)
{
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITBranchOff)
{Default(inst); return;} // turn off from debugger
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
// bool fastway = true;
if ((inst.BO & 16) == 0)
{
PanicAlert("Bizarro bcctrx %08x, not supported.", inst.hex);
_assert_msg_(DYNA_REC, 0, "Bizarro bcctrx");
/*
fastway = false;
MOV(32, M(&PC), Imm32(js.compilerPC+4));
MOV(32, R(EAX), M(&CR));
XOR(32, R(ECX), R(ECX));
AND(32, R(EAX), Imm32(0x80000000 >> inst.BI));
CCFlags branch;
if(inst.BO & 8)
branch = CC_NZ;
else
branch = CC_Z;
*/
// TODO(ector): Why is this commented out?
//SETcc(branch, R(ECX));
// check for EBX
//TEST(32, R(ECX), R(ECX));
//linkEnd = J_CC(branch);
}
// NPC = CTR & 0xfffffffc;
MOV(32, R(EAX), M(&CTR));
if (inst.LK)
MOV(32, M(&LR), Imm32(js.compilerPC + 4)); // LR = PC + 4;
AND(32, R(EAX), Imm32(0xFFFFFFFC));
WriteExitDestInEAX(0);
Default(inst);
ibuild.EmitInterpreterBranch();
return;
}
void Jit64::bclrx(UGeckoInstruction inst)
{
if (inst.hex == 0x4e800020) {
@ -195,6 +133,7 @@ using namespace Gen;
return;
}
Default(inst);
ibuild.EmitInterpreterBranch();
return;
}


@ -25,7 +25,7 @@
#include "JitRegCache.h"
#include "JitAsm.h"
// #define INSTRUCTION_START Default(inst); return;
//#define INSTRUCTION_START Default(inst); return;
#define INSTRUCTION_START
static void ComputeRC(IREmitter::IRBuilder& ibuild,
@ -37,6 +37,7 @@
void Jit64::reg_imm(UGeckoInstruction inst)
{
INSTRUCTION_START
int d = inst.RD, a = inst.RA, s = inst.RS;
IREmitter::InstLoc val, test, c;
switch (inst.OPCD)
@ -103,6 +104,7 @@
void Jit64::cmpXX(UGeckoInstruction inst)
{
INSTRUCTION_START
IREmitter::InstLoc lhs, rhs, res;
lhs = ibuild.EmitLoadGReg(inst.RA);
if (inst.OPCD == 31) {
@ -125,6 +127,7 @@
void Jit64::orx(UGeckoInstruction inst)
{
INSTRUCTION_START
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB);
val = ibuild.EmitOr(ibuild.EmitLoadGReg(inst.RS), val);
ibuild.EmitStoreGReg(val, inst.RA);
@ -136,6 +139,7 @@
// m_GPR[_inst.RA] = m_GPR[_inst.RS] ^ m_GPR[_inst.RB];
void Jit64::xorx(UGeckoInstruction inst)
{
INSTRUCTION_START
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB);
val = ibuild.EmitXor(ibuild.EmitLoadGReg(inst.RS), val);
ibuild.EmitStoreGReg(val, inst.RA);
@ -145,6 +149,7 @@
void Jit64::andx(UGeckoInstruction inst)
{
INSTRUCTION_START
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB);
val = ibuild.EmitAnd(ibuild.EmitLoadGReg(inst.RS), val);
ibuild.EmitStoreGReg(val, inst.RA);
@ -154,6 +159,7 @@
void Jit64::extsbx(UGeckoInstruction inst)
{
INSTRUCTION_START
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS);
val = ibuild.EmitSExt8(val);
ibuild.EmitStoreGReg(val, inst.RA);
@ -163,6 +169,7 @@
void Jit64::extshx(UGeckoInstruction inst)
{
INSTRUCTION_START
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS);
val = ibuild.EmitSExt16(val);
ibuild.EmitStoreGReg(val, inst.RA);
@ -226,6 +233,7 @@
void Jit64::subfx(UGeckoInstruction inst)
{
INSTRUCTION_START
if (inst.OE) PanicAlert("OE: subfx");
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB);
val = ibuild.EmitSub(val, ibuild.EmitLoadGReg(inst.RA));
@ -236,6 +244,7 @@
void Jit64::mulli(UGeckoInstruction inst)
{
INSTRUCTION_START
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RA);
val = ibuild.EmitMul(val, ibuild.EmitIntConst(inst.SIMM_16));
ibuild.EmitStoreGReg(val, inst.RD);
@ -243,6 +252,7 @@
void Jit64::mullwx(UGeckoInstruction inst)
{
INSTRUCTION_START
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB);
val = ibuild.EmitMul(ibuild.EmitLoadGReg(inst.RA), val);
ibuild.EmitStoreGReg(val, inst.RD);
@ -316,6 +326,7 @@
void Jit64::addx(UGeckoInstruction inst)
{
INSTRUCTION_START
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB);
val = ibuild.EmitAdd(ibuild.EmitLoadGReg(inst.RA), val);
ibuild.EmitStoreGReg(val, inst.RD);
@ -355,6 +366,7 @@
void Jit64::rlwinmx(UGeckoInstruction inst)
{
INSTRUCTION_START
unsigned mask = Helper_Mask(inst.MB, inst.ME);
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS);
val = ibuild.EmitRol(val, ibuild.EmitIntConst(inst.SH));
@ -367,6 +379,7 @@
void Jit64::rlwimix(UGeckoInstruction inst)
{
INSTRUCTION_START
unsigned mask = Helper_Mask(inst.MB, inst.ME);
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS);
val = ibuild.EmitRol(val, ibuild.EmitIntConst(inst.SH));
@ -412,6 +425,7 @@
void Jit64::negx(UGeckoInstruction inst)
{
INSTRUCTION_START
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RA);
val = ibuild.EmitSub(ibuild.EmitIntConst(0), val);
ibuild.EmitStoreGReg(val, inst.RD);
@ -421,6 +435,7 @@
void Jit64::srwx(UGeckoInstruction inst)
{
INSTRUCTION_START
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS),
samt = ibuild.EmitLoadGReg(inst.RB),
corr;
@ -438,6 +453,7 @@
void Jit64::slwx(UGeckoInstruction inst)
{
INSTRUCTION_START
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS),
samt = ibuild.EmitLoadGReg(inst.RB),
corr;
@ -455,6 +471,7 @@
void Jit64::srawx(UGeckoInstruction inst)
{
INSTRUCTION_START
// FIXME: We can do a lot better on 64-bit
IREmitter::InstLoc val, samt, mask, mask2, test;
val = ibuild.EmitLoadGReg(inst.RS);
@ -476,6 +493,7 @@
void Jit64::srawix(UGeckoInstruction inst)
{
INSTRUCTION_START
IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS), test;
val = ibuild.EmitSarl(val, ibuild.EmitIntConst(inst.SH));
ibuild.EmitStoreGReg(val, inst.RA);


@ -36,163 +36,141 @@
#include "JitAsm.h"
#include "JitRegCache.h"
// #define INSTRUCTION_START Default(inst); return;
//#define INSTRUCTION_START Default(inst); return;
#define INSTRUCTION_START
void Jit64::lbzx(UGeckoInstruction inst)
{
IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB);
if (inst.RA)
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
ibuild.EmitStoreGReg(ibuild.EmitLoad8(addr), inst.RD);
}
void Jit64::lwzx(UGeckoInstruction inst)
{
IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB);
if (inst.RA)
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
ibuild.EmitStoreGReg(ibuild.EmitLoad32(addr), inst.RD);
}
void Jit64::lhax(UGeckoInstruction inst)
{
IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB);
if (inst.RA)
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
IREmitter::InstLoc val = ibuild.EmitLoad16(addr);
val = ibuild.EmitSExt16(val);
ibuild.EmitStoreGReg(val, inst.RD);
}
void Jit64::lXz(UGeckoInstruction inst)
{
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16);
if (inst.RA)
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
IREmitter::InstLoc val;
switch (inst.OPCD)
{
case 32: val = ibuild.EmitLoad32(addr); break; //lwz
case 40: val = ibuild.EmitLoad16(addr); break; //lhz
case 34: val = ibuild.EmitLoad8(addr); break; //lbz
default: PanicAlert("lXz: invalid access size");
}
ibuild.EmitStoreGReg(val, inst.RD);
}
void Jit64::lha(UGeckoInstruction inst)
{
IREmitter::InstLoc addr =
ibuild.EmitIntConst((s32)(s16)inst.SIMM_16);
if (inst.RA)
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
IREmitter::InstLoc val = ibuild.EmitLoad16(addr);
val = ibuild.EmitSExt16(val);
ibuild.EmitStoreGReg(val, inst.RD);
}
void Jit64::lwzux(UGeckoInstruction inst)
{
IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB);
if (inst.RA) {
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
ibuild.EmitStoreGReg(addr, inst.RA);
}
ibuild.EmitStoreGReg(ibuild.EmitLoad32(addr), inst.RD);
}
// Zero cache line.
void Jit64::dcbz(UGeckoInstruction inst)
{
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff)
{Default(inst); return;} // turn off from debugger
INSTRUCTION_START;
MOV(32, R(EAX), gpr.R(inst.RB));
if (inst.RA)
ADD(32, R(EAX), gpr.R(inst.RA));
AND(32, R(EAX), Imm32(~31));
XORPD(XMM0, R(XMM0));
#ifdef _M_X64
MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0);
MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0);
#else
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
MOVAPS(MDisp(EAX, (u32)Memory::base), XMM0);
MOVAPS(MDisp(EAX, (u32)Memory::base + 16), XMM0);
#endif
}
void Jit64::stX(UGeckoInstruction inst)
{
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16),
value = ibuild.EmitLoadGReg(inst.RS);
if (inst.RA)
addr = ibuild.EmitAdd(ibuild.EmitLoadGReg(inst.RA), addr);
if (inst.OPCD & 1)
ibuild.EmitStoreGReg(addr, inst.RA);
switch (inst.OPCD & ~1)
{
case 36: ibuild.EmitStore32(value, addr); break; //stw
case 44: ibuild.EmitStore16(value, addr); break; //sth
case 38: ibuild.EmitStore8(value, addr); break; //stb
default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return;
}
}
void Jit64::stXx(UGeckoInstruction inst)
{
IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB),
value = ibuild.EmitLoadGReg(inst.RS);
void Jit64::lbzx(UGeckoInstruction inst)
{
INSTRUCTION_START
IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB);
if (inst.RA)
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
if (inst.SUBOP10 & 32)
ibuild.EmitStoreGReg(addr, inst.RA);
switch (inst.SUBOP10 & ~32)
{
case 151: ibuild.EmitStore32(value, addr); break; //stw
case 407: ibuild.EmitStore16(value, addr); break; //sth
case 215: ibuild.EmitStore8(value, addr); break; //stb
default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return;
}
ibuild.EmitStoreGReg(ibuild.EmitLoad8(addr), inst.RD);
}
void Jit64::lwzx(UGeckoInstruction inst)
{
INSTRUCTION_START
IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB);
if (inst.RA)
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
ibuild.EmitStoreGReg(ibuild.EmitLoad32(addr), inst.RD);
}
void Jit64::lhax(UGeckoInstruction inst)
{
INSTRUCTION_START
IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB);
if (inst.RA)
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
IREmitter::InstLoc val = ibuild.EmitLoad16(addr);
val = ibuild.EmitSExt16(val);
ibuild.EmitStoreGReg(val, inst.RD);
}
void Jit64::lXz(UGeckoInstruction inst)
{
INSTRUCTION_START
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16);
if (inst.RA)
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
IREmitter::InstLoc val;
switch (inst.OPCD)
{
case 32: val = ibuild.EmitLoad32(addr); break; //lwz
case 40: val = ibuild.EmitLoad16(addr); break; //lhz
case 34: val = ibuild.EmitLoad8(addr); break; //lbz
default: PanicAlert("lXz: invalid access size");
}
ibuild.EmitStoreGReg(val, inst.RD);
}
void Jit64::lha(UGeckoInstruction inst)
{
INSTRUCTION_START
IREmitter::InstLoc addr =
ibuild.EmitIntConst((s32)(s16)inst.SIMM_16);
if (inst.RA)
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
IREmitter::InstLoc val = ibuild.EmitLoad16(addr);
val = ibuild.EmitSExt16(val);
ibuild.EmitStoreGReg(val, inst.RD);
}
void Jit64::lwzux(UGeckoInstruction inst)
{
INSTRUCTION_START
IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB);
if (inst.RA) {
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
ibuild.EmitStoreGReg(addr, inst.RA);
}
ibuild.EmitStoreGReg(ibuild.EmitLoad32(addr), inst.RD);
}
// Zero cache line.
void Jit64::dcbz(UGeckoInstruction inst)
{
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff)
{Default(inst); return;} // turn off from debugger
INSTRUCTION_START;
MOV(32, R(EAX), gpr.R(inst.RB));
if (inst.RA)
ADD(32, R(EAX), gpr.R(inst.RA));
AND(32, R(EAX), Imm32(~31));
XORPD(XMM0, R(XMM0));
#ifdef _M_X64
MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0);
MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0);
#else
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
MOVAPS(MDisp(EAX, (u32)Memory::base), XMM0);
MOVAPS(MDisp(EAX, (u32)Memory::base + 16), XMM0);
#endif
}
void Jit64::stX(UGeckoInstruction inst)
{
INSTRUCTION_START
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16),
value = ibuild.EmitLoadGReg(inst.RS);
if (inst.RA)
addr = ibuild.EmitAdd(ibuild.EmitLoadGReg(inst.RA), addr);
if (inst.OPCD & 1)
ibuild.EmitStoreGReg(addr, inst.RA);
switch (inst.OPCD & ~1)
{
case 36: ibuild.EmitStore32(value, addr); break; //stw
case 44: ibuild.EmitStore16(value, addr); break; //sth
case 38: ibuild.EmitStore8(value, addr); break; //stb
default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return;
}
}
void Jit64::stXx(UGeckoInstruction inst)
{
INSTRUCTION_START
IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB),
value = ibuild.EmitLoadGReg(inst.RS);
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
if (inst.SUBOP10 & 32)
ibuild.EmitStoreGReg(addr, inst.RA);
switch (inst.SUBOP10 & ~32)
{
case 151: ibuild.EmitStore32(value, addr); break; //stw
case 407: ibuild.EmitStore16(value, addr); break; //sth
case 215: ibuild.EmitStore8(value, addr); break; //stb
default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return;
}
}
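Both store emitters above lean on a PowerPC encoding regularity: for the D-form stores, stw/stwu = 36/37, stb/stbu = 38/39, sth/sthu = 44/45, so OPCD bit 0 is exactly the "update rA" flag; for the X-form stores, stwx/stwux = 151/183, stbx/stbux = 215/247, sthx/sthux = 407/439, so SUBOP10 bit 5 (value 32) plays the same role. A quick check:

```cpp
#include <cassert>

int main() {
    assert((37 & ~1) == 36 && (37 & 1));              // stwu  = stw  + update
    assert((183 & ~32) == 151 && (183 & 32));         // stwux = stwx + update
    assert((439 & ~32) == 407 && (247 & ~32) == 215); // sthux, stbux likewise
    return 0;
}
```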
// A few games use these heavily in video codecs.
void Jit64::lmw(UGeckoInstruction inst)
{
#ifdef _M_IX86
Default(inst); return;
#else
gpr.FlushLockX(ECX);
MOV(32, R(EAX), Imm32((u32)(s32)inst.SIMM_16));
if (inst.RA)
ADD(32, R(EAX), gpr.R(inst.RA));
for (int i = inst.RD; i < 32; i++)
{
MOV(32, R(ECX), MComplex(EBX, EAX, SCALE_1, (i - inst.RD) * 4));
BSWAP(32, ECX);
gpr.LoadToX64(i, false, true);
MOV(32, gpr.R(i), R(ECX));
}
gpr.UnlockAllX();
#endif
}
void Jit64::stmw(UGeckoInstruction inst)
{
#ifdef _M_IX86
Default(inst); return;
#else
gpr.FlushLockX(ECX);
MOV(32, R(EAX), Imm32((u32)(s32)inst.SIMM_16));
if (inst.RA)
ADD(32, R(EAX), gpr.R(inst.RA));
for (int i = inst.RD; i < 32; i++)
{
MOV(32, R(ECX), gpr.R(i));
BSWAP(32, ECX);
MOV(32, MComplex(EBX, EAX, SCALE_1, (i - inst.RD) * 4), R(ECX));
}
gpr.UnlockAllX();
#endif
}
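lmw/stmw move GPRs rD..r31 to or from consecutive words; the x64-only paths above walk that range with one byte-swapped 32-bit access per register (MComplex with EBX holding the memory base). What the lmw loop computes, in C:

```cpp
#include <cstdint>

// lmw rD, d(rA): fill gpr[rd..31] from consecutive big-endian words.
void LoadMultipleWords(uint32_t gpr[32], const uint8_t* memBase,
                       uint32_t ea, int rd) {
    for (int i = rd; i < 32; ++i) {
        const uint8_t* p = memBase + ea + (i - rd) * 4;
        gpr[i] = (uint32_t)p[0] << 24 | (uint32_t)p[1] << 16
               | (uint32_t)p[2] << 8  | (uint32_t)p[3];   // the BSWAP
    }
}
```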


@ -30,11 +30,12 @@
#include "JitCache.h"
#include "JitRegCache.h"
//#define INSTRUCTION_START Default(inst); return;
#define INSTRUCTION_START
// #define INSTRUCTION_START Default(inst); return;
void Jit64::mtspr(UGeckoInstruction inst)
{
INSTRUCTION_START
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
switch(iIndex) {
case SPR_LR:
@ -44,7 +45,6 @@
ibuild.EmitStoreCTR(ibuild.EmitLoadGReg(inst.RD));
return;
default:
printf("mtspr case %d", iIndex);
Default(inst);
return;
}
@ -52,6 +52,7 @@
void Jit64::mfspr(UGeckoInstruction inst)
{
INSTRUCTION_START
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
switch (iIndex)
{
@ -62,7 +63,6 @@
ibuild.EmitStoreGReg(ibuild.EmitLoadCTR(), inst.RD);
return;
default:
printf("mfspr case %d", iIndex);
Default(inst);
return;
}
@ -82,6 +82,7 @@
void Jit64::mfmsr(UGeckoInstruction inst)
{
INSTRUCTION_START
ibuild.EmitStoreGReg(ibuild.EmitLoadMSR(), inst.RD);
}