And a bit more JIT WIP work: improved code generation for integer
load/store, and outlining the start of FP support. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1729 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
bd3f468c37
commit
0367e7ee4d
|
@ -78,14 +78,30 @@ on the test I've been working on (which bounded by JIT performance and doesn't
|
||||||
use any floating-point), it's roughly 25% faster than the current JIT, with the
|
use any floating-point), it's roughly 25% faster than the current JIT, with the
|
||||||
edge over the current JIT mostly due to the fast memory optimization.
|
edge over the current JIT mostly due to the fast memory optimization.
|
||||||
|
|
||||||
|
Update on perf:
|
||||||
|
I've been doing a bit more tweaking for a small perf improvement (in the
|
||||||
|
range of 5-10%). That said, it's getting to the point where I'm simply
|
||||||
|
not seeing potential for improvements to codegen, at least for long,
|
||||||
|
straightforward blocks. For one long block that's at the top of my samples,
|
||||||
|
I've managed to get the bloat% (number of instructions compared to PPC
|
||||||
|
equivalent) down to 225%, and I can't really see it going down much further.
|
||||||
|
It looks like the most promising paths to further improvement for pure
|
||||||
|
integer code are more aggressively combining blocks and dead condition
|
||||||
|
register elimination, which should be very helpful for small blocks.
|
||||||
|
|
||||||
TODO (in no particular order):
|
TODO (in no particular order):
|
||||||
Floating-point JIT (both paired and unpaired): currently falls back
|
Floating-point JIT (both paired and unpaired)
|
||||||
to the interpreter
|
(very large win for FP code, no effect for integer code)
|
||||||
|
Inter-block dead condition register elimination (Likely significant win
|
||||||
|
combined with optimized conditions)
|
||||||
Optimize conditions for conditional branches.
|
Optimize conditions for conditional branches.
|
||||||
Inter-block dead register elimination, especially for CR0.
|
General dead register elimination.
|
||||||
Inter-block inlining.
|
Inter-block inlining.
|
||||||
Track down a few correctness bugs.
|
Track down a few correctness bugs (I think there's something wrong
|
||||||
Implement a select instruction
|
with my branches, but I haven't been able to figure it out).
|
||||||
|
Specialized slw/srw/sraw; I think there are some tricks that could
|
||||||
|
have a non-trivial effect, and there are significantly shorter
|
||||||
|
implementations for 64-bit involving abusing 64-bit shifts.
|
||||||
64-bit compat (it should only be a few tweaks to register allocation and
|
64-bit compat (it should only be a few tweaks to register allocation and
|
||||||
the load/store code)
|
the load/store code)
|
||||||
Scheduling to reduce register pressure: PowerPC compilers like to push
|
Scheduling to reduce register pressure: PowerPC compilers like to push
|
||||||
|
@ -93,8 +109,16 @@ Scheduling to reduce register pressure: PowerPC compilers like to push
|
||||||
x86 processors, which are short on registers and extremely good at
|
x86 processors, which are short on registers and extremely good at
|
||||||
instruction reordering.
|
instruction reordering.
|
||||||
Common subexpression elimination
|
Common subexpression elimination
|
||||||
Optimize load of sum using complex addressing (partially implemented)
|
Optimize load/store of sum using complex addressing (partially implemented)
|
||||||
Implement idle-skipping
|
Implement idle-skipping
|
||||||
|
Loop optimizations (loop-carried registers, LICM); not sure how much
|
||||||
|
this will help on top of dead register elimination
|
||||||
|
Fold loads (both register and memory) into arithmetic operations
|
||||||
|
Code refactoring/cleanup
|
||||||
|
Investigate performance of the JIT itself; this doesn't affect
|
||||||
|
framerates significantly, but it does take a visible amount
|
||||||
|
of time for a complicated piece of code like a video decoder
|
||||||
|
to compile.
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
@ -492,6 +516,9 @@ struct RegInfo {
|
||||||
exitNumber = 0;
|
exitNumber = 0;
|
||||||
MakeProfile = UseProfile = false;
|
MakeProfile = UseProfile = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
RegInfo(RegInfo&); // DO NOT IMPLEMENT
|
||||||
};
|
};
|
||||||
|
|
||||||
static void regMarkUse(RegInfo& R, InstLoc I, InstLoc Op, unsigned OpNum) {
|
static void regMarkUse(RegInfo& R, InstLoc I, InstLoc Op, unsigned OpNum) {
|
||||||
|
@ -635,48 +662,119 @@ static void regEmitBinInst(RegInfo& RI, InstLoc I,
|
||||||
RI.regs[reg] = I;
|
RI.regs[reg] = I;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void regEmitMemLoad(RegInfo& RI, InstLoc I, unsigned Size) {
|
// Mark and calculation routines for profiled load/store addresses
|
||||||
X64Reg reg;
|
// Could be extended to unprofiled addresses.
|
||||||
unsigned offset;
|
// FIXME: Finish/activate!
|
||||||
|
static void regMarkMemAddress(RegInfo& RI, InstLoc I, InstLoc AI, unsigned OpNum) {
|
||||||
|
if (isImm(*AI)) {
|
||||||
|
unsigned addr = RI.Build->GetImmValue(AI);
|
||||||
|
if (Memory::IsRAMAddress(addr))
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (getOpcode(*AI) == Add && isImm(*getOp2(AI))) {
|
||||||
|
regMarkUse(RI, I, getOp1(AI), OpNum);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
regMarkUse(RI, I, AI, OpNum);
|
||||||
|
}
|
||||||
|
|
||||||
if (getOpcode(*getOp1(I)) == Add && isImm(*getOp2(getOp1(I)))) {
|
static void regClearDeadMemAddress(RegInfo& RI, InstLoc I, InstLoc AI, unsigned OpNum) {
|
||||||
offset = RI.Build->GetImmValue(getOp2(getOp1(I)));
|
if (!(RI.IInfo[I - RI.FirstI] & (2 << OpNum)))
|
||||||
reg = regBinLHSReg(RI, getOp1(I));
|
return;
|
||||||
if (RI.IInfo[I - RI.FirstI] & 4)
|
if (isImm(*AI)) {
|
||||||
regClearInst(RI, getOp1(getOp1(I)));
|
unsigned addr = RI.Build->GetImmValue(AI);
|
||||||
} else {
|
if (Memory::IsRAMAddress(addr)) {
|
||||||
offset = 0;
|
|
||||||
reg = regBinLHSReg(RI, I);
|
|
||||||
if (RI.IInfo[I - RI.FirstI] & 4)
|
|
||||||
regClearInst(RI, getOp1(I));
|
|
||||||
}
|
|
||||||
if (RI.UseProfile) {
|
|
||||||
unsigned curLoad = ProfiledLoads[RI.numProfiledLoads++];
|
|
||||||
if (!(curLoad & 0x0C000000)) {
|
|
||||||
if (regReadUse(RI, I)) {
|
|
||||||
unsigned addr = (u32)Memory::base - (curLoad & 0xC0000000) + offset;
|
|
||||||
RI.Jit->MOVZX(32, Size, reg, MDisp(reg, addr));
|
|
||||||
RI.Jit->BSWAP(Size, reg);
|
|
||||||
RI.regs[reg] = I;
|
|
||||||
}
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (offset) {
|
InstLoc AddrBase;
|
||||||
RI.Jit->ADD(32, R(reg), Imm32(offset));
|
if (getOpcode(*AI) == Add && isImm(*getOp2(AI))) {
|
||||||
|
AddrBase = getOp1(AI);
|
||||||
|
} else {
|
||||||
|
AddrBase = AI;
|
||||||
}
|
}
|
||||||
|
regClearInst(RI, AddrBase);
|
||||||
|
}
|
||||||
|
|
||||||
|
static OpArg regBuildMemAddress(RegInfo& RI, InstLoc I, InstLoc AI,
|
||||||
|
unsigned OpNum, unsigned Size, X64Reg* dest,
|
||||||
|
bool Profiled,
|
||||||
|
unsigned ProfileOffset = 0) {
|
||||||
|
if (isImm(*AI)) {
|
||||||
|
unsigned addr = RI.Build->GetImmValue(AI);
|
||||||
|
if (Memory::IsRAMAddress(addr)) {
|
||||||
|
if (dest)
|
||||||
|
*dest = regFindFreeReg(RI);
|
||||||
|
if (Profiled)
|
||||||
|
return M((void*)((u32)Memory::base + (addr & Memory::MEMVIEW32_MASK)));
|
||||||
|
return M((void*)addr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
unsigned offset;
|
||||||
|
InstLoc AddrBase;
|
||||||
|
if (getOpcode(*AI) == Add && isImm(*getOp2(AI))) {
|
||||||
|
offset = RI.Build->GetImmValue(getOp2(AI));
|
||||||
|
AddrBase = getOp1(AI);
|
||||||
|
} else {
|
||||||
|
offset = 0;
|
||||||
|
AddrBase = AI;
|
||||||
|
}
|
||||||
|
X64Reg baseReg;
|
||||||
|
if (RI.IInfo[I - RI.FirstI] & (2 << OpNum)) {
|
||||||
|
baseReg = regEnsureInReg(RI, AddrBase);
|
||||||
|
regClearInst(RI, AddrBase);
|
||||||
|
if (dest)
|
||||||
|
*dest = baseReg;
|
||||||
|
} else if (dest) {
|
||||||
|
X64Reg reg = regFindFreeReg(RI);
|
||||||
|
if (!regLocForInst(RI, AddrBase).IsSimpleReg()) {
|
||||||
|
RI.Jit->MOV(32, R(reg), regLocForInst(RI, AddrBase));
|
||||||
|
baseReg = reg;
|
||||||
|
} else {
|
||||||
|
baseReg = regLocForInst(RI, AddrBase).GetSimpleReg();
|
||||||
|
}
|
||||||
|
*dest = reg;
|
||||||
|
} else {
|
||||||
|
baseReg = regEnsureInReg(RI, AddrBase);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Profiled) {
|
||||||
|
return MDisp(baseReg, (u32)Memory::base + offset + ProfileOffset);
|
||||||
|
}
|
||||||
|
return MDisp(baseReg, offset);
|
||||||
|
}
|
||||||
|
// end FIXME
|
||||||
|
|
||||||
|
static void regEmitMemLoad(RegInfo& RI, InstLoc I, unsigned Size) {
|
||||||
|
if (RI.UseProfile) {
|
||||||
|
unsigned curLoad = ProfiledLoads[RI.numProfiledLoads++];
|
||||||
|
if (!(curLoad & 0x0C000000)) {
|
||||||
|
X64Reg reg;
|
||||||
|
OpArg addr = regBuildMemAddress(RI, I, getOp1(I), 1,
|
||||||
|
Size, &reg, true,
|
||||||
|
-(curLoad & 0xC0000000));
|
||||||
|
RI.Jit->MOVZX(32, Size, reg, addr);
|
||||||
|
RI.Jit->BSWAP(Size, reg);
|
||||||
|
if (regReadUse(RI, I))
|
||||||
|
RI.regs[reg] = I;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
X64Reg reg;
|
||||||
|
OpArg addr = regBuildMemAddress(RI, I, getOp1(I), 1, Size, &reg, false);
|
||||||
|
RI.Jit->LEA(32, ECX, addr);
|
||||||
if (RI.MakeProfile) {
|
if (RI.MakeProfile) {
|
||||||
RI.Jit->MOV(32, M(&ProfiledLoads[RI.numProfiledLoads++]), R(reg));
|
RI.Jit->MOV(32, M(&ProfiledLoads[RI.numProfiledLoads++]), R(ECX));
|
||||||
}
|
}
|
||||||
RI.Jit->TEST(32, R(reg), Imm32(0x0C000000));
|
RI.Jit->TEST(32, R(ECX), Imm32(0x0C000000));
|
||||||
FixupBranch argh = RI.Jit->J_CC(CC_Z);
|
FixupBranch argh = RI.Jit->J_CC(CC_Z);
|
||||||
if (reg != EAX)
|
if (reg != EAX)
|
||||||
RI.Jit->PUSH(32, R(EAX));
|
RI.Jit->PUSH(32, R(EAX));
|
||||||
switch (Size)
|
switch (Size)
|
||||||
{
|
{
|
||||||
case 32: RI.Jit->ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), reg); break;
|
case 32: RI.Jit->ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), ECX); break;
|
||||||
case 16: RI.Jit->ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U16, 1), reg); break;
|
case 16: RI.Jit->ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U16, 1), ECX); break;
|
||||||
case 8: RI.Jit->ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U8, 1), reg); break;
|
case 8: RI.Jit->ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U8, 1), ECX); break;
|
||||||
}
|
}
|
||||||
if (reg != EAX) {
|
if (reg != EAX) {
|
||||||
RI.Jit->MOV(32, R(reg), R(EAX));
|
RI.Jit->MOV(32, R(reg), R(EAX));
|
||||||
|
@ -684,41 +782,87 @@ static void regEmitMemLoad(RegInfo& RI, InstLoc I, unsigned Size) {
|
||||||
}
|
}
|
||||||
FixupBranch arg2 = RI.Jit->J();
|
FixupBranch arg2 = RI.Jit->J();
|
||||||
RI.Jit->SetJumpTarget(argh);
|
RI.Jit->SetJumpTarget(argh);
|
||||||
RI.Jit->UnsafeLoadRegToReg(reg, reg, Size, 0, false);
|
RI.Jit->UnsafeLoadRegToReg(ECX, reg, Size, 0, false);
|
||||||
RI.Jit->SetJumpTarget(arg2);
|
RI.Jit->SetJumpTarget(arg2);
|
||||||
if (regReadUse(RI, I))
|
if (regReadUse(RI, I))
|
||||||
RI.regs[reg] = I;
|
RI.regs[reg] = I;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static OpArg regSwappedImmForConst(RegInfo& RI, InstLoc I, unsigned Size) {
|
||||||
|
unsigned imm = RI.Build->GetImmValue(I);
|
||||||
|
if (Size == 32) {
|
||||||
|
imm = Common::swap32(imm);
|
||||||
|
return Imm32(imm);
|
||||||
|
} else if (Size == 16) {
|
||||||
|
imm = Common::swap16(imm);
|
||||||
|
return Imm16(imm);
|
||||||
|
} else {
|
||||||
|
return Imm8(imm);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static OpArg regImmForConst(RegInfo& RI, InstLoc I, unsigned Size) {
|
||||||
|
unsigned imm = RI.Build->GetImmValue(I);
|
||||||
|
if (Size == 32) {
|
||||||
|
return Imm32(imm);
|
||||||
|
} else if (Size == 16) {
|
||||||
|
return Imm16(imm);
|
||||||
|
} else {
|
||||||
|
return Imm8(imm);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void regEmitMemStore(RegInfo& RI, InstLoc I, unsigned Size) {
|
static void regEmitMemStore(RegInfo& RI, InstLoc I, unsigned Size) {
|
||||||
if (RI.UseProfile) {
|
if (RI.UseProfile) {
|
||||||
unsigned curStore = ProfiledLoads[RI.numProfiledLoads++];
|
unsigned curStore = ProfiledLoads[RI.numProfiledLoads++];
|
||||||
if (!(curStore & 0x0C000000)) {
|
if (!(curStore & 0x0C000000)) {
|
||||||
X64Reg reg = regEnsureInReg(RI, getOp2(I));
|
OpArg addr = regBuildMemAddress(RI, I, getOp2(I), 2,
|
||||||
|
Size, 0, true,
|
||||||
|
-(curStore & 0xC0000000));
|
||||||
|
if (isImm(*getOp1(I))) {
|
||||||
|
RI.Jit->MOV(Size, addr, regSwappedImmForConst(RI, getOp1(I), Size));
|
||||||
|
} else {
|
||||||
RI.Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I)));
|
RI.Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I)));
|
||||||
RI.Jit->BSWAP(Size, ECX);
|
RI.Jit->BSWAP(Size, ECX);
|
||||||
unsigned addr = (u32)Memory::base - (curStore & 0xC0000000);
|
RI.Jit->MOV(Size, addr, R(ECX));
|
||||||
RI.Jit->MOV(Size, MDisp(reg, addr), R(ECX));
|
}
|
||||||
|
if (RI.IInfo[I - RI.FirstI] & 4)
|
||||||
|
regClearInst(RI, getOp1(I));
|
||||||
return;
|
return;
|
||||||
} else if ((curStore & 0xFFFFF000) == 0xCC008000) {
|
} else if ((curStore & 0xFFFFF000) == 0xCC008000) {
|
||||||
regSpill(RI, EAX);
|
regSpill(RI, EAX);
|
||||||
|
if (isImm(*getOp1(I))) {
|
||||||
|
RI.Jit->MOV(Size, R(ECX), regSwappedImmForConst(RI, getOp1(I), Size));
|
||||||
|
} else {
|
||||||
RI.Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I)));
|
RI.Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I)));
|
||||||
RI.Jit->BSWAP(Size, ECX);
|
RI.Jit->BSWAP(Size, ECX);
|
||||||
|
}
|
||||||
RI.Jit->MOV(32, R(EAX), M(&GPFifo::m_gatherPipeCount));
|
RI.Jit->MOV(32, R(EAX), M(&GPFifo::m_gatherPipeCount));
|
||||||
RI.Jit->MOV(Size, MDisp(EAX, (u32)GPFifo::m_gatherPipe), R(ECX));
|
RI.Jit->MOV(Size, MDisp(EAX, (u32)GPFifo::m_gatherPipe), R(ECX));
|
||||||
RI.Jit->ADD(32, R(EAX), Imm8(Size >> 3));
|
RI.Jit->ADD(32, R(EAX), Imm8(Size >> 3));
|
||||||
RI.Jit->MOV(32, M(&GPFifo::m_gatherPipeCount), R(EAX));
|
RI.Jit->MOV(32, M(&GPFifo::m_gatherPipeCount), R(EAX));
|
||||||
RI.Jit->js.fifoBytesThisBlock += Size >> 3;
|
RI.Jit->js.fifoBytesThisBlock += Size >> 3;
|
||||||
|
if (RI.IInfo[I - RI.FirstI] & 4)
|
||||||
|
regClearInst(RI, getOp1(I));
|
||||||
|
//regBuildMemAddress(RI, I, getOp2(I), 2, Size, 0, false);
|
||||||
|
regClearDeadMemAddress(RI, I, getOp2(I), 2);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
OpArg addr = regBuildMemAddress(RI, I, getOp2(I), 2, Size, 0, false);
|
||||||
|
RI.Jit->LEA(32, ECX, addr);
|
||||||
regSpill(RI, EAX);
|
regSpill(RI, EAX);
|
||||||
|
if (isImm(*getOp1(I))) {
|
||||||
|
RI.Jit->MOV(Size, R(EAX), regImmForConst(RI, getOp1(I), Size));
|
||||||
|
} else {
|
||||||
RI.Jit->MOV(32, R(EAX), regLocForInst(RI, getOp1(I)));
|
RI.Jit->MOV(32, R(EAX), regLocForInst(RI, getOp1(I)));
|
||||||
RI.Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I)));
|
}
|
||||||
if (RI.MakeProfile) {
|
if (RI.MakeProfile) {
|
||||||
RI.Jit->MOV(32, M(&ProfiledLoads[RI.numProfiledLoads++]), R(ECX));
|
RI.Jit->MOV(32, M(&ProfiledLoads[RI.numProfiledLoads++]), R(ECX));
|
||||||
}
|
}
|
||||||
RI.Jit->SafeWriteRegToReg(EAX, ECX, Size, 0);
|
RI.Jit->SafeWriteRegToReg(EAX, ECX, Size, 0);
|
||||||
|
if (RI.IInfo[I - RI.FirstI] & 4)
|
||||||
|
regClearInst(RI, getOp1(I));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void regEmitShiftInst(RegInfo& RI, InstLoc I,
|
static void regEmitShiftInst(RegInfo& RI, InstLoc I,
|
||||||
|
@ -787,7 +931,6 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
|
||||||
RI.Build = ibuild;
|
RI.Build = ibuild;
|
||||||
RI.UseProfile = UseProfile;
|
RI.UseProfile = UseProfile;
|
||||||
RI.MakeProfile = !RI.UseProfile;
|
RI.MakeProfile = !RI.UseProfile;
|
||||||
unsigned bs = Jit->js.blockStart;
|
|
||||||
// Pass to compute liveness
|
// Pass to compute liveness
|
||||||
ibuild->StartBackPass();
|
ibuild->StartBackPass();
|
||||||
for (unsigned index = RI.IInfo.size() - 1; index != -1U; --index) {
|
for (unsigned index = RI.IInfo.size() - 1; index != -1U; --index) {
|
||||||
|
@ -825,20 +968,20 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
|
||||||
if (thisUsed)
|
if (thisUsed)
|
||||||
regMarkUse(RI, I, getOp1(I), 1);
|
regMarkUse(RI, I, getOp1(I), 1);
|
||||||
break;
|
break;
|
||||||
case StoreCR:
|
|
||||||
case StoreCarry:
|
|
||||||
case Load8:
|
case Load8:
|
||||||
case Load16:
|
case Load16:
|
||||||
case Load32:
|
case Load32:
|
||||||
if (getOpcode(*getOp1(I)) == Add &&
|
regMarkMemAddress(RI, I, getOp1(I), 1);
|
||||||
isImm(*getOp2(getOp1(I)))) {
|
break;
|
||||||
regMarkUse(RI, I, getOp1(getOp1(I)), 1);
|
case StoreCR:
|
||||||
|
case StoreCarry:
|
||||||
|
regMarkUse(RI, I, getOp1(I), 1);
|
||||||
break;
|
break;
|
||||||
}
|
|
||||||
case StoreGReg:
|
case StoreGReg:
|
||||||
case StoreLink:
|
case StoreLink:
|
||||||
case StoreCTR:
|
case StoreCTR:
|
||||||
case StoreMSR:
|
case StoreMSR:
|
||||||
|
if (!isImm(*getOp1(I)))
|
||||||
regMarkUse(RI, I, getOp1(I), 1);
|
regMarkUse(RI, I, getOp1(I), 1);
|
||||||
break;
|
break;
|
||||||
case Add:
|
case Add:
|
||||||
|
@ -866,8 +1009,9 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
|
||||||
case Store8:
|
case Store8:
|
||||||
case Store16:
|
case Store16:
|
||||||
case Store32:
|
case Store32:
|
||||||
|
if (!isImm(*getOp1(I)))
|
||||||
regMarkUse(RI, I, getOp1(I), 1);
|
regMarkUse(RI, I, getOp1(I), 1);
|
||||||
regMarkUse(RI, I, getOp2(I), 2);
|
regMarkMemAddress(RI, I, getOp2(I), 2);
|
||||||
break;
|
break;
|
||||||
case BranchUncond:
|
case BranchUncond:
|
||||||
if (!isImm(*getOp1(I)))
|
if (!isImm(*getOp1(I)))
|
||||||
|
@ -1238,7 +1382,11 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
|
||||||
getOpcode(*I) != BranchCond &&
|
getOpcode(*I) != BranchCond &&
|
||||||
getOpcode(*I) != Load8 &&
|
getOpcode(*I) != Load8 &&
|
||||||
getOpcode(*I) != Load16 &&
|
getOpcode(*I) != Load16 &&
|
||||||
getOpcode(*I) != Load32) {
|
getOpcode(*I) != Load32 &&
|
||||||
|
getOpcode(*I) != Store8 &&
|
||||||
|
getOpcode(*I) != Store16 &&
|
||||||
|
getOpcode(*I) != Store32 &&
|
||||||
|
1) {
|
||||||
if (RI.IInfo[I - RI.FirstI] & 4)
|
if (RI.IInfo[I - RI.FirstI] & 4)
|
||||||
regClearInst(RI, getOp1(I));
|
regClearInst(RI, getOp1(I));
|
||||||
if (RI.IInfo[I - RI.FirstI] & 8)
|
if (RI.IInfo[I - RI.FirstI] & 8)
|
||||||
|
@ -1252,7 +1400,7 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (RI.numSpills)
|
if (UseProfile && RI.numSpills)
|
||||||
printf("Block: %x, numspills %d\n", Jit->js.blockStart, RI.numSpills);
|
printf("Block: %x, numspills %d\n", Jit->js.blockStart, RI.numSpills);
|
||||||
|
|
||||||
Jit->UD2();
|
Jit->UD2();
|
||||||
|
|
|
@ -80,6 +80,69 @@ namespace IREmitter {
|
||||||
Store16,
|
Store16,
|
||||||
Store32,
|
Store32,
|
||||||
BranchCond,
|
BranchCond,
|
||||||
|
// Floating-point
|
||||||
|
// There are three floating-point formats: single, double,
|
||||||
|
// and packed. For any operation where the format of the
|
||||||
|
// operand isn't known, the ForceTo* operations are used;
|
||||||
|
// these are folded into the appropriate conversion
|
||||||
|
// (or no conversion) depending on the type of the operand.
|
||||||
|
// The "mreg" format is a pair of doubles; this is the
|
||||||
|
// most general possible representation which is used
|
||||||
|
// in the register state.
|
||||||
|
// This might seem like overkill, but it's a huge advantage
|
||||||
|
// to keep operands in the right format because extra
|
||||||
|
// precision can screw up games.
|
||||||
|
// FIXME: Does the slight loss of precision due to not
|
||||||
|
// having a madd instruction matter? It would be a
|
||||||
|
// performance loss for singles because the operations
|
||||||
|
// would have to be done in double precision, and a completely
|
||||||
|
// accurate double madd would require an extremely expensive
|
||||||
|
// fallback.
|
||||||
|
FDAdd,
|
||||||
|
FDSub,
|
||||||
|
FDMul,
|
||||||
|
FDDiv,
|
||||||
|
FDNeg,
|
||||||
|
FSAdd,
|
||||||
|
FSSub,
|
||||||
|
FSMul,
|
||||||
|
FSDiv,
|
||||||
|
FSNeg,
|
||||||
|
FPSAdd,
|
||||||
|
FPSSub,
|
||||||
|
FPSMul,
|
||||||
|
FPSDiv,
|
||||||
|
FPSNeg,
|
||||||
|
// FP Loads
|
||||||
|
LoadSingle,
|
||||||
|
LoadDouble,
|
||||||
|
// LoadPacked, // FIXME: Work out how this instruction should
|
||||||
|
// be implemented
|
||||||
|
// FP Stores
|
||||||
|
StoreSingle,
|
||||||
|
StoreDouble,
|
||||||
|
// StorePacked, // FIXME: Work out how this instruction should
|
||||||
|
// be implemented
|
||||||
|
PackedToSingle, // Extract PS0 from packed (type-pun)
|
||||||
|
// PackedToDouble == PackedToSingle+SingleToDouble
|
||||||
|
PackedToMReg, // Convert from packed format to mreg format (CVTPS2PD)
|
||||||
|
SingleToDouble, // Widen single to double (CVTSS2SD)
|
||||||
|
SingleToPacked, // Duplicate single to packed
|
||||||
|
// SingleToMReg == SingleToPacked+PackedToMReg
|
||||||
|
MRegToPacked, // Convert from mreg format to packed format (CVTPD2PS)
|
||||||
|
MRegToDouble, // Extract bottom half from mreg format. (type-pun)
|
||||||
|
// MRegToSingle == MRegToDouble + DoubleToSingle
|
||||||
|
DoubleToMReg, // Convert from double format to mreg format
|
||||||
|
DoubleToSingle, // Convert from double to single format (CVTSD2SS)
|
||||||
|
// DoubleToPacked should never be needed
|
||||||
|
|
||||||
|
ForceToPacked, // ForceTo* are "virtual"; they should be
|
||||||
|
// folded into the above conversions.
|
||||||
|
ForceToSingle,
|
||||||
|
ForceToDouble,
|
||||||
|
ForceToMReg,
|
||||||
|
LoadFPReg,
|
||||||
|
StoreFPReg,
|
||||||
|
|
||||||
// "Trinary" operators
|
// "Trinary" operators
|
||||||
// FIXME: Need to change representation!
|
// FIXME: Need to change representation!
|
||||||
|
@ -330,6 +393,7 @@ namespace IREmitter {
|
||||||
IRBuilder() { Reset(); }
|
IRBuilder() { Reset(); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
IRBuilder(IRBuilder&); // DO NOT IMPLEMENT
|
||||||
std::vector<Inst> InstList; // FIXME: We must ensure this is
|
std::vector<Inst> InstList; // FIXME: We must ensure this is
|
||||||
// continuous!
|
// continuous!
|
||||||
std::vector<unsigned> ConstList;
|
std::vector<unsigned> ConstList;
|
||||||
|
|
|
@ -81,14 +81,10 @@ using namespace Gen;
|
||||||
CRTest = ibuild.EmitXor(CRTest, CRCmp);
|
CRTest = ibuild.EmitXor(CRTest, CRCmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) {
|
if ((inst.BO & 4) == 0) {
|
||||||
IREmitter::InstLoc c = ibuild.EmitLoadCTR();
|
IREmitter::InstLoc c = ibuild.EmitLoadCTR();
|
||||||
c = ibuild.EmitSub(c, ibuild.EmitIntConst(1));
|
c = ibuild.EmitSub(c, ibuild.EmitIntConst(1));
|
||||||
ibuild.EmitStoreCTR(c);
|
ibuild.EmitStoreCTR(c);
|
||||||
}
|
|
||||||
|
|
||||||
if ((inst.BO & 4) == 0) {
|
|
||||||
IREmitter::InstLoc c = ibuild.EmitLoadCTR();
|
|
||||||
if (!(inst.BO & 2)) {
|
if (!(inst.BO & 2)) {
|
||||||
CTRTest = ibuild.EmitICmpEq(c,
|
CTRTest = ibuild.EmitICmpEq(c,
|
||||||
ibuild.EmitIntConst(0));
|
ibuild.EmitIntConst(0));
|
||||||
|
|
Loading…
Reference in New Issue