A bit more WIP JIT work; primary change is psq_st implementation.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1758 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
a72da4e76a
commit
b4d78829c3
|
@ -153,7 +153,7 @@ InstLoc IRBuilder::EmitUOp(unsigned Opcode, InstLoc Op1, unsigned extra) {
|
|||
return curIndex;
|
||||
}
|
||||
|
||||
InstLoc IRBuilder::EmitBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2) {
|
||||
InstLoc IRBuilder::EmitBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2, unsigned extra) {
|
||||
InstLoc curIndex = &InstList[InstList.size()];
|
||||
unsigned backOp1 = curIndex - 1 - Op1;
|
||||
if (backOp1 >= 255) {
|
||||
|
@ -168,7 +168,7 @@ InstLoc IRBuilder::EmitBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2) {
|
|||
backOp1++;
|
||||
curIndex++;
|
||||
}
|
||||
InstList.push_back(Opcode | backOp1 << 8 | backOp2 << 16);
|
||||
InstList.push_back(Opcode | (backOp1 << 8) | (backOp2 << 16) | (extra << 24));
|
||||
return curIndex;
|
||||
}
|
||||
|
||||
|
@ -451,7 +451,7 @@ InstLoc IRBuilder::FoldInterpreterFallback(InstLoc Op1, InstLoc Op2) {
|
|||
return EmitBiOp(InterpreterFallback, Op1, Op2);
|
||||
}
|
||||
|
||||
InstLoc IRBuilder::FoldBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2) {
|
||||
InstLoc IRBuilder::FoldBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2, unsigned extra) {
|
||||
switch (Opcode) {
|
||||
case Add: return FoldAdd(Op1, Op2);
|
||||
case And: return FoldAnd(Op1, Op2);
|
||||
|
@ -462,7 +462,7 @@ InstLoc IRBuilder::FoldBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2) {
|
|||
case Rol: return FoldRol(Op1, Op2);
|
||||
case BranchCond: return FoldBranchCond(Op1, Op2);
|
||||
case InterpreterFallback: return FoldInterpreterFallback(Op1, Op2);
|
||||
default: return EmitBiOp(Opcode, Op1, Op2);
|
||||
default: return EmitBiOp(Opcode, Op1, Op2, extra);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1019,6 +1019,7 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
|
|||
case DupSingleToMReg:
|
||||
case DoubleToSingle:
|
||||
case ExpandPackedToMReg:
|
||||
case CompactMRegToPacked:
|
||||
if (thisUsed)
|
||||
regMarkUse(RI, I, getOp1(I), 1);
|
||||
break;
|
||||
|
@ -1075,6 +1076,10 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
|
|||
regMarkUse(RI, I, getOp1(I), 1);
|
||||
regMarkMemAddress(RI, I, getOp2(I), 2);
|
||||
break;
|
||||
case StorePaired:
|
||||
regMarkUse(RI, I, getOp1(I), 1);
|
||||
regMarkUse(RI, I, getOp2(I), 2);
|
||||
break;
|
||||
case BranchUncond:
|
||||
if (!isImm(*getOp1(I)))
|
||||
regMarkUse(RI, I, getOp1(I), 1);
|
||||
|
@ -1390,6 +1395,23 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
|
|||
regNormalRegClear(RI, I);
|
||||
break;
|
||||
}
|
||||
case StorePaired: {
|
||||
regSpill(RI, EAX);
|
||||
regSpill(RI, EDX);
|
||||
unsigned quantreg = *I >> 24;
|
||||
Jit->MOVZX(32, 16, EAX, M(&PowerPC::ppcState.spr[SPR_GQR0 + quantreg]));
|
||||
Jit->MOVZX(32, 8, EDX, R(AL));
|
||||
// FIXME: Fix ModR/M encoding to allow [EDX*4+disp32]!
|
||||
Jit->SHL(32, R(EDX), Imm8(2));
|
||||
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I)));
|
||||
Jit->MOVAPD(XMM0, fregLocForInst(RI, getOp1(I)));
|
||||
Jit->CALLptr(MDisp(EDX, (u32)asm_routines.pairedStoreQuantized));
|
||||
if (RI.IInfo[I - RI.FirstI] & 4)
|
||||
fregClearInst(RI, getOp1(I));
|
||||
if (RI.IInfo[I - RI.FirstI] & 8)
|
||||
regClearInst(RI, getOp2(I));
|
||||
break;
|
||||
}
|
||||
case DupSingleToMReg: {
|
||||
if (!thisUsed) break;
|
||||
X64Reg reg = fregFindFreeReg(RI);
|
||||
|
@ -1417,6 +1439,14 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
|
|||
fregNormalRegClear(RI, I);
|
||||
break;
|
||||
}
|
||||
case CompactMRegToPacked: {
|
||||
if (!thisUsed) break;
|
||||
X64Reg reg = fregFindFreeReg(RI);
|
||||
Jit->CVTPD2PS(reg, fregLocForInst(RI, getOp1(I)));
|
||||
RI.fregs[reg] = I;
|
||||
fregNormalRegClear(RI, I);
|
||||
break;
|
||||
}
|
||||
case LoadFReg: {
|
||||
if (!thisUsed) break;
|
||||
X64Reg reg = fregFindFreeReg(RI);
|
||||
|
|
|
@ -146,10 +146,12 @@ namespace IREmitter {
|
|||
LoadSingle,
|
||||
LoadDouble,
|
||||
LoadPaired, // This handles quantizers itself
|
||||
StorePaired,
|
||||
DoubleToSingle,
|
||||
DupSingleToMReg,
|
||||
InsertDoubleInMReg,
|
||||
ExpandPackedToMReg,
|
||||
CompactMRegToPacked,
|
||||
LoadFReg,
|
||||
StoreFReg,
|
||||
FSMul,
|
||||
|
@ -232,7 +234,8 @@ namespace IREmitter {
|
|||
InstLoc EmitZeroOp(unsigned Opcode, unsigned extra);
|
||||
InstLoc EmitUOp(unsigned OpCode, InstLoc Op1,
|
||||
unsigned extra = 0);
|
||||
InstLoc EmitBiOp(unsigned OpCode, InstLoc Op1, InstLoc Op2);
|
||||
InstLoc EmitBiOp(unsigned OpCode, InstLoc Op1, InstLoc Op2,
|
||||
unsigned extra = 0);
|
||||
|
||||
InstLoc FoldAdd(InstLoc Op1, InstLoc Op2);
|
||||
InstLoc FoldAnd(InstLoc Op1, InstLoc Op2);
|
||||
|
@ -248,7 +251,8 @@ namespace IREmitter {
|
|||
InstLoc FoldZeroOp(unsigned Opcode, unsigned extra);
|
||||
InstLoc FoldUOp(unsigned OpCode, InstLoc Op1,
|
||||
unsigned extra = 0);
|
||||
InstLoc FoldBiOp(unsigned OpCode, InstLoc Op1, InstLoc Op2);
|
||||
InstLoc FoldBiOp(unsigned OpCode, InstLoc Op1, InstLoc Op2,
|
||||
unsigned extra = 0);
|
||||
|
||||
unsigned ComputeKnownZeroBits(InstLoc I);
|
||||
|
||||
|
@ -389,6 +393,9 @@ namespace IREmitter {
|
|||
// Builds a LoadPaired IR op with addr as its single operand. quantReg is
// handed to FoldUOp as the op's "extra" value (presumably the index of the
// quantization register to use -- confirm against the backend).
InstLoc EmitLoadPaired(InstLoc addr, unsigned quantReg) {
|
||||
return FoldUOp(LoadPaired, addr, quantReg);
|
||||
}
|
||||
// Builds a StorePaired IR op: value is operand 1, addr is operand 2, and
// quantReg is handed to FoldBiOp as the op's "extra" value. The x86 backend
// reads that extra value back from the top byte of the instruction word to
// select which GQR to consult, so quantReg must fit in 8 bits.
InstLoc EmitStorePaired(InstLoc value, InstLoc addr, unsigned quantReg) {
|
||||
return FoldBiOp(StorePaired, value, addr, quantReg);
|
||||
}
|
||||
// Builds a LoadFReg IR op; it has no IR operands, and freg is passed to
// FoldZeroOp as the op's "extra" value (presumably the guest FP register
// number -- confirm against the backend).
InstLoc EmitLoadFReg(unsigned freg) {
|
||||
return FoldZeroOp(LoadFReg, freg);
|
||||
}
|
||||
|
@ -404,6 +411,9 @@ namespace IREmitter {
|
|||
// Builds an ExpandPackedToMReg IR op with val as its single operand.
// NOTE(review): by its name this looks like the inverse of
// CompactMRegToPacked (packed singles -> register doubles) -- confirm.
InstLoc EmitExpandPackedToMReg(InstLoc val) {
|
||||
return FoldUOp(ExpandPackedToMReg, val);
|
||||
}
|
||||
// Builds a CompactMRegToPacked IR op with val as its single operand. The
// x86 backend lowers this op to CVTPD2PS, i.e. it narrows the pair of
// doubles in val to a pair of packed singles.
InstLoc EmitCompactMRegToPacked(InstLoc val) {
|
||||
return FoldUOp(CompactMRegToPacked, val);
|
||||
}
|
||||
// Builds an FSMul IR op over op1 and op2 (no extra value is supplied, so
// FoldBiOp's default of 0 is used).
InstLoc EmitFSMul(InstLoc op1, InstLoc op2) {
|
||||
return FoldBiOp(FSMul, op1, op2);
|
||||
}
|
||||
|
|
|
@ -58,6 +58,9 @@ struct CONTEXT
|
|||
|
||||
#endif
|
||||
|
||||
// #define INSTRUCTION_START Default(inst); return;
|
||||
// #define INSTRUCTION_START PPCTables::CountInstruction(inst);
|
||||
#define INSTRUCTION_START
|
||||
|
||||
class TrampolineCache : public Gen::XCodeBlock
|
||||
{
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include "ABI.h"
|
||||
#include "Jit.h"
|
||||
#include "JitCache.h"
|
||||
#include "Thunk.h"
|
||||
|
||||
#include "../../HW/CPUCompare.h"
|
||||
#include "../../HW/GPFifo.h"
|
||||
|
@ -213,6 +214,145 @@ const float m_dequantizeTableS[] =
|
|||
|
||||
float psTemp[2];
|
||||
|
||||
void AsmRoutineManager::GenQuantizedStores() {
|
||||
const u8* storePairedIllegal = AlignCode4();
|
||||
UD2();
|
||||
const u8* storePairedFloat = AlignCode4();
|
||||
if (cpu_info.bSSSE3) {
|
||||
PSHUFB(XMM0, M((void *)pbswapShuffle2x4));
|
||||
#ifdef _M_X64
|
||||
MOVQ_xmm(MComplex(RBX, RCX, 1, 0), XMM0);
|
||||
#else
|
||||
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOVQ_xmm(MDisp(ECX, (u32)Memory::base), XMM0);
|
||||
#endif
|
||||
} else {
|
||||
#ifdef _M_X64
|
||||
MOVQ_xmm(R(RCX), XMM0);
|
||||
ROL(64, RCX, Imm8(32));
|
||||
BSWAP(64, RCX);
|
||||
MOV(64, MComplex(RBX, RCX, 1, 0), R(RCX));
|
||||
#else
|
||||
#if 0
|
||||
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOVQ_xmm(XMM0, MDisp(ECX, (u32)Memory::base));
|
||||
PXOR(XMM1, R(XMM1));
|
||||
PSHUFLW(XMM0, R(XMM0), 0xB1);
|
||||
MOVAPD(XMM1, R(XMM0));
|
||||
PSRLW(XMM0, 8);
|
||||
PSLLW(XMM1, 8);
|
||||
POR(XMM0, R(XMM1));
|
||||
#else
|
||||
MOVQ_xmm(M(&psTemp[0]), XMM0);
|
||||
#if 0
|
||||
TEST(32, R(ECX), Imm32(0x0C000000));
|
||||
FixupBranch argh = J_CC(CC_NZ);
|
||||
MOV(32, R(EAX), M(&psTemp));
|
||||
BSWAP(32, EAX);
|
||||
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(32, MDisp(ECX, (u32)Memory::base), R(EAX));
|
||||
MOV(32, R(EAX), M(((char*)&psTemp) + 4));
|
||||
BSWAP(32, EAX);
|
||||
MOV(32, MDisp(ECX, 4+(u32)Memory::base), R(EAX));
|
||||
FixupBranch arg2 = J();
|
||||
SetJumpTarget(argh);
|
||||
#endif
|
||||
MOV(32, R(EAX), M(((char*)&psTemp)));
|
||||
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX);
|
||||
MOV(32, R(EAX), M(((char*)&psTemp)+4));
|
||||
ADD(32, R(ECX), Imm32(4));
|
||||
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX);
|
||||
#if 0
|
||||
SetJumpTarget(arg2);
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
RET();
|
||||
|
||||
const u8* storePairedU8 = AlignCode4();
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)m_quantizeTableS));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
CVTPS2DQ(XMM0, R(XMM0));
|
||||
PACKSSDW(XMM0, R(XMM0));
|
||||
PACKUSWB(XMM0, R(XMM0));
|
||||
MOVD_xmm(R(EAX), XMM0);
|
||||
#ifdef _M_X64
|
||||
MOV(16, MComplex(RBX, RCX, 1, 0), R(AX));
|
||||
#else
|
||||
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(16, MDisp(ECX, (u32)Memory::base), R(AX));
|
||||
#endif
|
||||
RET();
|
||||
|
||||
const u8* storePairedS8 = AlignCode4();
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)m_quantizeTableS));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
CVTPS2DQ(XMM0, R(XMM0));
|
||||
PACKSSDW(XMM0, R(XMM0));
|
||||
PACKSSWB(XMM0, R(XMM0));
|
||||
MOVD_xmm(R(EAX), XMM0);
|
||||
#ifdef _M_X64
|
||||
MOV(16, MComplex(RBX, RCX, 1, 0), R(AX));
|
||||
#else
|
||||
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(16, MDisp(ECX, (u32)Memory::base), R(AX));
|
||||
#endif
|
||||
RET();
|
||||
|
||||
const u8* storePairedU16 = AlignCode4();
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)m_quantizeTableS));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
CVTPS2DQ(XMM0, R(XMM0));
|
||||
PXOR(XMM1, R(XMM1));
|
||||
PCMPGTD(XMM1, R(XMM0));
|
||||
PANDN(XMM0, R(XMM1));
|
||||
PACKSSDW(XMM0, R(XMM0)); //PACKUSDW(XMM0, R(XMM0)); // FIXME: Wrong!
|
||||
MOVD_xmm(R(EAX), XMM0);
|
||||
BSWAP(32, EAX);
|
||||
ROL(32, R(EAX), Imm8(16));
|
||||
#ifdef _M_X64
|
||||
MOV(32, MComplex(RBX, RCX, 1, 0), R(EAX));
|
||||
#else
|
||||
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(32, MDisp(ECX, (u32)Memory::base), R(EAX));
|
||||
#endif
|
||||
RET();
|
||||
|
||||
const u8* storePairedS16 = AlignCode4();
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)m_quantizeTableS));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
CVTPS2DQ(XMM0, R(XMM0));
|
||||
PACKSSDW(XMM0, R(XMM0));
|
||||
MOVD_xmm(R(EAX), XMM0);
|
||||
BSWAP(32, EAX);
|
||||
ROL(32, R(EAX), Imm8(16));
|
||||
#ifdef _M_X64
|
||||
MOV(32, MComplex(RBX, RCX, 1, 0), R(EAX));
|
||||
#else
|
||||
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(32, MDisp(ECX, (u32)Memory::base), R(EAX));
|
||||
#endif
|
||||
RET();
|
||||
|
||||
pairedStoreQuantized[0] = storePairedFloat;
|
||||
pairedStoreQuantized[1] = storePairedIllegal;
|
||||
pairedStoreQuantized[2] = storePairedIllegal;
|
||||
pairedStoreQuantized[3] = storePairedIllegal;
|
||||
pairedStoreQuantized[4] = storePairedU8;
|
||||
pairedStoreQuantized[5] = storePairedU16;
|
||||
pairedStoreQuantized[6] = storePairedS8;
|
||||
pairedStoreQuantized[7] = storePairedS16;
|
||||
}
|
||||
|
||||
void AsmRoutineManager::GenQuantizedLoads() {
|
||||
const u8* loadPairedIllegal = AlignCode4();
|
||||
UD2();
|
||||
|
@ -429,6 +569,7 @@ void AsmRoutineManager::GenerateCommon()
|
|||
JMP(dispatcher, true);
|
||||
|
||||
GenQuantizedLoads();
|
||||
GenQuantizedStores();
|
||||
|
||||
computeRcFp = AlignCode16();
|
||||
//CMPSD(R(XMM0), M(&zero),
|
||||
|
|
|
@ -43,6 +43,7 @@ private:
|
|||
void GenFifoFloatWrite();
|
||||
void GenFifoXmm64Write();
|
||||
void GenQuantizedLoads();
|
||||
void GenQuantizedStores();
|
||||
|
||||
public:
|
||||
void Init() {
|
||||
|
@ -82,6 +83,7 @@ public:
|
|||
const u8 *doReJit;
|
||||
|
||||
const u8 *pairedLoadQuantized[8];
|
||||
const u8 *pairedStoreQuantized[8];
|
||||
|
||||
bool compareEnabled;
|
||||
};
|
||||
|
|
|
@ -57,6 +57,8 @@ using namespace Gen;
|
|||
void Jit64::bx(UGeckoInstruction inst)
|
||||
{
|
||||
NORMALBRANCH_START
|
||||
INSTRUCTION_START;
|
||||
|
||||
if (inst.LK)
|
||||
ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4));
|
||||
|
||||
|
|
|
@ -26,9 +26,6 @@
|
|||
#include "JitCache.h"
|
||||
#include "JitRegCache.h"
|
||||
|
||||
#define INSTRUCTION_START
|
||||
// #define INSTRUCTION_START Default(inst); return;
|
||||
|
||||
void Jit64::fp_arith_s(UGeckoInstruction inst)
|
||||
{
|
||||
if (inst.Rc || inst.OPCD != 59 || inst.SUBOP5 != 25) {
|
||||
|
|
|
@ -36,9 +36,6 @@
|
|||
#include "JitAsm.h"
|
||||
#include "JitRegCache.h"
|
||||
|
||||
// #define INSTRUCTION_START Default(inst); return;
|
||||
#define INSTRUCTION_START
|
||||
|
||||
// pshufb todo: MOVQ
|
||||
const u8 GC_ALIGNED16(bswapShuffle1x4[16]) = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||
const u8 GC_ALIGNED16(bswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||
|
|
|
@ -37,14 +37,19 @@
|
|||
#include "JitAsm.h"
|
||||
#include "JitRegCache.h"
|
||||
|
||||
#define INSTRUCTION_START
|
||||
// #define INSTRUCTION_START Default(inst); return;
|
||||
|
||||
// The big problem is likely instructions that set the quantizers in the same block.
|
||||
// We will have to break block after quantizers are written to.
|
||||
// Translates a quantized paired-single store into IR.
//
// The effective address is SIMM_12, plus GPR[RA] when RA is non-zero. For
// OPCD 61 (presumably the update form, psq_stu -- confirm) the effective
// address is also written back to RA. The source FP register is narrowed to
// packed singles and handed to StorePaired together with the quantization
// register index inst.I.
//
// The previous unconditional "Default(inst); return;" at the top of the
// body made everything below it unreachable; it is removed so the IR
// implementation is actually used.
void Jit64::psq_st(UGeckoInstruction inst)
{
	// The W=1 form is not implemented in IR; fall back to the interpreter.
	if (inst.W) {Default(inst); return;}
	IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_12), val;
	if (inst.RA)
		addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
	if (inst.OPCD == 61)
		ibuild.EmitStoreGReg(addr, inst.RA);
	val = ibuild.EmitLoadFReg(inst.RS);
	val = ibuild.EmitCompactMRegToPacked(val);
	ibuild.EmitStorePaired(val, addr, inst.I);
}
|
||||
|
||||
void Jit64::psq_l(UGeckoInstruction inst)
|
||||
|
|
|
@ -35,9 +35,6 @@
|
|||
// cmppd, andpd, andnpd, or
|
||||
// lfsx, ps_merge01 etc
|
||||
|
||||
// #define INSTRUCTION_START Default(inst); return;
|
||||
#define INSTRUCTION_START
|
||||
|
||||
const u64 GC_ALIGNED16(psSignBits[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL};
|
||||
const u64 GC_ALIGNED16(psAbsMask[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
|
||||
const double GC_ALIGNED16(psOneOne[2]) = {1.0, 1.0};
|
||||
|
|
Loading…
Reference in New Issue