A bit more WIP JIT work; primary change is psq_st implementation.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1758 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
a72da4e76a
commit
b4d78829c3
|
@ -153,7 +153,7 @@ InstLoc IRBuilder::EmitUOp(unsigned Opcode, InstLoc Op1, unsigned extra) {
|
||||||
return curIndex;
|
return curIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
InstLoc IRBuilder::EmitBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2) {
|
InstLoc IRBuilder::EmitBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2, unsigned extra) {
|
||||||
InstLoc curIndex = &InstList[InstList.size()];
|
InstLoc curIndex = &InstList[InstList.size()];
|
||||||
unsigned backOp1 = curIndex - 1 - Op1;
|
unsigned backOp1 = curIndex - 1 - Op1;
|
||||||
if (backOp1 >= 255) {
|
if (backOp1 >= 255) {
|
||||||
|
@ -168,7 +168,7 @@ InstLoc IRBuilder::EmitBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2) {
|
||||||
backOp1++;
|
backOp1++;
|
||||||
curIndex++;
|
curIndex++;
|
||||||
}
|
}
|
||||||
InstList.push_back(Opcode | backOp1 << 8 | backOp2 << 16);
|
InstList.push_back(Opcode | (backOp1 << 8) | (backOp2 << 16) | (extra << 24));
|
||||||
return curIndex;
|
return curIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -451,7 +451,7 @@ InstLoc IRBuilder::FoldInterpreterFallback(InstLoc Op1, InstLoc Op2) {
|
||||||
return EmitBiOp(InterpreterFallback, Op1, Op2);
|
return EmitBiOp(InterpreterFallback, Op1, Op2);
|
||||||
}
|
}
|
||||||
|
|
||||||
InstLoc IRBuilder::FoldBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2) {
|
InstLoc IRBuilder::FoldBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2, unsigned extra) {
|
||||||
switch (Opcode) {
|
switch (Opcode) {
|
||||||
case Add: return FoldAdd(Op1, Op2);
|
case Add: return FoldAdd(Op1, Op2);
|
||||||
case And: return FoldAnd(Op1, Op2);
|
case And: return FoldAnd(Op1, Op2);
|
||||||
|
@ -462,7 +462,7 @@ InstLoc IRBuilder::FoldBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2) {
|
||||||
case Rol: return FoldRol(Op1, Op2);
|
case Rol: return FoldRol(Op1, Op2);
|
||||||
case BranchCond: return FoldBranchCond(Op1, Op2);
|
case BranchCond: return FoldBranchCond(Op1, Op2);
|
||||||
case InterpreterFallback: return FoldInterpreterFallback(Op1, Op2);
|
case InterpreterFallback: return FoldInterpreterFallback(Op1, Op2);
|
||||||
default: return EmitBiOp(Opcode, Op1, Op2);
|
default: return EmitBiOp(Opcode, Op1, Op2, extra);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1019,6 +1019,7 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
|
||||||
case DupSingleToMReg:
|
case DupSingleToMReg:
|
||||||
case DoubleToSingle:
|
case DoubleToSingle:
|
||||||
case ExpandPackedToMReg:
|
case ExpandPackedToMReg:
|
||||||
|
case CompactMRegToPacked:
|
||||||
if (thisUsed)
|
if (thisUsed)
|
||||||
regMarkUse(RI, I, getOp1(I), 1);
|
regMarkUse(RI, I, getOp1(I), 1);
|
||||||
break;
|
break;
|
||||||
|
@ -1075,6 +1076,10 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
|
||||||
regMarkUse(RI, I, getOp1(I), 1);
|
regMarkUse(RI, I, getOp1(I), 1);
|
||||||
regMarkMemAddress(RI, I, getOp2(I), 2);
|
regMarkMemAddress(RI, I, getOp2(I), 2);
|
||||||
break;
|
break;
|
||||||
|
case StorePaired:
|
||||||
|
regMarkUse(RI, I, getOp1(I), 1);
|
||||||
|
regMarkUse(RI, I, getOp2(I), 2);
|
||||||
|
break;
|
||||||
case BranchUncond:
|
case BranchUncond:
|
||||||
if (!isImm(*getOp1(I)))
|
if (!isImm(*getOp1(I)))
|
||||||
regMarkUse(RI, I, getOp1(I), 1);
|
regMarkUse(RI, I, getOp1(I), 1);
|
||||||
|
@ -1390,6 +1395,23 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
|
||||||
regNormalRegClear(RI, I);
|
regNormalRegClear(RI, I);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case StorePaired: {
|
||||||
|
regSpill(RI, EAX);
|
||||||
|
regSpill(RI, EDX);
|
||||||
|
unsigned quantreg = *I >> 24;
|
||||||
|
Jit->MOVZX(32, 16, EAX, M(&PowerPC::ppcState.spr[SPR_GQR0 + quantreg]));
|
||||||
|
Jit->MOVZX(32, 8, EDX, R(AL));
|
||||||
|
// FIXME: Fix ModR/M encoding to allow [EDX*4+disp32]!
|
||||||
|
Jit->SHL(32, R(EDX), Imm8(2));
|
||||||
|
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I)));
|
||||||
|
Jit->MOVAPD(XMM0, fregLocForInst(RI, getOp1(I)));
|
||||||
|
Jit->CALLptr(MDisp(EDX, (u32)asm_routines.pairedStoreQuantized));
|
||||||
|
if (RI.IInfo[I - RI.FirstI] & 4)
|
||||||
|
fregClearInst(RI, getOp1(I));
|
||||||
|
if (RI.IInfo[I - RI.FirstI] & 8)
|
||||||
|
regClearInst(RI, getOp2(I));
|
||||||
|
break;
|
||||||
|
}
|
||||||
case DupSingleToMReg: {
|
case DupSingleToMReg: {
|
||||||
if (!thisUsed) break;
|
if (!thisUsed) break;
|
||||||
X64Reg reg = fregFindFreeReg(RI);
|
X64Reg reg = fregFindFreeReg(RI);
|
||||||
|
@ -1417,6 +1439,14 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile) {
|
||||||
fregNormalRegClear(RI, I);
|
fregNormalRegClear(RI, I);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case CompactMRegToPacked: {
|
||||||
|
if (!thisUsed) break;
|
||||||
|
X64Reg reg = fregFindFreeReg(RI);
|
||||||
|
Jit->CVTPD2PS(reg, fregLocForInst(RI, getOp1(I)));
|
||||||
|
RI.fregs[reg] = I;
|
||||||
|
fregNormalRegClear(RI, I);
|
||||||
|
break;
|
||||||
|
}
|
||||||
case LoadFReg: {
|
case LoadFReg: {
|
||||||
if (!thisUsed) break;
|
if (!thisUsed) break;
|
||||||
X64Reg reg = fregFindFreeReg(RI);
|
X64Reg reg = fregFindFreeReg(RI);
|
||||||
|
|
|
@ -146,10 +146,12 @@ namespace IREmitter {
|
||||||
LoadSingle,
|
LoadSingle,
|
||||||
LoadDouble,
|
LoadDouble,
|
||||||
LoadPaired, // This handles quantizers itself
|
LoadPaired, // This handles quantizers itself
|
||||||
|
StorePaired,
|
||||||
DoubleToSingle,
|
DoubleToSingle,
|
||||||
DupSingleToMReg,
|
DupSingleToMReg,
|
||||||
InsertDoubleInMReg,
|
InsertDoubleInMReg,
|
||||||
ExpandPackedToMReg,
|
ExpandPackedToMReg,
|
||||||
|
CompactMRegToPacked,
|
||||||
LoadFReg,
|
LoadFReg,
|
||||||
StoreFReg,
|
StoreFReg,
|
||||||
FSMul,
|
FSMul,
|
||||||
|
@ -232,7 +234,8 @@ namespace IREmitter {
|
||||||
InstLoc EmitZeroOp(unsigned Opcode, unsigned extra);
|
InstLoc EmitZeroOp(unsigned Opcode, unsigned extra);
|
||||||
InstLoc EmitUOp(unsigned OpCode, InstLoc Op1,
|
InstLoc EmitUOp(unsigned OpCode, InstLoc Op1,
|
||||||
unsigned extra = 0);
|
unsigned extra = 0);
|
||||||
InstLoc EmitBiOp(unsigned OpCode, InstLoc Op1, InstLoc Op2);
|
InstLoc EmitBiOp(unsigned OpCode, InstLoc Op1, InstLoc Op2,
|
||||||
|
unsigned extra = 0);
|
||||||
|
|
||||||
InstLoc FoldAdd(InstLoc Op1, InstLoc Op2);
|
InstLoc FoldAdd(InstLoc Op1, InstLoc Op2);
|
||||||
InstLoc FoldAnd(InstLoc Op1, InstLoc Op2);
|
InstLoc FoldAnd(InstLoc Op1, InstLoc Op2);
|
||||||
|
@ -248,7 +251,8 @@ namespace IREmitter {
|
||||||
InstLoc FoldZeroOp(unsigned Opcode, unsigned extra);
|
InstLoc FoldZeroOp(unsigned Opcode, unsigned extra);
|
||||||
InstLoc FoldUOp(unsigned OpCode, InstLoc Op1,
|
InstLoc FoldUOp(unsigned OpCode, InstLoc Op1,
|
||||||
unsigned extra = 0);
|
unsigned extra = 0);
|
||||||
InstLoc FoldBiOp(unsigned OpCode, InstLoc Op1, InstLoc Op2);
|
InstLoc FoldBiOp(unsigned OpCode, InstLoc Op1, InstLoc Op2,
|
||||||
|
unsigned extra = 0);
|
||||||
|
|
||||||
unsigned ComputeKnownZeroBits(InstLoc I);
|
unsigned ComputeKnownZeroBits(InstLoc I);
|
||||||
|
|
||||||
|
@ -389,6 +393,9 @@ namespace IREmitter {
|
||||||
InstLoc EmitLoadPaired(InstLoc addr, unsigned quantReg) {
|
InstLoc EmitLoadPaired(InstLoc addr, unsigned quantReg) {
|
||||||
return FoldUOp(LoadPaired, addr, quantReg);
|
return FoldUOp(LoadPaired, addr, quantReg);
|
||||||
}
|
}
|
||||||
|
InstLoc EmitStorePaired(InstLoc value, InstLoc addr, unsigned quantReg) {
|
||||||
|
return FoldBiOp(StorePaired, value, addr, quantReg);
|
||||||
|
}
|
||||||
InstLoc EmitLoadFReg(unsigned freg) {
|
InstLoc EmitLoadFReg(unsigned freg) {
|
||||||
return FoldZeroOp(LoadFReg, freg);
|
return FoldZeroOp(LoadFReg, freg);
|
||||||
}
|
}
|
||||||
|
@ -404,6 +411,9 @@ namespace IREmitter {
|
||||||
InstLoc EmitExpandPackedToMReg(InstLoc val) {
|
InstLoc EmitExpandPackedToMReg(InstLoc val) {
|
||||||
return FoldUOp(ExpandPackedToMReg, val);
|
return FoldUOp(ExpandPackedToMReg, val);
|
||||||
}
|
}
|
||||||
|
InstLoc EmitCompactMRegToPacked(InstLoc val) {
|
||||||
|
return FoldUOp(CompactMRegToPacked, val);
|
||||||
|
}
|
||||||
InstLoc EmitFSMul(InstLoc op1, InstLoc op2) {
|
InstLoc EmitFSMul(InstLoc op1, InstLoc op2) {
|
||||||
return FoldBiOp(FSMul, op1, op2);
|
return FoldBiOp(FSMul, op1, op2);
|
||||||
}
|
}
|
||||||
|
|
|
@ -58,6 +58,9 @@ struct CONTEXT
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// #define INSTRUCTION_START Default(inst); return;
|
||||||
|
// #define INSTRUCTION_START PPCTables::CountInstruction(inst);
|
||||||
|
#define INSTRUCTION_START
|
||||||
|
|
||||||
class TrampolineCache : public Gen::XCodeBlock
|
class TrampolineCache : public Gen::XCodeBlock
|
||||||
{
|
{
|
||||||
|
|
|
@ -28,6 +28,7 @@
|
||||||
#include "ABI.h"
|
#include "ABI.h"
|
||||||
#include "Jit.h"
|
#include "Jit.h"
|
||||||
#include "JitCache.h"
|
#include "JitCache.h"
|
||||||
|
#include "Thunk.h"
|
||||||
|
|
||||||
#include "../../HW/CPUCompare.h"
|
#include "../../HW/CPUCompare.h"
|
||||||
#include "../../HW/GPFifo.h"
|
#include "../../HW/GPFifo.h"
|
||||||
|
@ -213,6 +214,145 @@ const float m_dequantizeTableS[] =
|
||||||
|
|
||||||
float psTemp[2];
|
float psTemp[2];
|
||||||
|
|
||||||
|
void AsmRoutineManager::GenQuantizedStores() {
|
||||||
|
const u8* storePairedIllegal = AlignCode4();
|
||||||
|
UD2();
|
||||||
|
const u8* storePairedFloat = AlignCode4();
|
||||||
|
if (cpu_info.bSSSE3) {
|
||||||
|
PSHUFB(XMM0, M((void *)pbswapShuffle2x4));
|
||||||
|
#ifdef _M_X64
|
||||||
|
MOVQ_xmm(MComplex(RBX, RCX, 1, 0), XMM0);
|
||||||
|
#else
|
||||||
|
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||||
|
MOVQ_xmm(MDisp(ECX, (u32)Memory::base), XMM0);
|
||||||
|
#endif
|
||||||
|
} else {
|
||||||
|
#ifdef _M_X64
|
||||||
|
MOVQ_xmm(R(RCX), XMM0);
|
||||||
|
ROL(64, RCX, Imm8(32));
|
||||||
|
BSWAP(64, RCX);
|
||||||
|
MOV(64, MComplex(RBX, RCX, 1, 0), R(RCX));
|
||||||
|
#else
|
||||||
|
#if 0
|
||||||
|
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||||
|
MOVQ_xmm(XMM0, MDisp(ECX, (u32)Memory::base));
|
||||||
|
PXOR(XMM1, R(XMM1));
|
||||||
|
PSHUFLW(XMM0, R(XMM0), 0xB1);
|
||||||
|
MOVAPD(XMM1, R(XMM0));
|
||||||
|
PSRLW(XMM0, 8);
|
||||||
|
PSLLW(XMM1, 8);
|
||||||
|
POR(XMM0, R(XMM1));
|
||||||
|
#else
|
||||||
|
MOVQ_xmm(M(&psTemp[0]), XMM0);
|
||||||
|
#if 0
|
||||||
|
TEST(32, R(ECX), Imm32(0x0C000000));
|
||||||
|
FixupBranch argh = J_CC(CC_NZ);
|
||||||
|
MOV(32, R(EAX), M(&psTemp));
|
||||||
|
BSWAP(32, EAX);
|
||||||
|
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||||
|
MOV(32, MDisp(ECX, (u32)Memory::base), R(EAX));
|
||||||
|
MOV(32, R(EAX), M(((char*)&psTemp) + 4));
|
||||||
|
BSWAP(32, EAX);
|
||||||
|
MOV(32, MDisp(ECX, 4+(u32)Memory::base), R(EAX));
|
||||||
|
FixupBranch arg2 = J();
|
||||||
|
SetJumpTarget(argh);
|
||||||
|
#endif
|
||||||
|
MOV(32, R(EAX), M(((char*)&psTemp)));
|
||||||
|
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX);
|
||||||
|
MOV(32, R(EAX), M(((char*)&psTemp)+4));
|
||||||
|
ADD(32, R(ECX), Imm32(4));
|
||||||
|
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX);
|
||||||
|
#if 0
|
||||||
|
SetJumpTarget(arg2);
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
RET();
|
||||||
|
|
||||||
|
const u8* storePairedU8 = AlignCode4();
|
||||||
|
SHR(32, R(EAX), Imm8(6));
|
||||||
|
MOVSS(XMM1, MDisp(EAX, (u32)m_quantizeTableS));
|
||||||
|
PUNPCKLDQ(XMM1, R(XMM1));
|
||||||
|
MULPS(XMM0, R(XMM1));
|
||||||
|
CVTPS2DQ(XMM0, R(XMM0));
|
||||||
|
PACKSSDW(XMM0, R(XMM0));
|
||||||
|
PACKUSWB(XMM0, R(XMM0));
|
||||||
|
MOVD_xmm(R(EAX), XMM0);
|
||||||
|
#ifdef _M_X64
|
||||||
|
MOV(16, MComplex(RBX, RCX, 1, 0), R(AX));
|
||||||
|
#else
|
||||||
|
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||||
|
MOV(16, MDisp(ECX, (u32)Memory::base), R(AX));
|
||||||
|
#endif
|
||||||
|
RET();
|
||||||
|
|
||||||
|
const u8* storePairedS8 = AlignCode4();
|
||||||
|
SHR(32, R(EAX), Imm8(6));
|
||||||
|
MOVSS(XMM1, MDisp(EAX, (u32)m_quantizeTableS));
|
||||||
|
PUNPCKLDQ(XMM1, R(XMM1));
|
||||||
|
MULPS(XMM0, R(XMM1));
|
||||||
|
CVTPS2DQ(XMM0, R(XMM0));
|
||||||
|
PACKSSDW(XMM0, R(XMM0));
|
||||||
|
PACKSSWB(XMM0, R(XMM0));
|
||||||
|
MOVD_xmm(R(EAX), XMM0);
|
||||||
|
#ifdef _M_X64
|
||||||
|
MOV(16, MComplex(RBX, RCX, 1, 0), R(AX));
|
||||||
|
#else
|
||||||
|
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||||
|
MOV(16, MDisp(ECX, (u32)Memory::base), R(AX));
|
||||||
|
#endif
|
||||||
|
RET();
|
||||||
|
|
||||||
|
const u8* storePairedU16 = AlignCode4();
|
||||||
|
SHR(32, R(EAX), Imm8(6));
|
||||||
|
MOVSS(XMM1, MDisp(EAX, (u32)m_quantizeTableS));
|
||||||
|
PUNPCKLDQ(XMM1, R(XMM1));
|
||||||
|
MULPS(XMM0, R(XMM1));
|
||||||
|
CVTPS2DQ(XMM0, R(XMM0));
|
||||||
|
PXOR(XMM1, R(XMM1));
|
||||||
|
PCMPGTD(XMM1, R(XMM0));
|
||||||
|
PANDN(XMM0, R(XMM1));
|
||||||
|
PACKSSDW(XMM0, R(XMM0)); //PACKUSDW(XMM0, R(XMM0)); // FIXME: Wrong!
|
||||||
|
MOVD_xmm(R(EAX), XMM0);
|
||||||
|
BSWAP(32, EAX);
|
||||||
|
ROL(32, R(EAX), Imm8(16));
|
||||||
|
#ifdef _M_X64
|
||||||
|
MOV(32, MComplex(RBX, RCX, 1, 0), R(EAX));
|
||||||
|
#else
|
||||||
|
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||||
|
MOV(32, MDisp(ECX, (u32)Memory::base), R(EAX));
|
||||||
|
#endif
|
||||||
|
RET();
|
||||||
|
|
||||||
|
const u8* storePairedS16 = AlignCode4();
|
||||||
|
SHR(32, R(EAX), Imm8(6));
|
||||||
|
MOVSS(XMM1, MDisp(EAX, (u32)m_quantizeTableS));
|
||||||
|
PUNPCKLDQ(XMM1, R(XMM1));
|
||||||
|
MULPS(XMM0, R(XMM1));
|
||||||
|
CVTPS2DQ(XMM0, R(XMM0));
|
||||||
|
PACKSSDW(XMM0, R(XMM0));
|
||||||
|
MOVD_xmm(R(EAX), XMM0);
|
||||||
|
BSWAP(32, EAX);
|
||||||
|
ROL(32, R(EAX), Imm8(16));
|
||||||
|
#ifdef _M_X64
|
||||||
|
MOV(32, MComplex(RBX, RCX, 1, 0), R(EAX));
|
||||||
|
#else
|
||||||
|
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||||
|
MOV(32, MDisp(ECX, (u32)Memory::base), R(EAX));
|
||||||
|
#endif
|
||||||
|
RET();
|
||||||
|
|
||||||
|
pairedStoreQuantized[0] = storePairedFloat;
|
||||||
|
pairedStoreQuantized[1] = storePairedIllegal;
|
||||||
|
pairedStoreQuantized[2] = storePairedIllegal;
|
||||||
|
pairedStoreQuantized[3] = storePairedIllegal;
|
||||||
|
pairedStoreQuantized[4] = storePairedU8;
|
||||||
|
pairedStoreQuantized[5] = storePairedU16;
|
||||||
|
pairedStoreQuantized[6] = storePairedS8;
|
||||||
|
pairedStoreQuantized[7] = storePairedS16;
|
||||||
|
}
|
||||||
|
|
||||||
void AsmRoutineManager::GenQuantizedLoads() {
|
void AsmRoutineManager::GenQuantizedLoads() {
|
||||||
const u8* loadPairedIllegal = AlignCode4();
|
const u8* loadPairedIllegal = AlignCode4();
|
||||||
UD2();
|
UD2();
|
||||||
|
@ -429,6 +569,7 @@ void AsmRoutineManager::GenerateCommon()
|
||||||
JMP(dispatcher, true);
|
JMP(dispatcher, true);
|
||||||
|
|
||||||
GenQuantizedLoads();
|
GenQuantizedLoads();
|
||||||
|
GenQuantizedStores();
|
||||||
|
|
||||||
computeRcFp = AlignCode16();
|
computeRcFp = AlignCode16();
|
||||||
//CMPSD(R(XMM0), M(&zero),
|
//CMPSD(R(XMM0), M(&zero),
|
||||||
|
|
|
@ -43,6 +43,7 @@ private:
|
||||||
void GenFifoFloatWrite();
|
void GenFifoFloatWrite();
|
||||||
void GenFifoXmm64Write();
|
void GenFifoXmm64Write();
|
||||||
void GenQuantizedLoads();
|
void GenQuantizedLoads();
|
||||||
|
void GenQuantizedStores();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
void Init() {
|
void Init() {
|
||||||
|
@ -82,6 +83,7 @@ public:
|
||||||
const u8 *doReJit;
|
const u8 *doReJit;
|
||||||
|
|
||||||
const u8 *pairedLoadQuantized[8];
|
const u8 *pairedLoadQuantized[8];
|
||||||
|
const u8 *pairedStoreQuantized[8];
|
||||||
|
|
||||||
bool compareEnabled;
|
bool compareEnabled;
|
||||||
};
|
};
|
||||||
|
|
|
@ -57,6 +57,8 @@ using namespace Gen;
|
||||||
void Jit64::bx(UGeckoInstruction inst)
|
void Jit64::bx(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
NORMALBRANCH_START
|
NORMALBRANCH_START
|
||||||
|
INSTRUCTION_START;
|
||||||
|
|
||||||
if (inst.LK)
|
if (inst.LK)
|
||||||
ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4));
|
ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4));
|
||||||
|
|
||||||
|
|
|
@ -26,9 +26,6 @@
|
||||||
#include "JitCache.h"
|
#include "JitCache.h"
|
||||||
#include "JitRegCache.h"
|
#include "JitRegCache.h"
|
||||||
|
|
||||||
#define INSTRUCTION_START
|
|
||||||
// #define INSTRUCTION_START Default(inst); return;
|
|
||||||
|
|
||||||
void Jit64::fp_arith_s(UGeckoInstruction inst)
|
void Jit64::fp_arith_s(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if (inst.Rc || inst.OPCD != 59 || inst.SUBOP5 != 25) {
|
if (inst.Rc || inst.OPCD != 59 || inst.SUBOP5 != 25) {
|
||||||
|
|
|
@ -36,9 +36,6 @@
|
||||||
#include "JitAsm.h"
|
#include "JitAsm.h"
|
||||||
#include "JitRegCache.h"
|
#include "JitRegCache.h"
|
||||||
|
|
||||||
// #define INSTRUCTION_START Default(inst); return;
|
|
||||||
#define INSTRUCTION_START
|
|
||||||
|
|
||||||
// pshufb todo: MOVQ
|
// pshufb todo: MOVQ
|
||||||
const u8 GC_ALIGNED16(bswapShuffle1x4[16]) = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
|
const u8 GC_ALIGNED16(bswapShuffle1x4[16]) = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||||
const u8 GC_ALIGNED16(bswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
|
const u8 GC_ALIGNED16(bswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||||
|
|
|
@ -37,14 +37,19 @@
|
||||||
#include "JitAsm.h"
|
#include "JitAsm.h"
|
||||||
#include "JitRegCache.h"
|
#include "JitRegCache.h"
|
||||||
|
|
||||||
#define INSTRUCTION_START
|
|
||||||
// #define INSTRUCTION_START Default(inst); return;
|
|
||||||
|
|
||||||
// The big problem is likely instructions that set the quantizers in the same block.
|
// The big problem is likely instructions that set the quantizers in the same block.
|
||||||
// We will have to break block after quantizers are written to.
|
// We will have to break block after quantizers are written to.
|
||||||
void Jit64::psq_st(UGeckoInstruction inst)
|
void Jit64::psq_st(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
Default(inst); return;
|
if (inst.W) {Default(inst); return;}
|
||||||
|
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_12), val;
|
||||||
|
if (inst.RA)
|
||||||
|
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
|
||||||
|
if (inst.OPCD == 61)
|
||||||
|
ibuild.EmitStoreGReg(addr, inst.RA);
|
||||||
|
val = ibuild.EmitLoadFReg(inst.RS);
|
||||||
|
val = ibuild.EmitCompactMRegToPacked(val);
|
||||||
|
ibuild.EmitStorePaired(val, addr, inst.I);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::psq_l(UGeckoInstruction inst)
|
void Jit64::psq_l(UGeckoInstruction inst)
|
||||||
|
|
|
@ -35,9 +35,6 @@
|
||||||
// cmppd, andpd, andnpd, or
|
// cmppd, andpd, andnpd, or
|
||||||
// lfsx, ps_merge01 etc
|
// lfsx, ps_merge01 etc
|
||||||
|
|
||||||
// #define INSTRUCTION_START Default(inst); return;
|
|
||||||
#define INSTRUCTION_START
|
|
||||||
|
|
||||||
const u64 GC_ALIGNED16(psSignBits[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL};
|
const u64 GC_ALIGNED16(psSignBits[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL};
|
||||||
const u64 GC_ALIGNED16(psAbsMask[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
|
const u64 GC_ALIGNED16(psAbsMask[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
|
||||||
const double GC_ALIGNED16(psOneOne[2]) = {1.0, 1.0};
|
const double GC_ALIGNED16(psOneOne[2]) = {1.0, 1.0};
|
||||||
|
|
Loading…
Reference in New Issue