JitIL: Modified "LoadDouble" to use SSSE3 when available, improving performance. The code is ported from Jit64.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6152 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
nodchip 2010-08-30 02:07:09 +00:00
parent baba7f54dc
commit 014e87157c
1 changed file with 24 additions and 9 deletions

View File

@ -47,6 +47,7 @@ The register allocation is linear scan allocation.
#include "../../HW/GPFifo.h"
#include "../../ConfigManager.h"
#include "x64Emitter.h"
#include "../../../../Common/Src/CPUDetect.h"
static ThunkManager thunks;
@ -1244,15 +1245,29 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak
case LoadDouble: {
if (!thisUsed) break;
X64Reg reg = fregFindFreeReg(RI);
const OpArg loc = regLocForInst(RI, getOp1(I));
Jit->MOV(32, R(ECX), loc);
Jit->ADD(32, R(ECX), Imm8(4));
RI.Jit->UnsafeLoadRegToReg(ECX, ECX, 32, 0, false);
Jit->MOVD_xmm(reg, R(ECX));
Jit->MOV(32, R(ECX), loc);
RI.Jit->UnsafeLoadRegToReg(ECX, ECX, 32, 0, false);
Jit->MOVD_xmm(XMM0, R(ECX));
Jit->PUNPCKLDQ(reg, R(XMM0));
if (cpu_info.bSSSE3) {
static const u32 GC_ALIGNED16(maskSwapa64_1[4]) =
{0x04050607L, 0x00010203L, 0xFFFFFFFFL, 0xFFFFFFFFL};
#ifdef _M_X64
X64Reg address = regEnsureInReg(RI, getOp1(I));
Jit->MOVQ_xmm(reg, MComplex(RBX, address, SCALE_1, 0));
#else
X64Reg address = regBinLHSReg(RI, I);
Jit->AND(32, R(address), Imm32(Memory::MEMVIEW32_MASK));
Jit->MOVQ_xmm(reg, MDisp(address, (u32)Memory::base));
#endif
Jit->PSHUFB(reg, M((void*)maskSwapa64_1));
} else {
const OpArg loc = regLocForInst(RI, getOp1(I));
Jit->MOV(32, R(ECX), loc);
Jit->ADD(32, R(ECX), Imm8(4));
RI.Jit->UnsafeLoadRegToReg(ECX, ECX, 32, 0, false);
Jit->MOVD_xmm(reg, R(ECX));
Jit->MOV(32, R(ECX), loc);
RI.Jit->UnsafeLoadRegToReg(ECX, ECX, 32, 0, false);
Jit->MOVD_xmm(XMM0, R(ECX));
Jit->PUNPCKLDQ(reg, R(XMM0));
}
RI.fregs[reg] = I;
regNormalRegClear(RI, I);
break;