New LockThreads option. Also added new INI core option - OptimizeQuantizers. Set to False to work around Resident Evil 1 bug (this will slow down other games somewhat).
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@20 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
b673d8d770
commit
d23af1a15e
|
@ -1,7 +1,7 @@
|
|||
<?xml version="1.0" encoding="Windows-1252"?>
|
||||
<VisualStudioProject
|
||||
ProjectType="Visual C++"
|
||||
Version="8,00"
|
||||
Version="8.00"
|
||||
Name="zlib"
|
||||
ProjectGUID="{3E03C179-8251-46E4-81F4-466F114BAC63}"
|
||||
RootNamespace="zlib"
|
||||
|
@ -24,6 +24,7 @@
|
|||
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
|
||||
ConfigurationType="4"
|
||||
CharacterSet="1"
|
||||
WholeProgramOptimization="0"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
|
@ -86,6 +87,7 @@
|
|||
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
|
||||
ConfigurationType="4"
|
||||
CharacterSet="1"
|
||||
WholeProgramOptimization="0"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
|
@ -149,7 +151,7 @@
|
|||
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
|
||||
ConfigurationType="4"
|
||||
CharacterSet="1"
|
||||
WholeProgramOptimization="1"
|
||||
WholeProgramOptimization="0"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
|
@ -213,7 +215,7 @@
|
|||
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
|
||||
ConfigurationType="4"
|
||||
CharacterSet="1"
|
||||
WholeProgramOptimization="1"
|
||||
WholeProgramOptimization="0"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
|
@ -278,7 +280,7 @@
|
|||
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
|
||||
ConfigurationType="4"
|
||||
CharacterSet="1"
|
||||
WholeProgramOptimization="1"
|
||||
WholeProgramOptimization="0"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
|
@ -342,7 +344,7 @@
|
|||
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
|
||||
ConfigurationType="4"
|
||||
CharacterSet="1"
|
||||
WholeProgramOptimization="1"
|
||||
WholeProgramOptimization="0"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
|
|
|
@ -885,6 +885,10 @@
|
|||
RelativePath=".\Src\PowerPC\Jit64\Jit_LoadStore.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\Src\PowerPC\Jit64\Jit_LoadStorePaired.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\Src\PowerPC\Jit64\Jit_Paired.cpp"
|
||||
>
|
||||
|
|
|
@ -182,7 +182,8 @@ THREAD_RETURN CpuThread(void *pArg)
|
|||
CPUCompare::ConnectAsClient();
|
||||
}
|
||||
|
||||
Common::Thread::SetCurrentThreadAffinity(1); //Force to first core
|
||||
if (_CoreParameter.bLockThreads)
|
||||
Common::Thread::SetCurrentThreadAffinity(1); //Force to first core
|
||||
|
||||
// Let's run under memory watch
|
||||
EMM::InstallExceptionHandler();
|
||||
|
@ -208,7 +209,8 @@ THREAD_RETURN EmuThread(void *pArg)
|
|||
Common::SetCurrentThreadName("Emuthread - starting");
|
||||
const SCoreStartupParameter& _CoreParameter = *(SCoreStartupParameter*)pArg;
|
||||
|
||||
Common::Thread::SetCurrentThreadAffinity(2); //Force to second core
|
||||
if (_CoreParameter.bLockThreads)
|
||||
Common::Thread::SetCurrentThreadAffinity(2); //Force to second core
|
||||
|
||||
LOG(OSREPORT, "Starting core = %s mode", _CoreParameter.bWii ? "Wii" : "Gamecube");
|
||||
LOG(OSREPORT, "Dualcore = %s", _CoreParameter.bUseDualCore ? "Yes" : "No");
|
||||
|
@ -222,7 +224,7 @@ THREAD_RETURN EmuThread(void *pArg)
|
|||
VideoInitialize.pGetMemoryPointer = Memory::GetPointer;
|
||||
VideoInitialize.pSetPEToken = PixelEngine::SetToken;
|
||||
VideoInitialize.pSetPEFinish = PixelEngine::SetFinish;
|
||||
VideoInitialize.pWindowHandle = _CoreParameter.hMainWindow; // NULL; // filled by video_initialize
|
||||
VideoInitialize.pWindowHandle = NULL; // _CoreParameter.hMainWindow; // NULL; // filled by video_initialize
|
||||
VideoInitialize.pLog = Callback_VideoLog;
|
||||
VideoInitialize.pRequestWindowSize = NULL; //Callback_VideoRequestWindowSize;
|
||||
VideoInitialize.pCopiedToXFB = Callback_VideoCopiedToXFB;
|
||||
|
|
|
@ -32,6 +32,7 @@ void SCoreStartupParameter::LoadDefaults()
|
|||
bUseDynarec = false;
|
||||
bUseDualCore = false;
|
||||
bRunCompareServer = false;
|
||||
bLockThreads = true;
|
||||
bWii = false;
|
||||
}
|
||||
|
||||
|
|
|
@ -41,6 +41,8 @@ struct SCoreStartupParameter
|
|||
bool bHLEBios;
|
||||
bool bThrottle;
|
||||
bool bUseFastMem;
|
||||
bool bLockThreads;
|
||||
bool bOptimizeQuantizers;
|
||||
|
||||
bool bRunCompareServer;
|
||||
bool bRunCompareClient;
|
||||
|
|
|
@ -43,6 +43,9 @@
|
|||
|
||||
namespace Jit64
|
||||
{
|
||||
static u64 GC_ALIGNED16(temp64);
|
||||
static u32 GC_ALIGNED16(temp32);
|
||||
|
||||
#ifdef _M_X64
|
||||
void SafeLoadECXtoEAX(int accessSize, s32 offset)
|
||||
{
|
||||
|
@ -119,11 +122,6 @@ namespace Jit64
|
|||
gpr.UnlockAll();
|
||||
}
|
||||
|
||||
void SafeStoreECXtoEDX(int accessSize, int offset)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
void lXz(UGeckoInstruction inst)
|
||||
{
|
||||
int d = inst.RD;
|
||||
|
@ -209,9 +207,6 @@ namespace Jit64
|
|||
gpr.UnlockAll();
|
||||
}
|
||||
|
||||
u32 GC_ALIGNED16(temp32);
|
||||
u64 GC_ALIGNED16(temp64);
|
||||
|
||||
void lfs(UGeckoInstruction inst)
|
||||
{
|
||||
// BIT32OLD;
|
||||
|
@ -227,15 +222,15 @@ namespace Jit64
|
|||
gpr.Flush(FLUSH_VOLATILE);
|
||||
gpr.Lock(d, a);
|
||||
|
||||
MOV(32,R(ECX),gpr.R(a));
|
||||
MOV(32,R(ECX), gpr.R(a));
|
||||
#ifdef _M_X64
|
||||
if (!jo.noAssumeFPLoadFromMem)
|
||||
{
|
||||
MOV(32, R(EAX), MComplex(RBX,ECX,SCALE_1,offset));
|
||||
MOV(32, R(EAX), MComplex(RBX, ECX, SCALE_1, offset));
|
||||
//#else
|
||||
// MOV(32, R(EAX), MDisp(ECX, (u32)Memory::GetMainRAMPtr() + (u32)offset));
|
||||
//#endif
|
||||
BSWAP(32,EAX);
|
||||
BSWAP(32, EAX);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
|
@ -243,7 +238,7 @@ namespace Jit64
|
|||
SafeLoadECXtoEAX(32, offset);
|
||||
}
|
||||
|
||||
MOV(32,M(&temp32), R(EAX));
|
||||
MOV(32, M(&temp32), R(EAX));
|
||||
fpr.Lock(d);
|
||||
fpr.LoadToX64(d, false);
|
||||
CVTSS2SD(fpr.RX(d), M(&temp32));
|
||||
|
@ -252,7 +247,6 @@ namespace Jit64
|
|||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
|
||||
void lfd(UGeckoInstruction inst)
|
||||
{
|
||||
BIT32OLD;
|
||||
|
@ -301,8 +295,6 @@ namespace Jit64
|
|||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
double GC_ALIGNED16(psTemp[2]) = {1.0, 1.0};
|
||||
|
||||
void stfs(UGeckoInstruction inst)
|
||||
{
|
||||
BIT32OLD;
|
||||
|
@ -364,255 +356,7 @@ namespace Jit64
|
|||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
|
||||
// TODO(ector): Improve 64-bit version
|
||||
void WriteDual32(u64 value, u32 address)
|
||||
{
|
||||
Memory::Write_U32((u32)(value>>32), address);
|
||||
Memory::Write_U32((u32)value, address+4);
|
||||
}
|
||||
|
||||
const double m_quantizeTableD[] =
|
||||
{
|
||||
(1 << 0), (1 << 1), (1 << 2), (1 << 3),
|
||||
(1 << 4), (1 << 5), (1 << 6), (1 << 7),
|
||||
(1 << 8), (1 << 9), (1 << 10), (1 << 11),
|
||||
(1 << 12), (1 << 13), (1 << 14), (1 << 15),
|
||||
(1 << 16), (1 << 17), (1 << 18), (1 << 19),
|
||||
(1 << 20), (1 << 21), (1 << 22), (1 << 23),
|
||||
(1 << 24), (1 << 25), (1 << 26), (1 << 27),
|
||||
(1 << 28), (1 << 29), (1 << 30), (1 << 31),
|
||||
1.0 / (1ULL << 32), 1.0 / (1 << 31), 1.0 / (1 << 30), 1.0 / (1 << 29),
|
||||
1.0 / (1 << 28), 1.0 / (1 << 27), 1.0 / (1 << 26), 1.0 / (1 << 25),
|
||||
1.0 / (1 << 24), 1.0 / (1 << 23), 1.0 / (1 << 22), 1.0 / (1 << 21),
|
||||
1.0 / (1 << 20), 1.0 / (1 << 19), 1.0 / (1 << 18), 1.0 / (1 << 17),
|
||||
1.0 / (1 << 16), 1.0 / (1 << 15), 1.0 / (1 << 14), 1.0 / (1 << 13),
|
||||
1.0 / (1 << 12), 1.0 / (1 << 11), 1.0 / (1 << 10), 1.0 / (1 << 9),
|
||||
1.0 / (1 << 8), 1.0 / (1 << 7), 1.0 / (1 << 6), 1.0 / (1 << 5),
|
||||
1.0 / (1 << 4), 1.0 / (1 << 3), 1.0 / (1 << 2), 1.0 / (1 << 1),
|
||||
};
|
||||
|
||||
const double m_dequantizeTableD[] =
|
||||
{
|
||||
1.0 / (1 << 0), 1.0 / (1 << 1), 1.0 / (1 << 2), 1.0 / (1 << 3),
|
||||
1.0 / (1 << 4), 1.0 / (1 << 5), 1.0 / (1 << 6), 1.0 / (1 << 7),
|
||||
1.0 / (1 << 8), 1.0 / (1 << 9), 1.0 / (1 << 10), 1.0 / (1 << 11),
|
||||
1.0 / (1 << 12), 1.0 / (1 << 13), 1.0 / (1 << 14), 1.0 / (1 << 15),
|
||||
1.0 / (1 << 16), 1.0 / (1 << 17), 1.0 / (1 << 18), 1.0 / (1 << 19),
|
||||
1.0 / (1 << 20), 1.0 / (1 << 21), 1.0 / (1 << 22), 1.0 / (1 << 23),
|
||||
1.0 / (1 << 24), 1.0 / (1 << 25), 1.0 / (1 << 26), 1.0 / (1 << 27),
|
||||
1.0 / (1 << 28), 1.0 / (1 << 29), 1.0 / (1 << 30), 1.0 / (1 << 31),
|
||||
(1ULL << 32), (1 << 31), (1 << 30), (1 << 29),
|
||||
(1 << 28), (1 << 27), (1 << 26), (1 << 25),
|
||||
(1 << 24), (1 << 23), (1 << 22), (1 << 21),
|
||||
(1 << 20), (1 << 19), (1 << 18), (1 << 17),
|
||||
(1 << 16), (1 << 15), (1 << 14), (1 << 13),
|
||||
(1 << 12), (1 << 11), (1 << 10), (1 << 9),
|
||||
(1 << 8), (1 << 7), (1 << 6), (1 << 5),
|
||||
(1 << 4), (1 << 3), (1 << 2), (1 << 1),
|
||||
};
|
||||
|
||||
u32 temp;
|
||||
void psq_st(UGeckoInstruction inst)
|
||||
{
|
||||
BIT32OLD;
|
||||
OLD;
|
||||
const UGQR gqr(rSPR(SPR_GQR0 + inst.I));
|
||||
const EQuantizeType stType = static_cast<EQuantizeType>(gqr.ST_TYPE);
|
||||
int stScale = gqr.ST_SCALE;
|
||||
bool update = inst.OPCD == 61;
|
||||
if (!inst.RA || inst.W)
|
||||
{
|
||||
// PanicAlert(inst.RA ? "W" : "inst");
|
||||
Default(inst);
|
||||
return;
|
||||
}
|
||||
|
||||
int offset = inst.SIMM_12;
|
||||
int a = inst.RA;
|
||||
int s = inst.RS; // Fp numbers
|
||||
|
||||
if (stType == QUANTIZE_FLOAT)
|
||||
{
|
||||
gpr.Flush(FLUSH_VOLATILE);
|
||||
gpr.Lock(a);
|
||||
fpr.Lock(s);
|
||||
if (update)
|
||||
gpr.LoadToX64(a, true, true);
|
||||
MOV(32, R(EDX), gpr.R(a));
|
||||
if (offset)
|
||||
ADD(32, R(EDX), Imm32((u32)offset));
|
||||
TEST(32, R(EDX), Imm32(0x0C000000));
|
||||
if (update && offset)
|
||||
MOV(32, gpr.R(a), R(EDX));
|
||||
CVTPD2PS(XMM0, fpr.R(s));
|
||||
SHUFPS(XMM0, R(XMM0), 1);
|
||||
MOVAPS(M(&temp64), XMM0);
|
||||
MOV(64, R(ECX), M(&temp64));
|
||||
FixupBranch argh = J_CC(CC_NZ);
|
||||
BSWAP(64, ECX);
|
||||
MOV(64, MComplex(RBX, EDX, SCALE_1, 0), R(ECX));
|
||||
FixupBranch arg2 = J();
|
||||
SetJumpTarget(argh);
|
||||
CALL((void *)&WriteDual32);
|
||||
SetJumpTarget(arg2);
|
||||
if (update)
|
||||
MOV(32, gpr.R(a), R(EDX));
|
||||
gpr.UnlockAll();
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
else if (stType == QUANTIZE_U8)
|
||||
{
|
||||
gpr.Flush(FLUSH_VOLATILE);
|
||||
gpr.Lock(a);
|
||||
fpr.Lock(s);
|
||||
if (update)
|
||||
gpr.LoadToX64(a, true, update);
|
||||
MOV(32, R(EDX), gpr.R(a));
|
||||
if (offset)
|
||||
ADD(32,R(EDX),Imm32((u32)offset));
|
||||
MOVAPS(XMM0, fpr.R(s));
|
||||
MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale]));
|
||||
MULPD(XMM0, R(XMM1));
|
||||
CVTPD2DQ(XMM0, R(XMM0));
|
||||
PACKSSDW(XMM0, R(XMM0));
|
||||
PACKUSWB(XMM0, R(XMM0));
|
||||
MOVAPS(M(&temp64), XMM0);
|
||||
MOV(16, R(ECX), M(&temp64));
|
||||
#ifdef _M_X64
|
||||
MOV(16, MComplex(RBX, RDX, SCALE_1, 0), R(ECX));
|
||||
#else
|
||||
BSWAP(32, ECX);
|
||||
SHR(32, R(ECX), Imm8(16));
|
||||
CALL(&Memory::Write_U16);
|
||||
#endif
|
||||
if (update)
|
||||
MOV(32, gpr.R(a), R(EDX));
|
||||
gpr.UnlockAll();
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
else if (stType == QUANTIZE_S16)
|
||||
{
|
||||
gpr.Lock(a);
|
||||
fpr.Lock(s);
|
||||
if (update)
|
||||
gpr.LoadToX64(a, true, update);
|
||||
MOV(32, R(EDX), gpr.R(a));
|
||||
if (offset)
|
||||
ADD(32,R(EDX),Imm32((u32)offset));
|
||||
MOVAPS(XMM0, fpr.R(s));
|
||||
MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale]));
|
||||
MULPD(XMM0, R(XMM1));
|
||||
SHUFPD(XMM0, R(XMM0), 1);
|
||||
CVTPD2DQ(XMM0, R(XMM0));
|
||||
PACKSSDW(XMM0, R(XMM0));
|
||||
MOVD_xmm(M(&temp64), XMM0);
|
||||
MOV(32, R(ECX), M(&temp64));
|
||||
#ifdef _M_X64
|
||||
BSWAP(32, ECX);
|
||||
MOV(32, MComplex(RBX, RDX, SCALE_1, 0), R(ECX));
|
||||
#else
|
||||
BSWAP(32, ECX);
|
||||
CALL(&Memory::Write_U32);
|
||||
#endif
|
||||
if (update)
|
||||
MOV(32, gpr.R(a), R(EDX));
|
||||
gpr.UnlockAll();
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
else {
|
||||
// Dodger uses this.
|
||||
PanicAlert("st %i:%i", stType, inst.W);
|
||||
Default(inst);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void psq_l(UGeckoInstruction inst)
|
||||
{
|
||||
BIT32OLD;
|
||||
OLD;
|
||||
const UGQR gqr(rSPR(SPR_GQR0 + inst.I));
|
||||
const EQuantizeType ldType = static_cast<EQuantizeType>(gqr.LD_TYPE);
|
||||
int ldScale = gqr.LD_SCALE;
|
||||
if (!inst.RA || inst.W)
|
||||
{
|
||||
// 0 1 during load
|
||||
//PanicAlert("ld:%i %i", ldType, (int)inst.W);
|
||||
Default(inst);
|
||||
return;
|
||||
}
|
||||
bool update = inst.OPCD == 57;
|
||||
int offset = inst.SIMM_12;
|
||||
//INT3();
|
||||
switch (ldType) {
|
||||
#ifdef _M_X64
|
||||
case QUANTIZE_FLOAT:
|
||||
{
|
||||
gpr.LoadToX64(inst.RA);
|
||||
MOV(64, R(RAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
|
||||
BSWAP(64, RAX);
|
||||
MOV(64, M(&psTemp[0]),R(RAX));
|
||||
fpr.LoadToX64(inst.RS, false);
|
||||
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
|
||||
CVTPS2PD(r, M(&psTemp[0]));
|
||||
SHUFPD(r, R(r),1);
|
||||
if (update)
|
||||
ADD(32, gpr.R(inst.RA), Imm32(offset));
|
||||
break;
|
||||
}
|
||||
|
||||
case QUANTIZE_U8:
|
||||
{
|
||||
gpr.LoadToX64(inst.RA);
|
||||
XOR(32, R(EAX), R(EAX));
|
||||
MOV(16, R(EAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
|
||||
MOV(32, M(&temp64), R(EAX));
|
||||
MOVD_xmm(XMM0, M(&temp64));
|
||||
// SSE4 optimization opportunity here.
|
||||
PXOR(XMM1, R(XMM1));
|
||||
PUNPCKLBW(XMM0, R(XMM1));
|
||||
PUNPCKLWD(XMM0, R(XMM1));
|
||||
CVTDQ2PD(XMM0, R(XMM0));
|
||||
fpr.LoadToX64(inst.RS, false);
|
||||
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
|
||||
MOVDDUP(r, M((void *)&m_dequantizeTableD[ldScale]));
|
||||
MULPD(r, R(XMM0));
|
||||
if (update)
|
||||
ADD(32, gpr.R(inst.RA), Imm32(offset));
|
||||
}
|
||||
break;
|
||||
case QUANTIZE_S16:
|
||||
{
|
||||
gpr.LoadToX64(inst.RA);
|
||||
MOV(32, R(EAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
|
||||
BSWAP(32, EAX);
|
||||
MOV(32, M(&temp64), R(EAX));
|
||||
//INT3();
|
||||
fpr.LoadToX64(inst.RS, false);
|
||||
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
|
||||
MOVD_xmm(XMM0, M(&temp64));
|
||||
PUNPCKLWD(XMM0, R(XMM0)); // unpack to higher word in each dword..
|
||||
PSRAD(XMM0, 16); // then use this signed shift to sign extend. clever eh? :P
|
||||
CVTDQ2PD(XMM0, R(XMM0));
|
||||
MOVDDUP(r, M((void*)&m_dequantizeTableD[ldScale]));
|
||||
MULPD(r, R(XMM0));
|
||||
SHUFPD(r, R(r), 1);
|
||||
if (update)
|
||||
ADD(32, gpr.R(inst.RA), Imm32(offset));
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
// 4 0
|
||||
PanicAlert("ld:%i %i", ldType, (int)inst.W);
|
||||
Default(inst);
|
||||
return;
|
||||
}
|
||||
|
||||
//u32 EA = (m_GPR[_inst.RA] + _inst.SIMM_12) : _inst.SIMM_12;
|
||||
}
|
||||
|
||||
// Zero cache line.
|
||||
void dcbz(UGeckoInstruction inst)
|
||||
{
|
||||
#ifdef _M_IX86
|
||||
|
|
|
@ -0,0 +1,312 @@
|
|||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
// TODO(ector): Tons of pshufb optimization of the loads/stores, for SSSE3+, possibly SSE4, only.
|
||||
// Should give a very noticeable speed boost to paired-single-heavy code.
|
||||
|
||||
#include "../PowerPC.h"
|
||||
#include "../../Core.h"
|
||||
#include "../../HW/GPFifo.h"
|
||||
#include "../../HW/CommandProcessor.h"
|
||||
#include "../../HW/PixelEngine.h"
|
||||
#include "../../HW/Memmap.h"
|
||||
#include "../PPCTables.h"
|
||||
#include "x64Emitter.h"
|
||||
|
||||
#include "Jit.h"
|
||||
#include "JitCache.h"
|
||||
#include "JitAsm.h"
|
||||
#include "JitRegCache.h"
|
||||
|
||||
// OLD: per-handler switch placed at the top of the JIT handlers below. It
// currently expands to nothing; the commented-out alternative would make every
// handler that uses it fall back to Default(inst) immediately.
#define OLD
|
||||
//#define OLD Default(inst); return;
|
||||
|
||||
// BIT32OLD: on 32-bit x86 builds (_M_IX86) a handler that starts with this
// macro falls back to Default(inst) and returns right away; on other builds it
// is an empty statement.
#ifdef _M_IX86
|
||||
#define BIT32OLD Default(inst); return;
|
||||
#else
|
||||
#define BIT32OLD ;
|
||||
#endif
|
||||
|
||||
namespace Jit64 {
|
||||
|
||||
// Scratch memory that the emitted code in this file references by address.
// psTemp: receives the byte-swapped 64-bit load in the psq_l float path before
// it is converted to doubles.
static double GC_ALIGNED16(psTemp[2]) = {1.0, 1.0};
|
||||
// temp64: staging slot used to move data between XMM and general registers.
// NOTE(review): psq_st stores to it with MOVAPS, which is a 128-bit store,
// while this is declared as a u64 - confirm the store cannot overrun it.
static u64 GC_ALIGNED16(temp64);
|
||||
// temp32: NOTE(review): not referenced anywhere in this file - confirm before removing.
static u32 GC_ALIGNED16(temp32);
|
||||
|
||||
|
||||
// TODO(ector): Improve 64-bit version
|
||||
// Writes a 64-bit value to emulated memory as two 32-bit stores: the upper
// half goes to `address`, the lower half to `address + 4`.
// The psq_st emitter calls this through a raw function pointer, so the
// signature must stay as it is.
void WriteDual32(u64 value, u32 address)
{
	const u32 upperWord = (u32)(value >> 32);
	const u32 lowerWord = (u32)value;
	Memory::Write_U32(upperWord, address);
	Memory::Write_U32(lowerWord, address + 4);
}
|
||||
|
||||
// Scale factors applied before converting to integers in psq_st, indexed by
// the GQR store-scale value (0..63).
//   entries  0..31 : 2^0   .. 2^31
//   entries 32..63 : 2^-32 .. 2^-1
// (presumably the scale is a signed 6-bit field, so the upper half covers the
// negative exponents - confirm against the interpreter tables).
//
// All shifts are done on 1ULL: with a plain `int`, `1 << 31` lands in the sign
// bit and produced -2^31 (entry 31) and -2^-31 (entry 33) instead of the
// intended positive factors.
const double m_quantizeTableD[] =
{
	(1ULL <<  0), (1ULL <<  1), (1ULL <<  2), (1ULL <<  3),
	(1ULL <<  4), (1ULL <<  5), (1ULL <<  6), (1ULL <<  7),
	(1ULL <<  8), (1ULL <<  9), (1ULL << 10), (1ULL << 11),
	(1ULL << 12), (1ULL << 13), (1ULL << 14), (1ULL << 15),
	(1ULL << 16), (1ULL << 17), (1ULL << 18), (1ULL << 19),
	(1ULL << 20), (1ULL << 21), (1ULL << 22), (1ULL << 23),
	(1ULL << 24), (1ULL << 25), (1ULL << 26), (1ULL << 27),
	(1ULL << 28), (1ULL << 29), (1ULL << 30), (1ULL << 31),
	1.0 / (1ULL << 32), 1.0 / (1ULL << 31), 1.0 / (1ULL << 30), 1.0 / (1ULL << 29),
	1.0 / (1ULL << 28), 1.0 / (1ULL << 27), 1.0 / (1ULL << 26), 1.0 / (1ULL << 25),
	1.0 / (1ULL << 24), 1.0 / (1ULL << 23), 1.0 / (1ULL << 22), 1.0 / (1ULL << 21),
	1.0 / (1ULL << 20), 1.0 / (1ULL << 19), 1.0 / (1ULL << 18), 1.0 / (1ULL << 17),
	1.0 / (1ULL << 16), 1.0 / (1ULL << 15), 1.0 / (1ULL << 14), 1.0 / (1ULL << 13),
	1.0 / (1ULL << 12), 1.0 / (1ULL << 11), 1.0 / (1ULL << 10), 1.0 / (1ULL <<  9),
	1.0 / (1ULL <<  8), 1.0 / (1ULL <<  7), 1.0 / (1ULL <<  6), 1.0 / (1ULL <<  5),
	1.0 / (1ULL <<  4), 1.0 / (1ULL <<  3), 1.0 / (1ULL <<  2), 1.0 / (1ULL <<  1),
};
|
||||
|
||||
// Scale factors applied after converting integers to doubles in psq_l,
// indexed by the GQR load-scale value (0..63). Each entry is the reciprocal of
// the matching quantize factor:
//   entries  0..31 : 2^-0 .. 2^-31
//   entries 32..63 : 2^32 .. 2^1
//
// All shifts are done on 1ULL: with a plain `int`, `1 << 31` lands in the sign
// bit and produced -2^-31 (entry 31) and -2^31 (entry 33) instead of the
// intended positive factors.
const double m_dequantizeTableD[] =
{
	1.0 / (1ULL <<  0), 1.0 / (1ULL <<  1), 1.0 / (1ULL <<  2), 1.0 / (1ULL <<  3),
	1.0 / (1ULL <<  4), 1.0 / (1ULL <<  5), 1.0 / (1ULL <<  6), 1.0 / (1ULL <<  7),
	1.0 / (1ULL <<  8), 1.0 / (1ULL <<  9), 1.0 / (1ULL << 10), 1.0 / (1ULL << 11),
	1.0 / (1ULL << 12), 1.0 / (1ULL << 13), 1.0 / (1ULL << 14), 1.0 / (1ULL << 15),
	1.0 / (1ULL << 16), 1.0 / (1ULL << 17), 1.0 / (1ULL << 18), 1.0 / (1ULL << 19),
	1.0 / (1ULL << 20), 1.0 / (1ULL << 21), 1.0 / (1ULL << 22), 1.0 / (1ULL << 23),
	1.0 / (1ULL << 24), 1.0 / (1ULL << 25), 1.0 / (1ULL << 26), 1.0 / (1ULL << 27),
	1.0 / (1ULL << 28), 1.0 / (1ULL << 29), 1.0 / (1ULL << 30), 1.0 / (1ULL << 31),
	(1ULL << 32), (1ULL << 31), (1ULL << 30), (1ULL << 29),
	(1ULL << 28), (1ULL << 27), (1ULL << 26), (1ULL << 25),
	(1ULL << 24), (1ULL << 23), (1ULL << 22), (1ULL << 21),
	(1ULL << 20), (1ULL << 19), (1ULL << 18), (1ULL << 17),
	(1ULL << 16), (1ULL << 15), (1ULL << 14), (1ULL << 13),
	(1ULL << 12), (1ULL << 11), (1ULL << 10), (1ULL <<  9),
	(1ULL <<  8), (1ULL <<  7), (1ULL <<  6), (1ULL <<  5),
	(1ULL <<  4), (1ULL <<  3), (1ULL <<  2), (1ULL <<  1),
};
|
||||
|
||||
// NOTE(review): nothing in this file reads or writes this variable, and it is
// declared without `static` - confirm no other translation unit relies on it
// before removing.
u32 temp;
|
||||
// JIT emitter for the paired-single quantized store. Opcode 61 is handled as
// the update form, which writes the effective address back to rA.
//
// Falls back to the generic Default(inst) handler when:
//   - BIT32OLD fires (it expands to "Default(inst); return;" on _M_IX86),
//   - the OptimizeQuantizers core option is off,
//   - rA is 0 or the W bit is set,
//   - the store type in the selected GQR is not float, u8 or s16.
//
// The GQR is read while this function runs, so the store type and scale are
// baked into the emitted code.
// NOTE(review): presumably the emitted code goes stale if the guest rewrites
// that GQR later - confirm; this may be what OptimizeQuantizers works around.
void psq_st(UGeckoInstruction inst)
{
	BIT32OLD;
	OLD;
	if (!Core::GetStartupParameter().bOptimizeQuantizers)
	{
		Default(inst);
		return;
	}

	const UGQR quantReg(rSPR(SPR_GQR0 + inst.I));
	const EQuantizeType storeType = static_cast<EQuantizeType>(quantReg.ST_TYPE);
	int scaleIndex = quantReg.ST_SCALE;
	bool writeBack = (inst.OPCD == 61);

	if (!inst.RA || inst.W)
	{
		Default(inst);
		return;
	}

	int disp = inst.SIMM_12;
	int addrReg = inst.RA;
	int srcFpr = inst.RS;  // floating point register holding the pair

	switch (storeType)
	{
	case QUANTIZE_FLOAT:
		{
			gpr.Flush(FLUSH_VOLATILE);
			gpr.Lock(addrReg);
			fpr.Lock(srcFpr);
			if (writeBack)
				gpr.LoadToX64(addrReg, true, true);

			// EDX = effective address.
			MOV(32, R(EDX), gpr.R(addrReg));
			if (disp)
				ADD(32, R(EDX), Imm32((u32)disp));

			// The flags set here are consumed by the J_CC further down.
			// NOTE(review): presumably the mask picks out addresses that
			// cannot be written directly through RBX - confirm.
			TEST(32, R(EDX), Imm32(0x0C000000));
			if (writeBack && disp)
				MOV(32, gpr.R(addrReg), R(EDX));

			// Convert the pair to two singles and stage it in RCX.
			CVTPD2PS(XMM0, fpr.R(srcFpr));
			SHUFPS(XMM0, R(XMM0), 1);
			// NOTE(review): MOVAPS is a 128-bit store but temp64 is a u64 -
			// confirm this cannot overrun it.
			MOVAPS(M(&temp64), XMM0);
			MOV(64, R(ECX), M(&temp64));

			FixupBranch slowPath = J_CC(CC_NZ);
			// Direct path: byte-swap and store all 64 bits at once.
			BSWAP(64, ECX);
			MOV(64, MComplex(RBX, EDX, SCALE_1, 0), R(ECX));
			FixupBranch finished = J();
			// Otherwise go through WriteDual32 (value in RCX, address in EDX).
			SetJumpTarget(slowPath);
			CALL((void *)&WriteDual32);
			SetJumpTarget(finished);
			// NOTE(review): the shared write-back after the switch reads EDX
			// after this CALL - confirm the callee leaves EDX intact.
		}
		break;

	case QUANTIZE_U8:
		{
			gpr.Flush(FLUSH_VOLATILE);
			gpr.Lock(addrReg);
			fpr.Lock(srcFpr);
			if (writeBack)
				gpr.LoadToX64(addrReg, true, true);

			MOV(32, R(EDX), gpr.R(addrReg));
			if (disp)
				ADD(32, R(EDX), Imm32((u32)disp));

			// Scale, convert to integers, then saturate down to two bytes.
			MOVAPS(XMM0, fpr.R(srcFpr));
			MOVDDUP(XMM1, M((void*)&m_quantizeTableD[scaleIndex]));
			MULPD(XMM0, R(XMM1));
			CVTPD2DQ(XMM0, R(XMM0));
			PACKSSDW(XMM0, R(XMM0));
			PACKUSWB(XMM0, R(XMM0));
			MOVAPS(M(&temp64), XMM0);
			MOV(16, R(ECX), M(&temp64));
#ifdef _M_X64
			MOV(16, MComplex(RBX, RDX, SCALE_1, 0), R(ECX));
#else
			BSWAP(32, ECX);
			SHR(32, R(ECX), Imm8(16));
			CALL(&Memory::Write_U16);
#endif
		}
		break;

	case QUANTIZE_S16:
		{
			// Unlike the other two paths, this one emits no volatile flush.
			gpr.Lock(addrReg);
			fpr.Lock(srcFpr);
			if (writeBack)
				gpr.LoadToX64(addrReg, true, true);

			MOV(32, R(EDX), gpr.R(addrReg));
			if (disp)
				ADD(32, R(EDX), Imm32((u32)disp));

			// Scale, swap the two lanes, convert and saturate to two s16.
			MOVAPS(XMM0, fpr.R(srcFpr));
			MOVDDUP(XMM1, M((void*)&m_quantizeTableD[scaleIndex]));
			MULPD(XMM0, R(XMM1));
			SHUFPD(XMM0, R(XMM0), 1);
			CVTPD2DQ(XMM0, R(XMM0));
			PACKSSDW(XMM0, R(XMM0));
			MOVD_xmm(M(&temp64), XMM0);
			MOV(32, R(ECX), M(&temp64));
#ifdef _M_X64
			BSWAP(32, ECX);
			MOV(32, MComplex(RBX, RDX, SCALE_1, 0), R(ECX));
#else
			BSWAP(32, ECX);
			CALL(&Memory::Write_U32);
#endif
		}
		break;

	default:
		// Dodger uses this.
		PanicAlert("st %i:%i", storeType, inst.W);
		Default(inst);
		return;
	}

	// Shared tail of the three emitted paths.
	if (writeBack)
		MOV(32, gpr.R(addrReg), R(EDX));
	gpr.UnlockAll();
	fpr.UnlockAll();
}
|
||||
|
||||
|
||||
// JIT emitter for the paired-single quantized load. Opcode 57 is handled as
// the update form, which adds the displacement to rA after the load.
//
// Falls back to the generic Default(inst) handler when:
//   - BIT32OLD fires (it expands to "Default(inst); return;" on _M_IX86),
//   - the OptimizeQuantizers core option is off,
//   - rA is 0 or the W bit is set,
//   - the load type in the selected GQR has no fast path here (the fast paths
//     for float, u8 and s16 are only compiled on _M_X64).
//
// The GQR is read while this function runs, so the load type and scale are
// baked into the emitted code.
// NOTE(review): presumably the emitted code goes stale if the guest rewrites
// that GQR later - confirm.
void psq_l(UGeckoInstruction inst)
{
	BIT32OLD;
	OLD;
	if (!Core::GetStartupParameter().bOptimizeQuantizers)
	{
		Default(inst);
		return;
	}

	const UGQR quantReg(rSPR(SPR_GQR0 + inst.I));
	const EQuantizeType loadType = static_cast<EQuantizeType>(quantReg.LD_TYPE);
	int scaleIndex = quantReg.LD_SCALE;

	if (!inst.RA || inst.W)
	{
		// Seen during load: type 0 with W = 1.
		Default(inst);
		return;
	}

	bool writeBack = (inst.OPCD == 57);
	int disp = inst.SIMM_12;

	switch (loadType)
	{
#ifdef _M_X64
	case QUANTIZE_FLOAT:
		{
			// Load both singles at once, byte-swap, widen to doubles, then
			// swap the two lanes.
			gpr.LoadToX64(inst.RA);
			MOV(64, R(RAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, disp));
			BSWAP(64, RAX);
			MOV(64, M(&psTemp[0]), R(RAX));
			fpr.LoadToX64(inst.RS, false);
			X64Reg dest = fpr.R(inst.RS).GetSimpleReg();
			CVTPS2PD(dest, M(&psTemp[0]));
			SHUFPD(dest, R(dest), 1);
		}
		break;

	case QUANTIZE_U8:
		{
			gpr.LoadToX64(inst.RA);
			XOR(32, R(EAX), R(EAX));
			MOV(16, R(EAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, disp));
			MOV(32, M(&temp64), R(EAX));
			MOVD_xmm(XMM0, M(&temp64));
			// SSE4 optimization opportunity here.
			// Zero-extend the two bytes to dwords by interleaving with zeros.
			PXOR(XMM1, R(XMM1));
			PUNPCKLBW(XMM0, R(XMM1));
			PUNPCKLWD(XMM0, R(XMM1));
			CVTDQ2PD(XMM0, R(XMM0));
			fpr.LoadToX64(inst.RS, false);
			X64Reg dest = fpr.R(inst.RS).GetSimpleReg();
			MOVDDUP(dest, M((void *)&m_dequantizeTableD[scaleIndex]));
			MULPD(dest, R(XMM0));
		}
		break;

	case QUANTIZE_S16:
		{
			gpr.LoadToX64(inst.RA);
			MOV(32, R(EAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, disp));
			BSWAP(32, EAX);
			MOV(32, M(&temp64), R(EAX));
			fpr.LoadToX64(inst.RS, false);
			X64Reg dest = fpr.R(inst.RS).GetSimpleReg();
			MOVD_xmm(XMM0, M(&temp64));
			// Put each word in the upper half of a dword, then shift right
			// arithmetically by 16 to sign-extend it.
			PUNPCKLWD(XMM0, R(XMM0));
			PSRAD(XMM0, 16);
			CVTDQ2PD(XMM0, R(XMM0));
			MOVDDUP(dest, M((void*)&m_dequantizeTableD[scaleIndex]));
			MULPD(dest, R(XMM0));
			SHUFPD(dest, R(dest), 1);
		}
		break;
#endif
	default:
		// Type/W combinations seen so far: 4 0, 6 0 (power tennis), 5 0.
		PanicAlert("ld:%i %i", loadType, (int)inst.W);
		Default(inst);
		return;
	}

	// Shared tail of the fast paths: the update form bumps rA by the displacement.
	if (writeBack)
		ADD(32, gpr.R(inst.RA), Imm32(disp));
}
|
||||
|
||||
} // namespace
|
|
@ -24,6 +24,7 @@
|
|||
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
|
||||
ConfigurationType="4"
|
||||
CharacterSet="2"
|
||||
WholeProgramOptimization="0"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
|
@ -43,6 +44,7 @@
|
|||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="0"
|
||||
WholeProgramOptimization="false"
|
||||
AdditionalIncludeDirectories="..\..\..\Externals\wxWidgets\Include;..\..\..\Externals\wxWidgets\Include\msvc;..\Core\Src;;..\Common\Src"
|
||||
PreprocessorDefinitions="WIN32;_DEBUG;_LIB;__WXMSW__"
|
||||
MinimalRebuild="true"
|
||||
|
@ -88,6 +90,7 @@
|
|||
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
|
||||
ConfigurationType="4"
|
||||
CharacterSet="2"
|
||||
WholeProgramOptimization="0"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
|
@ -108,6 +111,7 @@
|
|||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="0"
|
||||
WholeProgramOptimization="false"
|
||||
AdditionalIncludeDirectories="..\..\..\Externals\wxWidgets\Include;..\..\..\Externals\wxWidgets\Include\msvc;..\Core\Src;;..\Common\Src"
|
||||
PreprocessorDefinitions="WIN32;_DEBUG;_LIB;__WXMSW__"
|
||||
MinimalRebuild="true"
|
||||
|
@ -153,7 +157,7 @@
|
|||
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
|
||||
ConfigurationType="4"
|
||||
CharacterSet="2"
|
||||
WholeProgramOptimization="1"
|
||||
WholeProgramOptimization="0"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
|
@ -172,6 +176,7 @@
|
|||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
WholeProgramOptimization="false"
|
||||
AdditionalIncludeDirectories="..\..\..\Externals\wxWidgets\Include;..\..\..\Externals\wxWidgets\Include\msvc;..\Core\Src;;..\Common\Src"
|
||||
PreprocessorDefinitions="WIN32;NDEBUG;_LIB;__WXMSW__"
|
||||
RuntimeLibrary="0"
|
||||
|
@ -215,7 +220,7 @@
|
|||
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
|
||||
ConfigurationType="4"
|
||||
CharacterSet="2"
|
||||
WholeProgramOptimization="1"
|
||||
WholeProgramOptimization="0"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
|
@ -235,6 +240,7 @@
|
|||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
WholeProgramOptimization="false"
|
||||
AdditionalIncludeDirectories="..\..\..\Externals\wxWidgets\Include;..\..\..\Externals\wxWidgets\Include\msvc;..\Core\Src;;..\Common\Src"
|
||||
PreprocessorDefinitions="WIN32;NDEBUG;_LIB;__WXMSW__"
|
||||
RuntimeLibrary="0"
|
||||
|
@ -279,6 +285,7 @@
|
|||
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
|
||||
ConfigurationType="4"
|
||||
CharacterSet="2"
|
||||
WholeProgramOptimization="0"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
|
@ -298,6 +305,7 @@
|
|||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="0"
|
||||
WholeProgramOptimization="false"
|
||||
AdditionalIncludeDirectories="..\..\..\Externals\wxWidgets\Include;..\..\..\Externals\wxWidgets\Include\msvc;..\Core\Src;;..\Common\Src"
|
||||
PreprocessorDefinitions="WIN32;__WXMSW__;_WINDOWS;NOPCH;_SECURE_SCL=0;_CRT_SECURE_NO_WARNINGS"
|
||||
MinimalRebuild="true"
|
||||
|
@ -343,6 +351,7 @@
|
|||
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
|
||||
ConfigurationType="4"
|
||||
CharacterSet="2"
|
||||
WholeProgramOptimization="0"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
|
@ -363,6 +372,7 @@
|
|||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="0"
|
||||
WholeProgramOptimization="false"
|
||||
AdditionalIncludeDirectories="..\..\..\Externals\wxWidgets\Include;..\..\..\Externals\wxWidgets\Include\msvc;..\Core\Src;;..\Common\Src"
|
||||
PreprocessorDefinitions="WIN32;__WXMSW__;_WINDOWS;NOPCH;_SECURE_SCL=0;_CRT_SECURE_NO_WARNINGS"
|
||||
MinimalRebuild="true"
|
||||
|
|
|
@ -64,10 +64,12 @@ void SConfig::SaveSettings()
|
|||
ini.Set("Core", "PadPlugin", m_LocalCoreStartupParameter.m_strPadPlugin);
|
||||
|
||||
ini.Set("Core", "HLEBios", m_LocalCoreStartupParameter.bHLEBios);
|
||||
ini.Set("Core", "UseDynarec", m_LocalCoreStartupParameter.bUseDynarec);
|
||||
ini.Set("Core", "UseDynarec", m_LocalCoreStartupParameter.bUseDynarec);
|
||||
ini.Set("Core", "UseDualCore", m_LocalCoreStartupParameter.bUseDualCore);
|
||||
ini.Set("Core", "Throttle", m_LocalCoreStartupParameter.bThrottle);
|
||||
ini.Set("Core", "DefaultGCM", m_LocalCoreStartupParameter.m_strDefaultGCM);
|
||||
ini.Set("Core", "LockThreads", m_LocalCoreStartupParameter.bLockThreads);
|
||||
ini.Set("Core", "DefaultGCM", m_LocalCoreStartupParameter.m_strDefaultGCM);
|
||||
ini.Set("Core", "OptimizeQuantizers", m_LocalCoreStartupParameter.bOptimizeQuantizers);
|
||||
}
|
||||
|
||||
ini.Save("Dolphin.ini");
|
||||
|
@ -115,7 +117,7 @@ void SConfig::LoadSettings()
|
|||
ini.Get("Core", "UseDynarec", &m_LocalCoreStartupParameter.bUseDynarec, false);
|
||||
ini.Get("Core", "UseDualCore", &m_LocalCoreStartupParameter.bUseDualCore, false);
|
||||
ini.Get("Core", "Throttle", &m_LocalCoreStartupParameter.bThrottle, true);
|
||||
ini.Get("Core", "LockThreads", &m_LocalCoreStartupParameter.bLockThreads, true);
|
||||
ini.Get("Core", "OptimizeQuantizers", &m_LocalCoreStartupParameter.bOptimizeQuantizers, true);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue