RIP "Optimize Quantizers" option. Now using the safe quantizer code from JITIL in all builds.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4854 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2010-01-16 19:00:09 +00:00
parent 1a25dfe279
commit 1848e93790
27 changed files with 613 additions and 823 deletions

View File

@ -107,6 +107,7 @@ void XEmitter::ABI_CallFunctionR(void *func, X64Reg reg1) {
ABI_RestoreStack(1 * 4);
}
// Pass two registers as parameters.
void XEmitter::ABI_CallFunctionRR(void *func, Gen::X64Reg reg1, Gen::X64Reg reg2)
{
ABI_AlignStack(2 * 4);
@ -216,18 +217,18 @@ void XEmitter::ABI_CallFunctionR(void *func, X64Reg reg1) {
CALL(func);
}
// Pass a register as a parameter.
// Pass two registers as parameters.
void XEmitter::ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2) {
if (reg2 != ABI_PARAM1) {
if (reg1 != ABI_PARAM1)
MOV(32, R(ABI_PARAM1), R(reg1));
MOV(64, R(ABI_PARAM1), R(reg1));
if (reg2 != ABI_PARAM2)
MOV(32, R(ABI_PARAM2), R(reg2));
MOV(64, R(ABI_PARAM2), R(reg2));
} else {
if (reg2 != ABI_PARAM2)
MOV(32, R(ABI_PARAM2), R(reg2));
MOV(64, R(ABI_PARAM2), R(reg2));
if (reg1 != ABI_PARAM1)
MOV(32, R(ABI_PARAM1), R(reg1));
MOV(64, R(ABI_PARAM1), R(reg1));
}
CALL(func);
}

View File

@ -1938,6 +1938,18 @@
RelativePath=".\Src\PowerPC\JitCommon\Jit_Util.cpp"
>
</File>
<File
RelativePath=".\Src\PowerPC\JitCommon\Jit_Util.h"
>
</File>
<File
RelativePath=".\Src\PowerPC\JitCommon\JitAsmCommon.cpp"
>
</File>
<File
RelativePath=".\Src\PowerPC\JitCommon\JitAsmCommon.h"
>
</File>
<File
RelativePath=".\Src\PowerPC\JitCommon\JitBackpatch.cpp"
>

View File

@ -108,7 +108,6 @@ void SConfig::SaveSettings()
ini.Set("Core", "DefaultGCM", m_LocalCoreStartupParameter.m_strDefaultGCM);
ini.Set("Core", "DVDRoot", m_LocalCoreStartupParameter.m_strDVDRoot);
ini.Set("Core", "Apploader", m_LocalCoreStartupParameter.m_strApploader);
ini.Set("Core", "OptimizeQuantizers", m_LocalCoreStartupParameter.bOptimizeQuantizers);
ini.Set("Core", "EnableCheats", m_LocalCoreStartupParameter.bEnableCheats);
ini.Set("Core", "SelectedLanguage", m_LocalCoreStartupParameter.SelectedLanguage);
ini.Set("Core", "MemcardA", m_strMemoryCardA);
@ -225,7 +224,6 @@ void SConfig::LoadSettings()
ini.Get("Core", "DefaultGCM", &m_LocalCoreStartupParameter.m_strDefaultGCM);
ini.Get("Core", "DVDRoot", &m_LocalCoreStartupParameter.m_strDVDRoot);
ini.Get("Core", "Apploader", &m_LocalCoreStartupParameter.m_strApploader);
ini.Get("Core", "OptimizeQuantizers", &m_LocalCoreStartupParameter.bOptimizeQuantizers, true);
ini.Get("Core", "EnableCheats", &m_LocalCoreStartupParameter.bEnableCheats, false);
ini.Get("Core", "SelectedLanguage", &m_LocalCoreStartupParameter.SelectedLanguage, 0);
ini.Get("Core", "MemcardA", &m_strMemoryCardA);

View File

@ -63,7 +63,6 @@ struct SCoreStartupParameter
bool bHLE_BS2;
bool bUseFastMem;
bool bLockThreads;
bool bOptimizeQuantizers;
bool bEnableCheats;
bool bEnableIsoCache;

View File

@ -126,7 +126,7 @@ inline void hwWriteIOBridge(u32 var, u32 addr) {WII_IOBridge::Write32(var, addr)
inline void hwWriteIOBridge(u64 var, u32 addr) {PanicAlert("hwWriteIOBridge: There's no 64-bit HW write. %08x", addr);}
template <class T>
void ReadFromHardware(T &_var, u32 em_address, u32 effective_address, Memory::XCheckTLBFlag flag)
inline void ReadFromHardware(T &_var, u32 em_address, u32 effective_address, Memory::XCheckTLBFlag flag)
{
// TODO: Figure out the fastest order of tests for both read and write (they are probably different).
if ((em_address & 0xC8000000) == 0xC8000000)
@ -204,7 +204,7 @@ void ReadFromHardware(T &_var, u32 em_address, u32 effective_address, Memory::XC
template <class T>
void WriteToHardware(u32 em_address, const T data, u32 effective_address, Memory::XCheckTLBFlag flag)
inline void WriteToHardware(u32 em_address, const T data, u32 effective_address, Memory::XCheckTLBFlag flag)
{
/* Debugging: CheckForBadAddresses##_type(em_address, data, false);*/
if ((em_address & 0xC8000000) == 0xC8000000)
@ -343,13 +343,6 @@ u16 Read_U16(const u32 _Address)
u32 Read_U32(const u32 _Address)
{
/*#if MAX_LOGLEVEL >= 4
if (_Address == 0x00000000)
{
//PanicAlert("Program tried to read from [00000000]");
//return 0x00000000;
}
#endif*/
u32 _var = 0;
ReadFromHardware<u32>(_var, _Address, _Address, FLAG_READ);
#ifdef ENABLE_MEM_CHECK

View File

@ -2751,7 +2751,6 @@ DEFINE_LUA_FUNCTION(emulua_loadrom, "filename")
// General settings
game_ini.Get("Core", "CPUOnThread", &StartUp.bCPUThread, StartUp.bCPUThread);
game_ini.Get("Core", "SkipIdle", &StartUp.bSkipIdle, StartUp.bSkipIdle);
game_ini.Get("Core", "OptimizeQuantizers", &StartUp.bOptimizeQuantizers, StartUp.bOptimizeQuantizers);
game_ini.Get("Core", "EnableFPRF", &StartUp.bEnableFPRF, StartUp.bEnableFPRF);
game_ini.Get("Core", "TLBHack", &StartUp.iTLBHack, StartUp.iTLBHack);
// Wii settings

View File

@ -42,6 +42,7 @@
#include "../PPCAnalyst.h"
#include "../JitCommon/JitCache.h"
#include "../JitCommon/Jit_Util.h"
#include "JitRegCache.h"
#include "x64Emitter.h"
#include "x64Analyzer.h"
@ -93,7 +94,7 @@ public:
};
class Jit64 : public Gen::XCodeBlock
class Jit64 : public EmuCodeBlock
{
private:
struct JitState
@ -182,26 +183,14 @@ public:
void WriteRfiExitDestInEAX();
void WriteCallInterpreter(UGeckoInstruction _inst);
void Cleanup();
void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false);
void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0);
void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset, bool signExtend = false);
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset);
void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address);
void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);
void GenerateCarry(Gen::X64Reg temp_reg);
void ForceSinglePrecisionS(Gen::X64Reg xmm);
void ForceSinglePrecisionP(Gen::X64Reg xmm);
void JitClearCA();
void JitSetCA();
void tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg));
typedef u32 (*Operation)(u32 a, u32 b);
void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false);
void fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg));
// OPCODES
void unknown_instruction(UGeckoInstruction _inst);
void Default(UGeckoInstruction _inst);

View File

@ -216,61 +216,6 @@ void AsmRoutineManager::Generate()
GenerateCommon();
}
// Emits a routine that appends a `size`-bit integer to the GP gather pipe.
// The emitted code byte-swaps the value in ABI_PARAM1, stores it at
// GPFifo::m_gatherPipe + GPFifo::m_gatherPipeCount, advances the count by
// size / 8 bytes and returns. ESI (and EDX when size != 32) are preserved
// with PUSH/POP; EAX is overwritten with the gather pipe base address.
void AsmRoutineManager::GenFifoWrite(int size)
{
// Assume value in ABI_PARAM1
PUSH(ESI);
if (size != 32)
PUSH(EDX);
BSWAP(size, ABI_PARAM1);
// EAX = base of the gather pipe buffer, ESI = current write offset.
MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
if (size != 32) {
// For sizes other than 32 bits the value is copied to EDX and stored from there.
MOV(32, R(EDX), R(ABI_PARAM1));
MOV(size, MComplex(RAX, RSI, 1, 0), R(EDX));
} else {
MOV(size, MComplex(RAX, RSI, 1, 0), R(ABI_PARAM1));
}
// Advance the write offset by the number of bytes written and store it back.
ADD(32, R(ESI), Imm8(size >> 3));
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
if (size != 32)
POP(EDX);
POP(ESI);
RET();
}
// Emits a routine that appends the single-precision float in XMM0 to the GP
// gather pipe. The float's bits are moved to EDX through the temp32 memory
// slot, byte-swapped, stored at m_gatherPipe + m_gatherPipeCount, and the
// count is advanced by 4. ESI and EDX are preserved; EAX is overwritten.
void AsmRoutineManager::GenFifoFloatWrite()
{
// Assume value in XMM0
PUSH(ESI);
PUSH(EDX);
// Transfer the raw float bits from XMM0 to EDX via memory.
MOVSS(M(&temp32), XMM0);
MOV(32, R(EDX), M(&temp32));
BSWAP(32, EDX);
// EAX = base of the gather pipe buffer, ESI = current write offset.
MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
MOV(32, MComplex(RAX, RSI, 1, 0), R(EDX));
ADD(32, R(ESI), Imm8(4));
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
POP(EDX);
POP(ESI);
RET();
}
// Emits a routine that appends the low 64 bits of XMM0 to the GP gather pipe
// without any byte swapping, then advances m_gatherPipeCount by 8.
// ESI is preserved; EAX is overwritten with the gather pipe base address.
void AsmRoutineManager::GenFifoXmm64Write()
{
// Assume value in XMM0. Assume pre-byteswapped (unlike the others here!)
PUSH(ESI);
// EAX = base of the gather pipe buffer, ESI = current write offset.
MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
MOVQ_xmm(MComplex(RAX, RSI, 1, 0), XMM0);
ADD(32, R(ESI), Imm8(8));
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
POP(ESI);
RET();
}
void AsmRoutineManager::GenerateCommon()
{
// USES_CR
@ -298,7 +243,9 @@ void AsmRoutineManager::GenerateCommon()
fifoDirectWriteXmm64 = AlignCode4();
GenFifoXmm64Write();
computeRcFp = AlignCode16();
GenQuantizedLoads();
GenQuantizedStores();
//CMPSD(R(XMM0), M(&zero),
// TODO

View File

@ -19,6 +19,7 @@
#define _JITASM_H
#include "x64Emitter.h"
#include "../JitCommon/JitAsmCommon.h"
// In Dolphin, we don't use inline assembly. Instead, we generate all machine-near
// code at runtime. In the case of fixed code like this, after writing it, we write
@ -34,14 +35,11 @@
// To add a new asm routine, just add another const here, and add the code to Generate.
// Also, possibly increase the size of the code buffer.
class AsmRoutineManager : public Gen::XCodeBlock
class AsmRoutineManager : public CommonAsmRoutines
{
private:
void Generate();
void GenerateCommon();
void GenFifoWrite(int size);
void GenFifoFloatWrite();
void GenFifoXmm64Write();
public:
void Init() {
@ -65,7 +63,6 @@ public:
const u8 *fpException;
const u8 *computeRc;
const u8 *computeRcFp;
const u8 *testExceptions;
const u8 *dispatchPcInEAX;
const u8 *doTiming;

View File

@ -70,7 +70,6 @@ protected:
PPCCachedReg saved_regs[32];
X64CachedReg saved_xregs[NUMXREGS];
void DiscardRegContentsIfCached(int preg);
virtual const int *GetAllocationOrder(int &count) = 0;
XEmitter *emit;
@ -79,6 +78,7 @@ public:
virtual ~RegCache() {}
virtual void Start(PPCAnalyst::BlockRegStats &stats) = 0;
void DiscardRegContentsIfCached(int preg);
void SetEmitter(XEmitter *emitter) {emit = emitter;}
void FlushR(X64Reg reg);

View File

@ -39,7 +39,7 @@ const u8 GC_ALIGNED16(pbswapShuffleNoop[16]) = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
static double GC_ALIGNED16(psTemp[2]) = {1.0, 1.0};
static u64 GC_ALIGNED16(temp64);
// TODO(ector): Improve 64-bit version
static void WriteDual32(u64 value, u32 address)
{
@ -95,27 +95,23 @@ void Jit64::psq_st(UGeckoInstruction inst)
JITDISABLE(LoadStorePaired)
js.block_flags |= BLOCK_USE_GQR0 << inst.I;
if (js.blockSetsQuantizers || !Core::GetStartupParameter().bOptimizeQuantizers)
if (js.blockSetsQuantizers || !inst.RA)
{
Default(inst);
return;
}
if (!inst.RA)
{
// This really should never happen. Unless we change this to also support stwux
// TODO: Support these cases if it becomes necessary.
Default(inst);
return;
}
const UGQR gqr(rSPR(SPR_GQR0 + inst.I));
const EQuantizeType stType = static_cast<EQuantizeType>(gqr.ST_TYPE);
int stScale = gqr.ST_SCALE;
bool update = inst.OPCD == 61;
int offset = inst.SIMM_12;
int a = inst.RA;
int s = inst.RS; // Fp numbers
const UGQR gqr(rSPR(SPR_GQR0 + inst.I));
const EQuantizeType stType = static_cast<EQuantizeType>(gqr.ST_TYPE);
int stScale = gqr.ST_SCALE;
if (inst.W) {
// PanicAlert("W=1: stType %i stScale %i update %i", (int)stType, (int)stScale, (int)update);
// It's fairly common that games write stuff to the pipe using this. Then, it's pretty much only
@ -165,9 +161,11 @@ void Jit64::psq_st(UGeckoInstruction inst)
Default(inst);
return;
}
return;
}
// Is this specialization still worth it? Let's keep it for now. It's probably
// not very risky since a game most likely wouldn't use the same code to process
// floats as integers (but you never know....).
if (stType == QUANTIZE_FLOAT)
{
if (gpr.R(a).IsImm() && !update && cpu_info.bSSSE3)
@ -182,115 +180,30 @@ void Jit64::psq_st(UGeckoInstruction inst)
return;
}
}
}
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
gpr.Lock(a);
fpr.Lock(s);
if (update)
gpr.LoadToX64(a, true, true);
MOV(32, R(ABI_PARAM2), gpr.R(a));
if (offset)
ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
TEST(32, R(ABI_PARAM2), Imm32(0x0C000000));
if (update && offset)
MOV(32, gpr.R(a), R(ABI_PARAM2));
CVTPD2PS(XMM0, fpr.R(s));
SHUFPS(XMM0, R(XMM0), 1);
MOVQ_xmm(M(&temp64), XMM0);
#ifdef _M_X64
MOV(64, R(ABI_PARAM1), M(&temp64));
FixupBranch argh = J_CC(CC_NZ);
BSWAP(64, ABI_PARAM1);
MOV(64, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1));
FixupBranch arg2 = J();
SetJumpTarget(argh);
CALL(thunks.ProtectFunction((void *)&WriteDual32, 0));
gpr.FlushLockX(EAX, EDX);
gpr.FlushLockX(ECX);
if (update)
gpr.LoadToX64(inst.RA, true, true);
fpr.LoadToX64(inst.RS, true);
MOV(32, R(ECX), gpr.R(inst.RA));
if (offset)
ADD(32, R(ECX), Imm32((u32)offset));
if (update && offset)
MOV(32, gpr.R(a), R(ECX));
MOVZX(32, 16, EAX, M(&PowerPC::ppcState.spr[SPR_GQR0 + inst.I]));
MOVZX(32, 8, EDX, R(AL));
// FIXME: Fix ModR/M encoding to allow [EDX*4+disp32]!
#ifdef _M_IX86
SHL(32, R(EDX), Imm8(2));
#else
FixupBranch argh = J_CC(CC_NZ);
MOV(32, R(ABI_PARAM1), M(((char*)&temp64) + 4));
BSWAP(32, ABI_PARAM1);
AND(32, R(ABI_PARAM2), Imm32(Memory::MEMVIEW32_MASK));
MOV(32, MDisp(ABI_PARAM2, (u32)Memory::base), R(ABI_PARAM1));
MOV(32, R(ABI_PARAM1), M(&temp64));
BSWAP(32, ABI_PARAM1);
MOV(32, MDisp(ABI_PARAM2, 4+(u32)Memory::base), R(ABI_PARAM1));
FixupBranch arg2 = J();
SetJumpTarget(argh);
MOV(32, R(ABI_PARAM1), M(((char*)&temp64) + 4));
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2);
MOV(32, R(ABI_PARAM1), M(((char*)&temp64)));
ADD(32, R(ABI_PARAM2), Imm32(4));
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2);
SHL(32, R(EDX), Imm8(3));
#endif
SetJumpTarget(arg2);
gpr.UnlockAll();
gpr.UnlockAllX();
fpr.UnlockAll();
}
else if (stType == QUANTIZE_U8)
{
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
gpr.Lock(a);
fpr.Lock(s);
if (update)
gpr.LoadToX64(a, true, update);
MOV(32, R(ABI_PARAM2), gpr.R(a));
if (offset)
ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
if (update && offset)
MOV(32, gpr.R(a), R(ABI_PARAM2));
MOVAPD(XMM0, fpr.R(s));
MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale]));
MULPD(XMM0, R(XMM1));
CVTPD2DQ(XMM0, R(XMM0));
PACKSSDW(XMM0, R(XMM0));
PACKUSWB(XMM0, R(XMM0));
MOVD_xmm(M(&temp64), XMM0);
MOV(16, R(ABI_PARAM1), M(&temp64));
#ifdef _M_X64
MOV(16, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1));
#else
MOV(32, R(EAX), R(ABI_PARAM2));
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
MOV(16, MDisp(EAX, (u32)Memory::base), R(ABI_PARAM1));
#endif
if (update)
MOV(32, gpr.R(a), R(ABI_PARAM2));
gpr.UnlockAll();
gpr.UnlockAllX();
fpr.UnlockAll();
}
else if (stType == QUANTIZE_S16)
{
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
gpr.Lock(a);
fpr.Lock(s);
if (update)
gpr.LoadToX64(a, true, update);
MOV(32, R(ABI_PARAM2), gpr.R(a));
if (offset)
ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
if (update)
MOV(32, gpr.R(a), R(ABI_PARAM2));
MOVAPD(XMM0, fpr.R(s));
MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale]));
MULPD(XMM0, R(XMM1));
SHUFPD(XMM0, R(XMM0), 1);
CVTPD2DQ(XMM0, R(XMM0));
PACKSSDW(XMM0, R(XMM0));
MOVD_xmm(M(&temp64), XMM0);
MOV(32, R(ABI_PARAM1), M(&temp64));
SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, 32, 0);
gpr.UnlockAll();
gpr.UnlockAllX();
fpr.UnlockAll();
}
else {
// Dodger uses this.
// mario tennis
//PanicAlert("st %i:%i", stType, inst.W);
Default(inst);
}
CVTPD2PS(XMM0, fpr.R(s));
CALLptr(MDisp(EDX, (u32)(u64)asm_routines.pairedStoreQuantized));
gpr.UnlockAll();
gpr.UnlockAllX();
}
void Jit64::psq_l(UGeckoInstruction inst)
@ -300,144 +213,35 @@ void Jit64::psq_l(UGeckoInstruction inst)
js.block_flags |= BLOCK_USE_GQR0 << inst.I;
if (js.blockSetsQuantizers || !Core::GetStartupParameter().bOptimizeQuantizers)
if (js.blockSetsQuantizers || !inst.RA || inst.W)
{
Default(inst);
return;
}
const UGQR gqr(rSPR(SPR_GQR0 + inst.I));
const EQuantizeType ldType = static_cast<EQuantizeType>(gqr.LD_TYPE);
int ldScale = gqr.LD_SCALE;
bool update = inst.OPCD == 57;
if (!inst.RA || inst.W)
{
// 0 1 during load
//PanicAlert("ld:%i %i", ldType, (int)inst.W);
Default(inst);
return;
}
int offset = inst.SIMM_12;
switch (ldType) {
case QUANTIZE_FLOAT: // We know this is from RAM, so we don't need to check the address.
{
#ifdef _M_X64
gpr.LoadToX64(inst.RA, true, update);
fpr.LoadToX64(inst.RS, false);
if (cpu_info.bSSSE3) {
X64Reg xd = fpr.R(inst.RS).GetSimpleReg();
MOVQ_xmm(xd, MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
PSHUFB(xd, M((void *)pbswapShuffle2x4));
CVTPS2PD(xd, R(xd));
} else {
MOV(64, R(RAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
BSWAP(64, RAX);
MOV(64, M(&psTemp[0]), R(RAX));
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
CVTPS2PD(r, M(&psTemp[0]));
SHUFPD(r, R(r), 1);
}
if (update && offset != 0)
ADD(32, gpr.R(inst.RA), Imm32(offset));
break;
#else
if (cpu_info.bSSSE3) {
gpr.LoadToX64(inst.RA, true, update);
fpr.LoadToX64(inst.RS, false);
X64Reg xd = fpr.R(inst.RS).GetSimpleReg();
MOV(32, R(EAX), gpr.R(inst.RA));
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
MOVQ_xmm(xd, MDisp(EAX, (u32)Memory::base + offset));
PSHUFB(xd, M((void *)pbswapShuffle2x4));
CVTPS2PD(xd, R(xd));
} else {
gpr.FlushLockX(ECX);
gpr.LoadToX64(inst.RA, true, update);
// This can probably be optimized somewhat.
LEA(32, ECX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset));
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base));
BSWAP(32, RAX);
MOV(32, M(&psTemp[0]), R(RAX));
MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base + 4));
BSWAP(32, RAX);
MOV(32, M(((float *)&psTemp[0]) + 1), R(RAX));
fpr.LoadToX64(inst.RS, false, true);
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
CVTPS2PD(r, M(&psTemp[0]));
gpr.UnlockAllX();
}
if (update && offset != 0)
ADD(32, gpr.R(inst.RA), Imm32(offset));
break;
#endif
}
case QUANTIZE_U8:
{
gpr.LoadToX64(inst.RA, true, update);
#ifdef _M_X64
MOVZX(32, 16, EAX, MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
#else
LEA(32, EAX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset));
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
MOVZX(32, 16, EAX, MDisp(EAX, (u32)Memory::base));
#endif
MOV(32, M(&temp64), R(EAX));
MOVD_xmm(XMM0, M(&temp64));
// SSE4 optimization opportunity here.
PXOR(XMM1, R(XMM1));
PUNPCKLBW(XMM0, R(XMM1));
PUNPCKLWD(XMM0, R(XMM1));
CVTDQ2PD(XMM0, R(XMM0));
fpr.LoadToX64(inst.RS, false, true);
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
MOVDDUP(r, M((void *)&m_dequantizeTableD[ldScale]));
MULPD(r, R(XMM0));
if (update && offset != 0)
ADD(32, gpr.R(inst.RA), Imm32(offset));
}
break;
case QUANTIZE_S16:
{
gpr.LoadToX64(inst.RA, true, update);
#ifdef _M_X64
MOV(32, R(EAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
#else
LEA(32, EAX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset));
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
MOV(32, R(EAX), MDisp(EAX, (u32)Memory::base));
#endif
BSWAP(32, EAX);
MOV(32, M(&temp64), R(EAX));
fpr.LoadToX64(inst.RS, false, true);
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
MOVD_xmm(XMM0, M(&temp64));
PUNPCKLWD(XMM0, R(XMM0)); // unpack to higher word in each dword..
PSRAD(XMM0, 16); // then use this signed shift to sign extend. clever eh? :P
CVTDQ2PD(XMM0, R(XMM0));
MOVDDUP(r, M((void*)&m_dequantizeTableD[ldScale]));
MULPD(r, R(XMM0));
SHUFPD(r, R(r), 1);
if (update && offset != 0)
ADD(32, gpr.R(inst.RA), Imm32(offset));
}
break;
/*
Dynamic quantizer. Todo when we have a test set.
MOVZX(32, 8, EAX, M(((char *)&PowerPC::ppcState.spr[SPR_GQR0 + inst.I]) + 3)); // it's in the high byte.
AND(32, R(EAX), Imm8(0x3F));
MOV(32, R(ECX), Imm32((u32)&m_dequantizeTableD));
MOVDDUP(r, MComplex(RCX, EAX, 8, 0));
*/
default:
// 4 0
// 6 0 //power tennis
// 5 0
// PanicAlert("ld:%i %i", ldType, (int)inst.W);
Default(inst);
return;
}
//u32 EA = (m_GPR[_inst.RA] + _inst.SIMM_12) : _inst.SIMM_12;
gpr.FlushLockX(EAX, EDX);
gpr.FlushLockX(ECX);
gpr.LoadToX64(inst.RA, true, true);
fpr.LoadToX64(inst.RS, false, true);
if (offset)
LEA(32, ECX, MDisp(gpr.RX(inst.RA), offset));
else
MOV(32, R(ECX), gpr.R(inst.RA));
if (update && offset)
MOV(32, gpr.R(inst.RA), R(ECX));
MOVZX(32, 16, EAX, M(((char *)&GQR(inst.I)) + 2));
MOVZX(32, 8, EDX, R(AL));
// FIXME: Fix ModR/M encoding to allow [EDX*4+disp32]! (MComplex can do this, no?)
#ifdef _M_IX86
SHL(32, R(EDX), Imm8(2));
#else
SHL(32, R(EDX), Imm8(3));
#endif
CALLptr(MDisp(EDX, (u32)(u64)asm_routines.pairedLoadQuantized));
CVTPS2PD(fpr.RX(inst.RS), R(XMM0));
gpr.UnlockAll();
gpr.UnlockAllX();
}

View File

@ -32,6 +32,7 @@
#include "../PPCAnalyst.h"
#include "../JitCommon/JitCache.h"
#include "../JitCommon/Jit_Util.h"
#include "x64Emitter.h"
#include "x64Analyzer.h"
#include "IR.h"
@ -85,7 +86,7 @@ public:
};
class Jit64 : public Gen::XCodeBlock
class Jit64 : public EmuCodeBlock
{
private:
struct JitState
@ -175,19 +176,10 @@ public:
void WriteCallInterpreter(UGeckoInstruction _inst);
void Cleanup();
void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false);
void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0);
void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset, bool signExtend = false);
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset);
void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address);
void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);
void GenerateCarry(Gen::X64Reg temp_reg);
void ForceSinglePrecisionS(Gen::X64Reg xmm);
void ForceSinglePrecisionP(Gen::X64Reg xmm);
void JitClearCA();
void JitSetCA();
void tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg));
typedef u32 (*Operation)(u32 a, u32 b);
void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false);

View File

@ -215,403 +215,6 @@ void AsmRoutineManager::Generate()
GenerateCommon();
}
// PSHUFB control mask: reverses the byte order inside each of the two low
// 32-bit lanes (bytes 0-3 and 4-7) and leaves bytes 8-15 where they are.
const u8 GC_ALIGNED16(pbswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
// Multipliers applied to paired singles before they are converted to
// integers by the quantized store routines.
// Entries 0..31 hold 2^0 .. 2^31; entries 32..63 hold 2^-32 .. 2^-1
// (presumably the 6-bit scale field read as two's complement -32..-1 —
// confirm against the callers that compute the table offset).
//
// The 2^31 terms are written with 1ULL: (1 << 31) does not fit in a signed
// 32-bit int and evaluates to a negative value, which would flip the sign of
// entries 31 and 33.
const float m_quantizeTableS[] =
{
	(1 << 0), (1 << 1), (1 << 2), (1 << 3),
	(1 << 4), (1 << 5), (1 << 6), (1 << 7),
	(1 << 8), (1 << 9), (1 << 10), (1 << 11),
	(1 << 12), (1 << 13), (1 << 14), (1 << 15),
	(1 << 16), (1 << 17), (1 << 18), (1 << 19),
	(1 << 20), (1 << 21), (1 << 22), (1 << 23),
	(1 << 24), (1 << 25), (1 << 26), (1 << 27),
	(1 << 28), (1 << 29), (1 << 30), (1ULL << 31),
	1.0 / (1ULL << 32), 1.0 / (1ULL << 31), 1.0 / (1 << 30), 1.0 / (1 << 29),
	1.0 / (1 << 28), 1.0 / (1 << 27), 1.0 / (1 << 26), 1.0 / (1 << 25),
	1.0 / (1 << 24), 1.0 / (1 << 23), 1.0 / (1 << 22), 1.0 / (1 << 21),
	1.0 / (1 << 20), 1.0 / (1 << 19), 1.0 / (1 << 18), 1.0 / (1 << 17),
	1.0 / (1 << 16), 1.0 / (1 << 15), 1.0 / (1 << 14), 1.0 / (1 << 13),
	1.0 / (1 << 12), 1.0 / (1 << 11), 1.0 / (1 << 10), 1.0 / (1 << 9),
	1.0 / (1 << 8), 1.0 / (1 << 7), 1.0 / (1 << 6), 1.0 / (1 << 5),
	1.0 / (1 << 4), 1.0 / (1 << 3), 1.0 / (1 << 2), 1.0 / (1 << 1),
};
// Multipliers applied to integer values after they are converted to floats
// by the quantized load routines; each entry is the reciprocal of the
// matching m_quantizeTableS entry.
// Entries 0..31 hold 2^0 .. 2^-31; entries 32..63 hold 2^32 .. 2^1
// (presumably the 6-bit scale field read as two's complement -32..-1 —
// confirm against the callers that compute the table offset).
//
// The 2^31 terms are written with 1ULL: (1 << 31) does not fit in a signed
// 32-bit int and evaluates to a negative value, which would flip the sign of
// entries 31 and 33.
const float m_dequantizeTableS[] =
{
	1.0 / (1 << 0), 1.0 / (1 << 1), 1.0 / (1 << 2), 1.0 / (1 << 3),
	1.0 / (1 << 4), 1.0 / (1 << 5), 1.0 / (1 << 6), 1.0 / (1 << 7),
	1.0 / (1 << 8), 1.0 / (1 << 9), 1.0 / (1 << 10), 1.0 / (1 << 11),
	1.0 / (1 << 12), 1.0 / (1 << 13), 1.0 / (1 << 14), 1.0 / (1 << 15),
	1.0 / (1 << 16), 1.0 / (1 << 17), 1.0 / (1 << 18), 1.0 / (1 << 19),
	1.0 / (1 << 20), 1.0 / (1 << 21), 1.0 / (1 << 22), 1.0 / (1 << 23),
	1.0 / (1 << 24), 1.0 / (1 << 25), 1.0 / (1 << 26), 1.0 / (1 << 27),
	1.0 / (1 << 28), 1.0 / (1 << 29), 1.0 / (1 << 30), 1.0 / (1ULL << 31),
	(1ULL << 32), (1ULL << 31), (1 << 30), (1 << 29),
	(1 << 28), (1 << 27), (1 << 26), (1 << 25),
	(1 << 24), (1 << 23), (1 << 22), (1 << 21),
	(1 << 20), (1 << 19), (1 << 18), (1 << 17),
	(1 << 16), (1 << 15), (1 << 14), (1 << 13),
	(1 << 12), (1 << 11), (1 << 10), (1 << 9),
	(1 << 8), (1 << 7), (1 << 6), (1 << 5),
	(1 << 4), (1 << 3), (1 << 2), (1 << 1),
};
// Scratch memory the generated routines use to move a pair of 32-bit values
// between XMM registers and general-purpose registers.
float psTemp[2];
// Upper bound applied with MINPS before the float -> int conversion in the
// quantized store routines.
const float m_65535 = 65535.0f;
// When defined, the U8/S8/S16 store routines clamp their input to m_65535
// before converting (the U16 routine always clamps).
#define QUANTIZE_OVERFLOW_SAFE
// According to the Intel docs, CVTPS2DQ writes 0x80000000 if the source
// floating point value is outside the int32 range. That is acceptable for
// large negative values, but not for large positive ones.
// It is unknown whether the overflow actually happens in any game, but it
// could cause problems, so some clamping is needed.
// TODO(ector): Improve 64-bit version
// Writes a 64-bit value to emulated memory as two consecutive 32-bit words:
// the upper half goes to `address`, the lower half to `address + 4`.
static void WriteDual32(u64 value, u32 address)
{
	const u32 upperWord = (u32)(value >> 32);
	const u32 lowerWord = (u32)value;
	Memory::Write_U32(upperWord, address);
	Memory::Write_U32(lowerWord, address + 4);
}
// Emits the paired-single store routines and fills pairedStoreQuantized[8]
// with their entry points (0 = float, 4 = U8, 5 = U16, 6 = S8, 7 = S16;
// slots 1-3 point at a UD2 trap).
// Every routine takes the two singles in the low half of XMM0 and the PPC
// address in ECX. The integer routines also use EAX >> 6 as a byte offset
// into m_quantizeTableS (presumably EAX holds the GQR half-word loaded by the
// caller — confirm at the call site).
void AsmRoutineManager::GenQuantizedStores() {
// Trap for the unsupported type codes.
const u8* storePairedIllegal = AlignCode4();
UD2();
const u8* storePairedFloat = AlignCode4();
// IN: value = XMM0, two singles in bottom. PPC address = ECX.
#ifdef _M_X64
// INT3();
// Move the pair into RAX through memory.
MOVQ_xmm(M(&psTemp[0]), XMM0);
MOV(64, R(RAX), M(&psTemp[0]));
//INT3();
//MOVQ_xmm(R(RAX), XMM0);
//INT3();
ROL(64, R(RAX), Imm8(32)); // Swap the two - the big BSWAP will unswap.
// Addresses with any of the 0x0C000000 bits set take the slow path through WriteDual32.
TEST(32, R(ECX), Imm32(0x0C000000));
FixupBranch argh = J_CC(CC_NZ);
// Fast path: byte-swap all 64 bits and store directly at RBX + RCX.
BSWAP(64, RAX);
MOV(64, MComplex(RBX, RCX, SCALE_1, 0), R(RAX));
FixupBranch arg2 = J();
SetJumpTarget(argh);
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&WriteDual32, 2), RAX, RCX);
SetJumpTarget(arg2);
#else
MOVQ_xmm(M(&psTemp[0]), XMM0);
// Addresses with any of the 0x0C000000 bits set take the slow path through Memory::Write_U32.
TEST(32, R(ECX), Imm32(0x0C000000));
FixupBranch argh = J_CC(CC_NZ);
// Fast path: byte-swap each word and store it directly at Memory::base + masked address.
MOV(32, R(EAX), M(&psTemp));
BSWAP(32, EAX);
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
MOV(32, MDisp(ECX, (u32)Memory::base), R(EAX));
MOV(32, R(EAX), M(((char*)&psTemp) + 4));
BSWAP(32, EAX);
MOV(32, MDisp(ECX, 4+(u32)Memory::base), R(EAX));
FixupBranch arg2 = J();
SetJumpTarget(argh);
// Slow path: one Write_U32 call per word, at ECX and ECX + 4.
MOV(32, R(EAX), M(((char*)&psTemp)));
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX);
MOV(32, R(EAX), M(((char*)&psTemp)+4));
ADD(32, R(ECX), Imm32(4));
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX);
SetJumpTarget(arg2);
#endif
RET();
// U8: scale, convert, pack with unsigned saturation to bytes, store 2 bytes.
const u8* storePairedU8 = AlignCode4();
//INT3();
// XMM1 = table entry at byte offset EAX >> 6, duplicated into both low lanes.
SHR(32, R(EAX), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1));
#ifdef QUANTIZE_OVERFLOW_SAFE
// Clamp to 65535 before converting to integers.
MOVSS(XMM1, M((void *)&m_65535));
PUNPCKLDQ(XMM1, R(XMM1));
MINPS(XMM0, R(XMM1));
#endif
CVTPS2DQ(XMM0, R(XMM0));
PACKSSDW(XMM0, R(XMM0));
PACKUSWB(XMM0, R(XMM0));
MOVD_xmm(R(EAX), XMM0);
#ifdef _M_X64
MOV(16, MComplex(RBX, RCX, 1, 0), R(AX));
#else
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
MOV(16, MDisp(ECX, (u32)Memory::base), R(AX));
#endif
RET();
// S8: same as U8 but the final pack uses signed saturation (PACKSSWB).
const u8* storePairedS8 = AlignCode4();
//INT3();
SHR(32, R(EAX), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1));
#ifdef QUANTIZE_OVERFLOW_SAFE
MOVSS(XMM1, M((void *)&m_65535));
PUNPCKLDQ(XMM1, R(XMM1));
MINPS(XMM0, R(XMM1));
#endif
CVTPS2DQ(XMM0, R(XMM0));
PACKSSDW(XMM0, R(XMM0));
PACKSSWB(XMM0, R(XMM0));
MOVD_xmm(R(EAX), XMM0);
#ifdef _M_X64
MOV(16, MComplex(RBX, RCX, 1, 0), R(AX));
#else
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
MOV(16, MDisp(ECX, (u32)Memory::base), R(AX));
#endif
RET();
// U16: scale, clamp to [0, 65535], convert, then assemble the two 16-bit
// results in EAX by hand and store 4 bytes.
const u8* storePairedU16 = AlignCode4();
//INT3();
SHR(32, R(EAX), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1));
// PACKUSDW is available only in SSE4
// Lower clamp at 0, upper clamp at 65535.
PXOR(XMM1, R(XMM1));
MAXPS(XMM0, R(XMM1));
MOVSS(XMM1, M((void *)&m_65535));
PUNPCKLDQ(XMM1, R(XMM1));
MINPS(XMM0, R(XMM1));
CVTPS2DQ(XMM0, R(XMM0));
MOVQ_xmm(M(psTemp), XMM0);
// place ps[0] into the higher word, ps[1] into the lower
// so no need in ROL after BSWAP
MOVZX(32, 16, EAX, M((char*)psTemp + 0));
SHL(32, R(EAX), Imm8(16));
MOV(16, R(AX), M((char*)psTemp + 4));
BSWAP(32, EAX);
//ROL(32, R(EAX), Imm8(16));
#ifdef _M_X64
MOV(32, MComplex(RBX, RCX, 1, 0), R(EAX));
#else
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
MOV(32, MDisp(ECX, (u32)Memory::base), R(EAX));
#endif
RET();
// S16: scale, convert, pack with signed saturation to words, then
// BSWAP + ROL 16 to byte-swap each 16-bit half in place, store 4 bytes.
const u8* storePairedS16 = AlignCode4();
//INT3();
SHR(32, R(EAX), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1));
#ifdef QUANTIZE_OVERFLOW_SAFE
MOVSS(XMM1, M((void *)&m_65535));
PUNPCKLDQ(XMM1, R(XMM1));
MINPS(XMM0, R(XMM1));
#endif
CVTPS2DQ(XMM0, R(XMM0));
PACKSSDW(XMM0, R(XMM0));
MOVD_xmm(R(EAX), XMM0);
BSWAP(32, EAX);
ROL(32, R(EAX), Imm8(16));
#ifdef _M_X64
MOV(32, MComplex(RBX, RCX, 1, 0), R(EAX));
#else
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
MOV(32, MDisp(ECX, (u32)Memory::base), R(EAX));
#endif
RET();
// Dispatch table indexed by store type code.
pairedStoreQuantized[0] = storePairedFloat;
pairedStoreQuantized[1] = storePairedIllegal;
pairedStoreQuantized[2] = storePairedIllegal;
pairedStoreQuantized[3] = storePairedIllegal;
pairedStoreQuantized[4] = storePairedU8;
pairedStoreQuantized[5] = storePairedU16;
pairedStoreQuantized[6] = storePairedS8;
pairedStoreQuantized[7] = storePairedS16;
}
// Emits the paired-single load routines and fills pairedLoadQuantized[8]
// with their entry points (0 = float, 4 = U8, 5 = U16, 6 = S8, 7 = S16;
// slots 1-3 point at a UD2 trap).
// Every routine reads from the PPC address in ECX and leaves two singles in
// the low half of XMM0. The integer routines also use EAX >> 6 as a byte
// offset into m_dequantizeTableS (presumably EAX holds the GQR half-word
// loaded by the caller — confirm at the call site).
void AsmRoutineManager::GenQuantizedLoads() {
// Trap for the unsupported type codes.
const u8* loadPairedIllegal = AlignCode4();
UD2();
// Float: load 8 bytes and byte-swap each 32-bit word.
const u8* loadPairedFloat = AlignCode4();
if (cpu_info.bSSSE3) {
// SSSE3 path: load straight into XMM0 and swap bytes with PSHUFB.
#ifdef _M_X64
MOVQ_xmm(XMM0, MComplex(RBX, RCX, 1, 0));
#else
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
MOVQ_xmm(XMM0, MDisp(ECX, (u32)Memory::base));
#endif
PSHUFB(XMM0, M((void *)pbswapShuffle2x4));
} else {
#ifdef _M_X64
// 64-bit BSWAP reverses all 8 bytes; the ROL by 32 puts the two words back in order.
MOV(64, R(RCX), MComplex(RBX, RCX, 1, 0));
BSWAP(64, RCX);
ROL(64, R(RCX), Imm8(32));
MOVQ_xmm(XMM0, R(RCX));
#else
#if 0
// Disabled SSE2-only variant.
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
MOVQ_xmm(XMM0, MDisp(ECX, (u32)Memory::base));
PXOR(XMM1, R(XMM1));
PSHUFLW(XMM0, R(XMM0), 0xB1);
MOVAPD(XMM1, R(XMM0));
PSRLW(XMM0, 8);
PSLLW(XMM1, 8);
POR(XMM0, R(XMM1));
#else
// Swap each word in EAX and assemble the pair in psTemp before loading XMM0.
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base));
BSWAP(32, EAX);
MOV(32, M(&psTemp[0]), R(RAX));
MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base + 4));
BSWAP(32, EAX);
MOV(32, M(((float *)&psTemp[0]) + 1), R(RAX));
MOVQ_xmm(XMM0, M(&psTemp[0]));
#endif
#endif
}
RET();
// U8: load 2 bytes, zero-extend each to 32 bits, convert to float, scale.
const u8* loadPairedU8 = AlignCode4();
#ifdef _M_X64
MOVZX(32, 16, ECX, MComplex(RBX, RCX, 1, 0));
#else
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
MOVZX(32, 16, ECX, MDisp(ECX, (u32)Memory::base));
#endif
MOVD_xmm(XMM0, R(ECX));
// Interleaving with zero widens bytes -> words -> dwords without sign extension.
PXOR(XMM1, R(XMM1));
PUNPCKLBW(XMM0, R(XMM1));
PUNPCKLWD(XMM0, R(XMM1));
CVTDQ2PS(XMM0, R(XMM0));
// XMM1 = table entry at byte offset EAX >> 6, duplicated into both low lanes.
SHR(32, R(EAX), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1));
RET();
// S8: load 2 bytes, sign-extend each to 32 bits, convert to float, scale.
const u8* loadPairedS8 = AlignCode4();
#ifdef _M_X64
MOVZX(32, 16, ECX, MComplex(RBX, RCX, 1, 0));
#else
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
MOVZX(32, 16, ECX, MDisp(ECX, (u32)Memory::base));
#endif
MOVD_xmm(XMM0, R(ECX));
// Self-interleave puts each byte in the top byte of its dword; the
// arithmetic shift right by 24 then sign-extends it.
PUNPCKLBW(XMM0, R(XMM0));
PUNPCKLWD(XMM0, R(XMM0));
PSRAD(XMM0, 24);
CVTDQ2PS(XMM0, R(XMM0));
SHR(32, R(EAX), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1));
RET();
// U16: load 4 bytes, byte-swap each 16-bit half, zero-extend, convert, scale.
const u8* loadPairedU16 = AlignCode4();
#ifdef _M_X64
MOV(32, R(ECX), MComplex(RBX, RCX, 1, 0));
#else
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
MOV(32, R(ECX), MDisp(ECX, (u32)Memory::base));
#endif
// BSWAP followed by ROL 16 swaps the bytes within each 16-bit half.
BSWAP(32, ECX);
ROL(32, R(ECX), Imm8(16));
MOVD_xmm(XMM0, R(ECX));
PXOR(XMM1, R(XMM1));
PUNPCKLWD(XMM0, R(XMM1));
CVTDQ2PS(XMM0, R(XMM0));
SHR(32, R(EAX), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1));
RET();
// S16: load 4 bytes, byte-swap each 16-bit half, sign-extend, convert, scale.
const u8* loadPairedS16 = AlignCode4();
#ifdef _M_X64
MOV(32, R(ECX), MComplex(RBX, RCX, 1, 0));
#else
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
MOV(32, R(ECX), MDisp(ECX, (u32)Memory::base));
#endif
BSWAP(32, ECX);
ROL(32, R(ECX), Imm8(16));
MOVD_xmm(XMM0, R(ECX));
// Self-interleave puts each word in the top half of its dword; the
// arithmetic shift right by 16 then sign-extends it.
PUNPCKLWD(XMM0, R(XMM0));
PSRAD(XMM0, 16);
CVTDQ2PS(XMM0, R(XMM0));
SHR(32, R(EAX), Imm8(6));
// NOTE(review): only this routine masks the table offset with 0xFC; the
// other integer loaders use EAX >> 6 unmasked — confirm whether they should match.
AND(32, R(EAX), Imm32(0xFC));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1));
RET();
// Dispatch table indexed by load type code.
pairedLoadQuantized[0] = loadPairedFloat;
pairedLoadQuantized[1] = loadPairedIllegal;
pairedLoadQuantized[2] = loadPairedIllegal;
pairedLoadQuantized[3] = loadPairedIllegal;
pairedLoadQuantized[4] = loadPairedU8;
pairedLoadQuantized[5] = loadPairedU16;
pairedLoadQuantized[6] = loadPairedS8;
pairedLoadQuantized[7] = loadPairedS16;
}
// Emits a routine that appends a `size`-bit integer to the GP gather pipe.
// The emitted code byte-swaps the value in ABI_PARAM1, stores it at
// GPFifo::m_gatherPipe + GPFifo::m_gatherPipeCount, advances the count by
// size / 8 bytes and returns. ESI (and EDX when size != 32) are preserved
// with PUSH/POP; EAX is overwritten with the gather pipe base address.
void AsmRoutineManager::GenFifoWrite(int size)
{
// Assume value in ABI_PARAM1
PUSH(ESI);
if (size != 32)
PUSH(EDX);
BSWAP(size, ABI_PARAM1);
// EAX = base of the gather pipe buffer, ESI = current write offset.
MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
if (size != 32) {
// For sizes other than 32 bits the value is copied to EDX and stored from there.
MOV(32, R(EDX), R(ABI_PARAM1));
MOV(size, MComplex(RAX, RSI, 1, 0), R(EDX));
} else {
MOV(size, MComplex(RAX, RSI, 1, 0), R(ABI_PARAM1));
}
// Advance the write offset by the number of bytes written and store it back.
ADD(32, R(ESI), Imm8(size >> 3));
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
if (size != 32)
POP(EDX);
POP(ESI);
RET();
}
void AsmRoutineManager::GenFifoFloatWrite()
{
// Assume value in XMM0
PUSH(ESI);
PUSH(EDX);
MOVSS(M(&temp32), XMM0);
MOV(32, R(EDX), M(&temp32));
BSWAP(32, EDX);
MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
MOV(32, MComplex(RAX, RSI, 1, 0), R(EDX));
ADD(32, R(ESI), Imm8(4));
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
POP(EDX);
POP(ESI);
RET();
}
void AsmRoutineManager::GenFifoXmm64Write()
{
// Assume value in XMM0. Assume pre-byteswapped (unlike the others here!)
PUSH(ESI);
MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
MOVQ_xmm(MComplex(RAX, RSI, 1, 0), XMM0);
ADD(32, R(ESI), Imm8(8));
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
POP(ESI);
RET();
}
void AsmRoutineManager::GenerateCommon()
{
// USES_CR
@ -649,7 +252,6 @@ void AsmRoutineManager::GenerateCommon()
GenQuantizedLoads();
GenQuantizedStores();
computeRcFp = AlignCode16();
//CMPSD(R(XMM0), M(&zero),
// TODO

View File

@ -19,6 +19,7 @@
#define _JITASM_H
#include "x64Emitter.h"
#include "../JitCommon/JitAsmCommon.h"
// In Dolphin, we don't use inline assembly. Instead, we generate all machine-near
// code at runtime. In the case of fixed code like this, after writing it, we write
@ -34,16 +35,11 @@
// To add a new asm routine, just add another const here, and add the code to Generate.
// Also, possibly increase the size of the code buffer.
class AsmRoutineManager : public Gen::XCodeBlock
class AsmRoutineManager : public CommonAsmRoutines
{
private:
void Generate();
void GenerateCommon();
void GenFifoWrite(int size);
void GenFifoFloatWrite();
void GenFifoXmm64Write(); // yes, 32 & 64-bit compatible
void GenQuantizedLoads();
void GenQuantizedStores();
public:
void Init() {
@ -67,7 +63,6 @@ public:
const u8 *fpException;
const u8 *computeRc;
const u8 *computeRcFp;
const u8 *testExceptions;
const u8 *dispatchPcInEAX;
const u8 *doTiming;
@ -82,8 +77,6 @@ public:
const u8 *doReJit;
const u8 *pairedLoadQuantized[8];
const u8 *pairedStoreQuantized[8];
bool compareEnabled;
};

View File

@ -15,9 +15,6 @@
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
// TODO(ector): Tons of pshufb optimization of the loads/stores, for SSSE3+, possibly SSE4, only.
// Should give a very noticeable speed boost to paired-single-heavy code.
#include "Common.h"
#include "Thunk.h"
@ -39,9 +36,8 @@
void Jit64::psq_st(UGeckoInstruction inst)
{
INSTRUCTION_START
DISABLE64
JITDISABLE(LoadStorePaired)
if (inst.W || !Core::GetStartupParameter().bOptimizeQuantizers) {Default(inst); return;}
if (inst.W) {Default(inst); return;}
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_12), val;
if (inst.RA)
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
@ -55,9 +51,8 @@ void Jit64::psq_st(UGeckoInstruction inst)
void Jit64::psq_l(UGeckoInstruction inst)
{
INSTRUCTION_START
DISABLE64
JITDISABLE(LoadStorePaired)
if (inst.W || !Core::GetStartupParameter().bOptimizeQuantizers) {Default(inst); return;}
if (inst.W) {Default(inst); return;}
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_12), val;
if (inst.RA)
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));

View File

@ -0,0 +1,394 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include "ABI.h"
#include "Thunk.h"
#include "CPUDetect.h"
#include "x64Emitter.h"
#include "../../HW/Memmap.h"
#include "../PowerPC.h"
#include "../../CoreTiming.h"
#include "MemoryUtil.h"
#include "ABI.h"
#include "../JitCommon/JitCache.h"
#include "../../HW/GPFifo.h"
#include "../../Core.h"
#include "JitAsmCommon.h"
using namespace Gen;
static int temp32;
void CommonAsmRoutines::GenFifoWrite(int size)
{
// Assume value in ABI_PARAM1
PUSH(ESI);
if (size != 32)
PUSH(EDX);
BSWAP(size, ABI_PARAM1);
MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
if (size != 32) {
MOV(32, R(EDX), R(ABI_PARAM1));
MOV(size, MComplex(RAX, RSI, 1, 0), R(EDX));
} else {
MOV(size, MComplex(RAX, RSI, 1, 0), R(ABI_PARAM1));
}
ADD(32, R(ESI), Imm8(size >> 3));
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
if (size != 32)
POP(EDX);
POP(ESI);
RET();
}
void CommonAsmRoutines::GenFifoFloatWrite()
{
// Assume value in XMM0
PUSH(ESI);
PUSH(EDX);
MOVSS(M(&temp32), XMM0);
MOV(32, R(EDX), M(&temp32));
BSWAP(32, EDX);
MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
MOV(32, MComplex(RAX, RSI, 1, 0), R(EDX));
ADD(32, R(ESI), Imm8(4));
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
POP(EDX);
POP(ESI);
RET();
}
void CommonAsmRoutines::GenFifoXmm64Write()
{
// Assume value in XMM0. Assume pre-byteswapped (unlike the others here!)
PUSH(ESI);
MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
MOVQ_xmm(MComplex(RAX, RSI, 1, 0), XMM0);
ADD(32, R(ESI), Imm8(8));
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
POP(ESI);
RET();
}
// Safe + Fast Quantizers, originally from JITIL by magumagu
// PSHUFB control mask used by the SSSE3 float-pair load: reverses the byte
// order inside each of the two low 32-bit lanes (bytes 0-3 and 4-7) and
// leaves bytes 8-15 where they are.
static const u8 GC_ALIGNED16(pbswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
// Scale factors applied by the quantized store routines before converting
// floats to integers. 64 entries, addressed by the routines as a byte offset
// (EAX >> 6): entries 0..31 hold 2^0 .. 2^31, entries 32..63 hold
// 2^-32 .. 2^-1 (the negative half of a 6-bit two's-complement scale).
// 2^31 and 2^32 are spelled with 1ULL: `1 << 31` overflows a 32-bit int and
// would produce a negative factor.
static const float GC_ALIGNED16(m_quantizeTableS[]) =
{
	(1 <<  0),	(1 <<  1),	(1 <<  2),	(1 <<  3),
	(1 <<  4),	(1 <<  5),	(1 <<  6),	(1 <<  7),
	(1 <<  8),	(1 <<  9),	(1 << 10),	(1 << 11),
	(1 << 12),	(1 << 13),	(1 << 14),	(1 << 15),
	(1 << 16),	(1 << 17),	(1 << 18),	(1 << 19),
	(1 << 20),	(1 << 21),	(1 << 22),	(1 << 23),
	(1 << 24),	(1 << 25),	(1 << 26),	(1 << 27),
	(1 << 28),	(1 << 29),	(1 << 30),	(1ULL << 31),
	1.0 / (1ULL << 32),	1.0 / (1ULL << 31),	1.0 / (1 << 30),	1.0 / (1 << 29),
	1.0 / (1 << 28),	1.0 / (1 << 27),	1.0 / (1 << 26),	1.0 / (1 << 25),
	1.0 / (1 << 24),	1.0 / (1 << 23),	1.0 / (1 << 22),	1.0 / (1 << 21),
	1.0 / (1 << 20),	1.0 / (1 << 19),	1.0 / (1 << 18),	1.0 / (1 << 17),
	1.0 / (1 << 16),	1.0 / (1 << 15),	1.0 / (1 << 14),	1.0 / (1 << 13),
	1.0 / (1 << 12),	1.0 / (1 << 11),	1.0 / (1 << 10),	1.0 / (1 <<  9),
	1.0 / (1 <<  8),	1.0 / (1 <<  7),	1.0 / (1 <<  6),	1.0 / (1 <<  5),
	1.0 / (1 <<  4),	1.0 / (1 <<  3),	1.0 / (1 <<  2),	1.0 / (1 <<  1),
};
// Scale factors applied by the quantized load routines after converting
// integers to floats; each entry is the reciprocal of the matching
// quantization factor. 64 entries, addressed by the routines as a byte offset
// (EAX >> 6): entries 0..31 hold 2^-0 .. 2^-31, entries 32..63 hold
// 2^32 .. 2^1.
// 2^31 and 2^32 are spelled with 1ULL: `1 << 31` overflows a 32-bit int and
// would produce a negative factor.
static const float GC_ALIGNED16(m_dequantizeTableS[]) =
{
	1.0 / (1 <<  0),	1.0 / (1 <<  1),	1.0 / (1 <<  2),	1.0 / (1 <<  3),
	1.0 / (1 <<  4),	1.0 / (1 <<  5),	1.0 / (1 <<  6),	1.0 / (1 <<  7),
	1.0 / (1 <<  8),	1.0 / (1 <<  9),	1.0 / (1 << 10),	1.0 / (1 << 11),
	1.0 / (1 << 12),	1.0 / (1 << 13),	1.0 / (1 << 14),	1.0 / (1 << 15),
	1.0 / (1 << 16),	1.0 / (1 << 17),	1.0 / (1 << 18),	1.0 / (1 << 19),
	1.0 / (1 << 20),	1.0 / (1 << 21),	1.0 / (1 << 22),	1.0 / (1 << 23),
	1.0 / (1 << 24),	1.0 / (1 << 25),	1.0 / (1 << 26),	1.0 / (1 << 27),
	1.0 / (1 << 28),	1.0 / (1 << 29),	1.0 / (1 << 30),	1.0 / (1ULL << 31),
	(1ULL << 32),	(1ULL << 31),	(1 << 30),	(1 << 29),
	(1 << 28),	(1 << 27),	(1 << 26),	(1 << 25),
	(1 << 24),	(1 << 23),	(1 << 22),	(1 << 21),
	(1 << 20),	(1 << 19),	(1 << 18),	(1 << 17),
	(1 << 16),	(1 << 15),	(1 << 14),	(1 << 13),
	(1 << 12),	(1 << 11),	(1 << 10),	(1 <<  9),
	(1 <<  8),	(1 <<  7),	(1 <<  6),	(1 <<  5),
	(1 <<  4),	(1 <<  3),	(1 <<  2),	(1 <<  1),
};
// Scratch buffer the generated load/store routines use to move data between
// XMM and general-purpose registers through memory.
static float GC_ALIGNED16(psTemp[4]);
// Upper clamp (applied with MINPS) used by the store routines before the
// float -> int32 conversion.
static const float m_65535 = 65535.0f;
// When defined, the u8 / s8 / s16 store routines also clamp to m_65535 before
// CVTPS2DQ (the u16 routine always clamps).
#define QUANTIZE_OVERFLOW_SAFE
// according to Intel Docs CVTPS2DQ writes 0x80000000 if the source floating point value is out of int32 range
// while it's OK for large negatives, it isn't for positives
// I don't know whether the overflow actually happens in any games
// but it potentially can cause problems, so we need some clamping
// TODO(ector): Improve 64-bit version
static void WriteDual32(u64 value, u32 address)
{
	// Splits a 64-bit value into two 32-bit emulated-memory writes:
	// the upper half goes to `address`, the lower half to `address + 4`.
	const u32 upperHalf = (u32)(value >> 32);
	const u32 lowerHalf = (u32)value;

	Memory::Write_U32(upperHalf, address);
	Memory::Write_U32(lowerHalf, address + 4);
}
// See comment in header for in/outs.
// Emits the paired-single store routines and records their entry points in
// pairedStoreQuantized[], indexed by quantization type
// (0 = float, 4 = u8, 5 = u16, 6 = s8, 7 = s16; 1-3 trap).
// Every routine takes the two floats in the low two lanes of XMM0 and the
// destination address in ECX. The integer routines additionally read EAX:
// EAX >> 6 is used as a byte offset into m_quantizeTableS.
// NOTE(review): presumably the caller loads EAX with the GQR half-word whose
// 6-bit scale sits in bits 8..13 - confirm against the call sites.
void CommonAsmRoutines::GenQuantizedStores() {
// Types 1-3: no valid encoding, raise an undefined-instruction trap.
const u8* storePairedIllegal = AlignCode4();
UD2();
// Type 0: store both floats unquantized, byte-swapped.
const u8* storePairedFloat = AlignCode4();
#ifdef _M_X64
// Swap the two low lanes so the first float ends up in the upper half of RAX.
SHUFPS(XMM0, R(XMM0), 1);
MOVQ_xmm(M(&psTemp[0]), XMM0);
MOV(64, R(RAX), M(&psTemp[0]));
// Addresses with any of these bits set take the slow path through WriteDual32.
TEST(32, R(ECX), Imm32(0x0C000000));
FixupBranch too_complex = J_CC(CC_NZ);
// Fast path: one byte-swapped 64-bit store relative to RBX.
BSWAP(64, RAX);
MOV(64, MComplex(RBX, RCX, SCALE_1, 0), R(RAX));
FixupBranch skip_complex = J();
SetJumpTarget(too_complex);
// Slow path: RAX is still unswapped here; WriteDual32 splits it into two writes.
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&WriteDual32, 2), RAX, RCX);
SetJumpTarget(skip_complex);
RET();
#else
MOVQ_xmm(M(&psTemp[0]), XMM0);
// Addresses with any of these bits set take the slow path through Memory::Write_U32.
TEST(32, R(ECX), Imm32(0x0C000000));
FixupBranch argh = J_CC(CC_NZ);
// Fast path: two byte-swapped 32-bit stores relative to Memory::base.
MOV(32, R(EAX), M(&psTemp));
BSWAP(32, EAX);
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
MOV(32, MDisp(ECX, (u32)Memory::base), R(EAX));
MOV(32, R(EAX), M(((char*)&psTemp) + 4));
BSWAP(32, EAX);
MOV(32, MDisp(ECX, 4+(u32)Memory::base), R(EAX));
FixupBranch arg2 = J();
SetJumpTarget(argh);
// Slow path: one Write_U32 call per float.
// NOTE(review): ECX is reused after the first call, so the protected thunk
// is assumed to preserve it - confirm.
MOV(32, R(EAX), M(((char*)&psTemp)));
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX);
MOV(32, R(EAX), M(((char*)&psTemp)+4));
ADD(32, R(ECX), Imm32(4));
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX);
SetJumpTarget(arg2);
RET();
#endif
// Type 4: scale, convert and store as two unsigned bytes.
const u8* storePairedU8 = AlignCode4();
// Broadcast the scale factor into the two low lanes of XMM1 and multiply.
SHR(32, R(EAX), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1));
#ifdef QUANTIZE_OVERFLOW_SAFE
// Clamp from above so CVTPS2DQ cannot overflow on large positive inputs.
MOVSS(XMM1, M((void *)&m_65535));
PUNPCKLDQ(XMM1, R(XMM1));
MINPS(XMM0, R(XMM1));
#endif
CVTPS2DQ(XMM0, R(XMM0));
// dword -> word with signed saturation, then word -> byte with unsigned saturation.
PACKSSDW(XMM0, R(XMM0));
PACKUSWB(XMM0, R(XMM0));
MOVD_xmm(R(EAX), XMM0);
// swap = false: the two bytes are already in memory order in AX.
SafeWriteRegToReg(AX, ECX, 16, 0, false);
RET();
// Type 6: scale, convert and store as two signed bytes.
const u8* storePairedS8 = AlignCode4();
SHR(32, R(EAX), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1));
#ifdef QUANTIZE_OVERFLOW_SAFE
MOVSS(XMM1, M((void *)&m_65535));
PUNPCKLDQ(XMM1, R(XMM1));
MINPS(XMM0, R(XMM1));
#endif
CVTPS2DQ(XMM0, R(XMM0));
// dword -> word -> byte, both with signed saturation.
PACKSSDW(XMM0, R(XMM0));
PACKSSWB(XMM0, R(XMM0));
MOVD_xmm(R(EAX), XMM0);
SafeWriteRegToReg(AX, ECX, 16, 0, false);
RET();
// Type 5: scale, convert and store as two unsigned 16-bit values.
const u8* storePairedU16 = AlignCode4();
SHR(32, R(EAX), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1));
// PACKUSDW is available only in SSE4
// so clamp to [0, 65535] in floating point instead (always, not only under
// QUANTIZE_OVERFLOW_SAFE).
PXOR(XMM1, R(XMM1));
MAXPS(XMM0, R(XMM1));
MOVSS(XMM1, M((void *)&m_65535));
PUNPCKLDQ(XMM1, R(XMM1));
MINPS(XMM0, R(XMM1));
CVTPS2DQ(XMM0, R(XMM0));
MOVQ_xmm(M(psTemp), XMM0);
// place ps[0] into the higher word, ps[1] into the lower
// so no need in ROL after BSWAP
MOVZX(32, 16, EAX, M((char*)psTemp + 0));
SHL(32, R(EAX), Imm8(16));
MOV(16, R(AX), M((char*)psTemp + 4));
BSWAP(32, EAX);
SafeWriteRegToReg(EAX, ECX, 32, 0, false);
RET();
// Type 7: scale, convert and store as two signed 16-bit values.
const u8* storePairedS16 = AlignCode4();
SHR(32, R(EAX), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
// SHUFPS or UNPCKLPS might be a better choice here. The last one might just be an alias though.
PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1));
#ifdef QUANTIZE_OVERFLOW_SAFE
MOVSS(XMM1, M((void *)&m_65535));
PUNPCKLDQ(XMM1, R(XMM1));
MINPS(XMM0, R(XMM1));
#endif
CVTPS2DQ(XMM0, R(XMM0));
// dword -> word with signed saturation; low word = first value.
PACKSSDW(XMM0, R(XMM0));
MOVD_xmm(R(EAX), XMM0);
// BSWAP followed by a 16-bit rotate byte-swaps each 16-bit half in place.
BSWAP(32, EAX);
ROL(32, R(EAX), Imm8(16));
SafeWriteRegToReg(EAX, ECX, 32, 0, false);
RET();
// Dispatch table, indexed by quantization type.
pairedStoreQuantized[0] = storePairedFloat;
pairedStoreQuantized[1] = storePairedIllegal;
pairedStoreQuantized[2] = storePairedIllegal;
pairedStoreQuantized[3] = storePairedIllegal;
pairedStoreQuantized[4] = storePairedU8;
pairedStoreQuantized[5] = storePairedU16;
pairedStoreQuantized[6] = storePairedS8;
pairedStoreQuantized[7] = storePairedS16;
}
// Emits the paired-single load routines and records their entry points in
// pairedLoadQuantized[], indexed by quantization type
// (0 = float, 4 = u8, 5 = u16, 6 = s8, 7 = s16; 1-3 trap).
// Every routine takes the source address in ECX and returns the two floats in
// the low two lanes of XMM0. The integer routines additionally read EAX:
// EAX >> 6 is used as a byte offset into m_dequantizeTableS.
// NOTE(review): presumably the caller loads EAX with the GQR half-word whose
// 6-bit scale sits in bits 8..13 - confirm against the call sites.
void CommonAsmRoutines::GenQuantizedLoads() {
// Types 1-3: no valid encoding, raise an undefined-instruction trap.
const u8* loadPairedIllegal = AlignCode4();
UD2();
// Type 0: load two floats, byte-swapping each 32-bit value.
const u8* loadPairedFloat = AlignCode4();
if (cpu_info.bSSSE3) {
// SSSE3: 64-bit load followed by a single PSHUFB byte swap.
#ifdef _M_X64
MOVQ_xmm(XMM0, MComplex(RBX, RCX, 1, 0));
#else
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
MOVQ_xmm(XMM0, MDisp(ECX, (u32)Memory::base));
#endif
PSHUFB(XMM0, M((void *)pbswapShuffle2x4));
} else {
#ifdef _M_X64
// 64-bit BSWAP reverses all eight bytes; the 32-bit rotate puts the two
// values back in their original order.
MOV(64, R(RCX), MComplex(RBX, RCX, 1, 0));
BSWAP(64, RCX);
ROL(64, R(RCX), Imm8(32));
MOVQ_xmm(XMM0, R(RCX));
#else
#if 0
// Disabled SSE2-only variant, kept for reference.
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
MOVQ_xmm(XMM0, MDisp(ECX, (u32)Memory::base));
PXOR(XMM1, R(XMM1));
PSHUFLW(XMM0, R(XMM0), 0xB1);
MOVAPD(XMM1, R(XMM0));
PSRLW(XMM0, 8);
PSLLW(XMM1, 8);
POR(XMM0, R(XMM1));
#else
// 32-bit build: swap each word in EAX and assemble the pair in psTemp.
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base));
BSWAP(32, EAX);
MOV(32, M(&psTemp[0]), R(RAX));
MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base + 4));
BSWAP(32, EAX);
MOV(32, M(((float *)&psTemp[0]) + 1), R(RAX));
MOVQ_xmm(XMM0, M(&psTemp[0]));
#endif
#endif
}
RET();
// Type 4: two unsigned bytes.
const u8* loadPairedU8 = AlignCode4();
// 16-bit load without byte swap: the first byte ends up in the low byte.
UnsafeLoadRegToRegNoSwap(ECX, ECX, 16, 0);
MOVD_xmm(XMM0, R(ECX));
// Zero-extend byte -> word -> dword by interleaving with zeros.
PXOR(XMM1, R(XMM1));
PUNPCKLBW(XMM0, R(XMM1));
PUNPCKLWD(XMM0, R(XMM1));
CVTDQ2PS(XMM0, R(XMM0));
// Broadcast the dequantization factor into the two low lanes and multiply.
SHR(32, R(EAX), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1));
RET();
// Type 6: two signed bytes.
const u8* loadPairedS8 = AlignCode4();
UnsafeLoadRegToRegNoSwap(ECX, ECX, 16, 0);
MOVD_xmm(XMM0, R(ECX));
// Replicate each byte across its dword, then arithmetic-shift to sign-extend.
PUNPCKLBW(XMM0, R(XMM0));
PUNPCKLWD(XMM0, R(XMM0));
PSRAD(XMM0, 24);
CVTDQ2PS(XMM0, R(XMM0));
SHR(32, R(EAX), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1));
RET();
// Type 5: two unsigned 16-bit values.
const u8* loadPairedU16 = AlignCode4();
// NOTE(review): UnsafeLoadRegToReg is expected to byte-swap the loaded
// 32-bit word (unlike its NoSwap sibling) - confirm. The rotate then moves
// the first halfword into the low 16 bits.
UnsafeLoadRegToReg(ECX, ECX, 32, 0, false);
ROL(32, R(ECX), Imm8(16));
MOVD_xmm(XMM0, R(ECX));
// Zero-extend word -> dword.
PXOR(XMM1, R(XMM1));
PUNPCKLWD(XMM0, R(XMM1));
CVTDQ2PS(XMM0, R(XMM0));
SHR(32, R(EAX), Imm8(6));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1));
RET();
// Type 7: two signed 16-bit values.
const u8* loadPairedS16 = AlignCode4();
UnsafeLoadRegToReg(ECX, ECX, 32, 0, false);
ROL(32, R(ECX), Imm8(16));
MOVD_xmm(XMM0, R(ECX));
// Replicate each word across its dword, then arithmetic-shift to sign-extend.
PUNPCKLWD(XMM0, R(XMM0));
PSRAD(XMM0, 16);
CVTDQ2PS(XMM0, R(XMM0));
SHR(32, R(EAX), Imm8(6));
// NOTE(review): only this routine masks the table offset to 0..0xFC; the
// other integer loaders use EAX >> 6 unmasked - confirm whether that
// difference is intentional.
AND(32, R(EAX), Imm32(0xFC));
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
PUNPCKLDQ(XMM1, R(XMM1));
MULPS(XMM0, R(XMM1));
RET();
// Dispatch table, indexed by quantization type.
pairedLoadQuantized[0] = loadPairedFloat;
pairedLoadQuantized[1] = loadPairedIllegal;
pairedLoadQuantized[2] = loadPairedIllegal;
pairedLoadQuantized[3] = loadPairedIllegal;
pairedLoadQuantized[4] = loadPairedU8;
pairedLoadQuantized[5] = loadPairedU16;
pairedLoadQuantized[6] = loadPairedS8;
pairedLoadQuantized[7] = loadPairedS16;
}

View File

@ -0,0 +1,47 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef _JITASMCOMMON_H
#define _JITASMCOMMON_H
#include "../JitCommon/Jit_Util.h"
// Code block holding runtime-generated helper routines shared between JIT
// backends: gather-pipe (FIFO) writers and paired-single quantized
// load/store routines.
class CommonAsmRoutines : public EmuCodeBlock {
protected:
// Emit the quantized load / store routines and fill the tables below.
void GenQuantizedLoads();
void GenQuantizedStores();
public:
// Emit a routine that byte-swaps the size-bit value in ABI_PARAM1 and appends it to the gather pipe.
void GenFifoWrite(int size);
// Emit a routine that appends the low 64 bits of XMM0 (already byte-swapped) to the gather pipe.
void GenFifoXmm64Write();
// Emit a routine that byte-swaps the float in XMM0 and appends it to the gather pipe.
void GenFifoFloatWrite();
// In: array index: quantization type
//     (0 = float, 4 = u8, 5 = u16, 6 = s8, 7 = s16; 1-3 are illegal and trap).
// In: EAX: for the integer types, EAX >> 6 is the byte offset of the scale
//     factor in the dequantize table (presumably the GQR half-word holding
//     the scale - confirm at the call site).
// In: ECX: Address to read from.
// Out: XMM0: Bottom two 32-bit slots hold the read value,
// converted to a pair of floats.
// Trashes: EAX ECX EDX
const u8 GC_ALIGNED16(*pairedLoadQuantized[8]);
// In: array index: quantization type
//     (0 = float, 4 = u8, 5 = u16, 6 = s8, 7 = s16; 1-3 are illegal and trap).
// In: EAX: for the integer types, EAX >> 6 is the byte offset of the scale
//     factor in the quantize table (presumably the GQR half-word holding
//     the scale - confirm at the call site).
// In: ECX: Address to write to.
// In: XMM0: Bottom two 32-bit slots hold the pair of floats to be written.
// Out: Nothing.
// Trashes: EAX ECX EDX
const u8 GC_ALIGNED16(*pairedStoreQuantized[8]);
};
#endif

View File

@ -288,7 +288,7 @@ bool JitBlock::ContainsAddress(u32 em_address)
block_numbers->push_back(i);
}
u32 JitBlockCache::GetOriginalFirstOp(u32 block_num)
u32 JitBlockCache::GetOriginalFirstOp(int block_num)
{
if (block_num >= num_blocks)
{
@ -298,9 +298,9 @@ bool JitBlock::ContainsAddress(u32 em_address)
return blocks[block_num].originalFirstOpcode;
}
CompiledCode JitBlockCache::GetCompiledCodeFromBlock(int blockNumber)
CompiledCode JitBlockCache::GetCompiledCodeFromBlock(int block_num)
{
return (CompiledCode)blockCodePointers[blockNumber];
return (CompiledCode)blockCodePointers[block_num];
}
//Block linker
@ -351,25 +351,25 @@ bool JitBlock::ContainsAddress(u32 em_address)
}
}
void JitBlockCache::DestroyBlock(int blocknum, bool invalidate)
void JitBlockCache::DestroyBlock(int block_num, bool invalidate)
{
if (blocknum < 0 || blocknum >= num_blocks)
if (block_num < 0 || block_num >= num_blocks)
{
PanicAlert("DestroyBlock: Invalid block number %d", blocknum);
PanicAlert("DestroyBlock: Invalid block number %d", block_num);
return;
}
JitBlock &b = blocks[blocknum];
JitBlock &b = blocks[block_num];
if (b.invalid)
{
if (invalidate)
PanicAlert("Invalidating invalid block %d", blocknum);
PanicAlert("Invalidating invalid block %d", block_num);
return;
}
b.invalid = true;
#ifdef JIT_UNLIMITED_ICACHE
Memory::Write_Opcode_JIT(b.originalAddress, b.originalFirstOpcode);
#else
if (Memory::ReadFast32(b.originalAddress) == blocknum)
if (Memory::ReadFast32(b.originalAddress) == block_num)
Memory::WriteUnchecked_U32(b.originalFirstOpcode, b.originalAddress);
#endif

View File

@ -130,12 +130,12 @@ public:
// This one is slow so should only be used for one-shots from the debugger UI, not for anything during runtime.
void GetBlockNumbersFromAddress(u32 em_address, std::vector<int> *block_numbers);
u32 GetOriginalFirstOp(u32 block_num);
CompiledCode GetCompiledCodeFromBlock(int blockNumber);
u32 GetOriginalFirstOp(int block_num);
CompiledCode GetCompiledCodeFromBlock(int block_num);
// DOES NOT WORK CORRECTLY WITH INLINING
void InvalidateICache(u32 em_address);
void DestroyBlock(int blocknum, bool invalidate);
void DestroyBlock(int block_num, bool invalidate);
// Not currently used
//void DestroyBlocksWithFlag(BlockFlag death_flag);

View File

@ -39,17 +39,17 @@
using namespace Gen;
void Jit64::JitClearCA()
void EmuCodeBlock::JitClearCA()
{
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0
}
void Jit64::JitSetCA()
void EmuCodeBlock::JitSetCA()
{
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1
}
void Jit64::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend)
void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend)
{
#ifdef _M_IX86
AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
@ -74,7 +74,17 @@ void Jit64::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize
}
}
void Jit64::SafeLoadRegToEAX(X64Reg reg, int accessSize, s32 offset, bool signExtend)
void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset)
{
	// Emits a zero-extending load of accessSize bits from emulated memory at
	// reg_addr + offset into reg_value, with no byte swap and no address check.
	// On 32-bit builds reg_addr is masked in place first.
#ifndef _M_IX86
	const OpArg source = MComplex(RBX, reg_addr, SCALE_1, offset);
#else
	AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
	const OpArg source = MDisp(reg_addr, (u32)Memory::base + offset);
#endif
	MOVZX(32, accessSize, reg_value, source);
}
void EmuCodeBlock::SafeLoadRegToEAX(X64Reg reg, int accessSize, s32 offset, bool signExtend)
{
if (offset)
ADD(32, R(reg), Imm32((u32)offset));
@ -96,12 +106,12 @@ void Jit64::SafeLoadRegToEAX(X64Reg reg, int accessSize, s32 offset, bool signEx
SetJumpTarget(arg2);
}
void Jit64::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset)
void EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap)
{
if (accessSize == 8 && reg_value >= 4) {
PanicAlert("WARNING: likely incorrect use of UnsafeWriteRegToReg!");
}
BSWAP(accessSize, reg_value);
if (swap) BSWAP(accessSize, reg_value);
#ifdef _M_IX86
AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
MOV(accessSize, MDisp(reg_addr, (u32)Memory::base + offset), R(reg_value));
@ -111,7 +121,7 @@ void Jit64::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSiz
}
// Destroys both arg registers
void Jit64::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset)
void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap)
{
if (offset)
ADD(32, R(reg_addr), Imm32(offset));
@ -125,11 +135,11 @@ void Jit64::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize,
}
FixupBranch arg2 = J();
SetJumpTarget(argh);
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0);
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);
SetJumpTarget(arg2);
}
void Jit64::WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address)
void EmuCodeBlock::WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address)
{
#ifdef _M_X64
MOV(accessSize, MDisp(RBX, address & 0x3FFFFFFF), arg);
@ -138,7 +148,7 @@ void Jit64::WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 ad
#endif
}
void Jit64::WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address)
void EmuCodeBlock::WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address)
{
#ifdef _M_X64
MOV(32, R(RAX), Imm32(address));
@ -148,18 +158,18 @@ void Jit64::WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address)
#endif
}
void Jit64::ForceSinglePrecisionS(X64Reg xmm) {
void EmuCodeBlock::ForceSinglePrecisionS(X64Reg xmm) {
// Most games don't need these. Zelda requires it though - some platforms get stuck without them.
if (jo.accurateSinglePrecision)
if (jit.jo.accurateSinglePrecision)
{
CVTSD2SS(xmm, R(xmm));
CVTSS2SD(xmm, R(xmm));
}
}
void Jit64::ForceSinglePrecisionP(X64Reg xmm) {
void EmuCodeBlock::ForceSinglePrecisionP(X64Reg xmm) {
// Most games don't need these. Zelda requires it though - some platforms get stuck without them.
if (jo.accurateSinglePrecision)
if (jit.jo.accurateSinglePrecision)
{
CVTPD2PS(xmm, R(xmm));
CVTPS2PD(xmm, R(xmm));

View File

@ -0,0 +1,41 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef _JITUTIL_H
#define _JITUTIL_H
#include "x64Emitter.h"
// Like XCodeBlock but has some utilities for memory access.
// Code block with helpers that emit x86 code for emulated-memory access and a
// few PowerPC state tweaks. "Unsafe" helpers access the memory view directly;
// "Safe" helpers emit an additional check/slow path around that access.
class EmuCodeBlock : public Gen::XCodeBlock {
public:
// Emit a direct load of accessSize bits from reg_addr + offset into reg_value.
// On 32-bit builds reg_addr is masked in place.
void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false);
// Same, but always zero-extends and never byte-swaps the loaded value.
void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset);
// Emit a direct store of reg_value to reg_addr + offset. When swap is true
// reg_value is byte-swapped in place first.
void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true);
// Emit a checked load; offset is added to reg in place.
// NOTE(review): result presumably lands in EAX, per the name - confirm.
void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset, bool signExtend = false);
// Emit a checked store; destroys both argument registers. swap is forwarded
// to UnsafeWriteRegToReg on the direct path.
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, bool swap = true);
// Emit a store of arg to a compile-time-constant emulated address.
void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address);
// Emit a store of the float in xmm_reg to a compile-time-constant emulated address.
void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);
// Emit code that clears / sets the CA bit in the emulated XER register.
void JitClearCA();
void JitSetCA();
// Emit a double -> single -> double round trip on xmm (scalar / packed),
// only when the JIT's accurateSinglePrecision option is enabled.
void ForceSinglePrecisionS(Gen::X64Reg xmm);
void ForceSinglePrecisionP(Gen::X64Reg xmm);
};
#endif // _JITUTIL_H

View File

@ -85,7 +85,8 @@ files = ["ActionReplay.cpp",
"PowerPC/Interpreter/Interpreter_LoadStore.cpp",
"PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp",
"PowerPC/Interpreter/Interpreter_SystemRegisters.cpp",
"PowerPC/Interpreter/Interpreter_Tables.cpp",
"PowerPC/Interpreter/Interpreter_Tables.cpp",
"PowerPC/JitCommon/JitAsmCommon.cpp",
"PowerPC/JitCommon/JitCache.cpp",
"PowerPC/JitCommon/JitBackpatch.cpp",
"PowerPC/JitCommon/Jit_Util.cpp",

View File

@ -122,7 +122,6 @@ bool BootCore(const std::string& _rFilename)
// General settings
game_ini.Get("Core", "CPUOnThread", &StartUp.bCPUThread, StartUp.bCPUThread);
game_ini.Get("Core", "SkipIdle", &StartUp.bSkipIdle, StartUp.bSkipIdle);
game_ini.Get("Core", "OptimizeQuantizers", &StartUp.bOptimizeQuantizers, StartUp.bOptimizeQuantizers);
game_ini.Get("Core", "EnableFPRF", &StartUp.bEnableFPRF, StartUp.bEnableFPRF);
game_ini.Get("Core", "TLBHack", &StartUp.iTLBHack, StartUp.iTLBHack);
// Wii settings

View File

@ -60,13 +60,12 @@ EVT_CHECKBOX(ID_INTERFACE_WIIMOTE_LEDS, CConfigMain::CoreSettingsChanged)
EVT_CHECKBOX(ID_INTERFACE_WIIMOTE_SPEAKERS, CConfigMain::CoreSettingsChanged)
EVT_CHOICE(ID_INTERFACE_LANG, CConfigMain::CoreSettingsChanged)
EVT_CHECKBOX(ID_ALLWAYS_HLE_BS2, CConfigMain::CoreSettingsChanged)
EVT_CHECKBOX(ID_ALWAYS_HLE_BS2, CConfigMain::CoreSettingsChanged)
EVT_RADIOBUTTON(ID_RADIOJIT, CConfigMain::CoreSettingsChanged)
EVT_RADIOBUTTON(ID_RADIOINT, CConfigMain::CoreSettingsChanged)
EVT_CHECKBOX(ID_CPUTHREAD, CConfigMain::CoreSettingsChanged)
EVT_CHECKBOX(ID_DSPTHREAD, CConfigMain::CoreSettingsChanged)
EVT_CHECKBOX(ID_LOCKTHREADS, CConfigMain::CoreSettingsChanged)
EVT_CHECKBOX(ID_OPTIMIZEQUANTIZERS, CConfigMain::CoreSettingsChanged)
EVT_CHECKBOX(ID_IDLESKIP, CConfigMain::CoreSettingsChanged)
EVT_CHECKBOX(ID_ENABLECHEATS, CConfigMain::CoreSettingsChanged)
EVT_CHOICE(ID_FRAMELIMIT, CConfigMain::CoreSettingsChanged)
@ -142,7 +141,6 @@ void CConfigMain::UpdateGUI()
CPUThread->Disable();
DSPThread->Disable();
LockThreads->Disable();
OptimizeQuantizers->Disable();
SkipIdle->Disable();
EnableCheats->Disable();
@ -222,15 +220,13 @@ void CConfigMain::CreateGUIControls()
// Core Settings - Advanced
//
AlwaysHLE_BS2 = new wxCheckBox(GeneralPage, ID_ALLWAYS_HLE_BS2, wxT("HLE the IPL (recommended)"), wxDefaultPosition, wxDefaultSize, 0, wxDefaultValidator);
AlwaysHLE_BS2 = new wxCheckBox(GeneralPage, ID_ALWAYS_HLE_BS2, wxT("HLE the IPL (recommended)"), wxDefaultPosition, wxDefaultSize, 0, wxDefaultValidator);
AlwaysHLE_BS2->SetValue(SConfig::GetInstance().m_LocalCoreStartupParameter.bHLE_BS2);
m_RadioJIT = new wxRadioButton(GeneralPage, ID_RADIOJIT, wxT("JIT Recompiler (recommended)"));
m_RadioInt = new wxRadioButton(GeneralPage, ID_RADIOINT, wxT("Interpreter (very slow)"));
SConfig::GetInstance().m_LocalCoreStartupParameter.bUseJIT ? m_RadioJIT->SetValue(true) : m_RadioInt->SetValue(true);
LockThreads = new wxCheckBox(GeneralPage, ID_LOCKTHREADS, wxT("Lock threads to cores"), wxDefaultPosition, wxDefaultSize, 0, wxDefaultValidator);
LockThreads->SetValue(SConfig::GetInstance().m_LocalCoreStartupParameter.bLockThreads);
OptimizeQuantizers = new wxCheckBox(GeneralPage, ID_OPTIMIZEQUANTIZERS, wxT("Optimize Quantizers (speedup)"), wxDefaultPosition, wxDefaultSize, 0, wxDefaultValidator);
OptimizeQuantizers->SetValue(SConfig::GetInstance().m_LocalCoreStartupParameter.bOptimizeQuantizers);
DSPThread = new wxCheckBox(GeneralPage, ID_DSPTHREAD, wxT("DSP on thread (recommended)"), wxDefaultPosition, wxDefaultSize, 0, wxDefaultValidator);
DSPThread->SetValue(SConfig::GetInstance().m_LocalCoreStartupParameter.bDSPThread);
@ -317,7 +313,6 @@ void CConfigMain::CreateGUIControls()
sizerCoreType->Add(m_RadioInt, 0, wxALL | wxEXPAND, 5);
sbAdvanced->Add(sizerCoreType, 0, wxALL, 5);
sbAdvanced->Add(LockThreads, 0, wxALL, 5);
sbAdvanced->Add(OptimizeQuantizers, 0, wxALL, 5);
sbAdvanced->Add(DSPThread, 0, wxALL, 5);
sCore->Add(sbBasic, 0, wxEXPAND);
sCore->AddStretchSpacer();
@ -690,7 +685,7 @@ void CConfigMain::CoreSettingsChanged(wxCommandEvent& event)
case ID_FRAMELIMIT:
SConfig::GetInstance().m_Framelimit = (u32)Framelimit->GetSelection();
break;
case ID_ALLWAYS_HLE_BS2: // Core
case ID_ALWAYS_HLE_BS2: // Core
SConfig::GetInstance().m_LocalCoreStartupParameter.bHLE_BS2 = AlwaysHLE_BS2->IsChecked();
break;
case ID_RADIOJIT:
@ -710,9 +705,6 @@ void CConfigMain::CoreSettingsChanged(wxCommandEvent& event)
case ID_LOCKTHREADS:
SConfig::GetInstance().m_LocalCoreStartupParameter.bLockThreads = LockThreads->IsChecked();
break;
case ID_OPTIMIZEQUANTIZERS:
SConfig::GetInstance().m_LocalCoreStartupParameter.bOptimizeQuantizers = OptimizeQuantizers->IsChecked();
break;
case ID_IDLESKIP:
SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle = SkipIdle->IsChecked();
break;

View File

@ -68,7 +68,6 @@ private:
wxCheckBox* CPUThread;
wxCheckBox* DSPThread;
wxCheckBox* LockThreads;
wxCheckBox* OptimizeQuantizers;
wxCheckBox* SkipIdle;
wxCheckBox* EnableCheats;
@ -159,13 +158,12 @@ private:
ID_PATHSPAGE,
ID_PLUGINPAGE,
ID_ALLWAYS_HLE_BS2,
ID_ALWAYS_HLE_BS2,
ID_RADIOJIT,
ID_RADIOINT,
ID_CPUTHREAD,
ID_DSPTHREAD,
ID_LOCKTHREADS,
ID_OPTIMIZEQUANTIZERS,
ID_IDLESKIP,
ID_ENABLECHEATS,

View File

@ -290,7 +290,6 @@ void CISOProperties::CreateGUIControls(bool IsWad)
sbCoreOverrides = new wxStaticBoxSizer(wxVERTICAL, m_GameConfig, _("Core"));
CPUThread = new wxCheckBox(m_GameConfig, ID_USEDUALCORE, _("Enable Dual Core"), wxDefaultPosition, wxDefaultSize, wxCHK_3STATE|wxCHK_ALLOW_3RD_STATE_FOR_USER, wxDefaultValidator);
SkipIdle = new wxCheckBox(m_GameConfig, ID_IDLESKIP, _("Enable Idle Skipping"), wxDefaultPosition, wxDefaultSize, wxCHK_3STATE|wxCHK_ALLOW_3RD_STATE_FOR_USER, wxDefaultValidator);
OptimizeQuantizers = new wxCheckBox(m_GameConfig, ID_OPTIMIZEQUANTIZERS, _("Optimize Quantizers"), wxDefaultPosition, wxDefaultSize, wxCHK_3STATE|wxCHK_ALLOW_3RD_STATE_FOR_USER, wxDefaultValidator);
TLBHack = new wxCheckBox(m_GameConfig, ID_TLBHACK, _("TLB Hack"), wxDefaultPosition, wxDefaultSize, wxCHK_3STATE|wxCHK_ALLOW_3RD_STATE_FOR_USER, wxDefaultValidator);
// Wii Console
sbWiiOverrides = new wxStaticBoxSizer(wxVERTICAL, m_GameConfig, _("Wii Console"));
@ -347,7 +346,6 @@ void CISOProperties::CreateGUIControls(bool IsWad)
sbCoreOverrides->Add(CPUThread, 0, wxEXPAND|wxLEFT, 5);
sbCoreOverrides->Add(SkipIdle, 0, wxEXPAND|wxLEFT, 5);
sbCoreOverrides->Add(TLBHack, 0, wxEXPAND|wxLEFT, 5);
sbCoreOverrides->Add(OptimizeQuantizers, 0, wxEXPAND|wxLEFT, 5);
sbWiiOverrides->Add(EnableProgressiveScan, 0, wxEXPAND|wxLEFT, 5);
sbWiiOverrides->Add(EnableWideScreen, 0, wxEXPAND|wxLEFT, 5);
sbVideoOverrides->Add(ForceFiltering, 0, wxEXPAND|wxLEFT, 5);
@ -806,11 +804,6 @@ void CISOProperties::LoadGameConfig()
else
SkipIdle->Set3StateValue(wxCHK_UNDETERMINED);
if (GameIni.Get("Core", "OptimizeQuantizers", &bTemp))
OptimizeQuantizers->Set3StateValue((wxCheckBoxState)bTemp);
else
OptimizeQuantizers->Set3StateValue(wxCHK_UNDETERMINED);
if (GameIni.Get("Core", "TLBHack", &bTemp))
TLBHack->Set3StateValue((wxCheckBoxState)bTemp);
else
@ -896,11 +889,6 @@ bool CISOProperties::SaveGameConfig()
else
GameIni.Set("Core", "SkipIdle", SkipIdle->Get3StateValue());
if (OptimizeQuantizers->Get3StateValue() == wxCHK_UNDETERMINED)
GameIni.DeleteKey("Core", "OptimizeQuantizers");
else
GameIni.Set("Core", "OptimizeQuantizers", OptimizeQuantizers->Get3StateValue());
if (TLBHack->Get3StateValue() == wxCHK_UNDETERMINED)
GameIni.DeleteKey("Core", "TLBHack");
else

View File

@ -81,7 +81,7 @@ class CISOProperties : public wxDialog
wxStaticText *OverrideText;
// Core
wxCheckBox *CPUThread, *SkipIdle, *OptimizeQuantizers, *TLBHack, *BPHack;
wxCheckBox *CPUThread, *SkipIdle, *TLBHack, *BPHack;
// Wii
wxCheckBox *EnableProgressiveScan, *EnableWideScreen;
// Video
@ -172,7 +172,6 @@ class CISOProperties : public wxDialog
ID_RE0FIX,
ID_ENABLEPROGRESSIVESCAN,
ID_ENABLEWIDESCREEN,
ID_OPTIMIZEQUANTIZERS,
ID_EDITCONFIG,
ID_EMUSTATE_TEXT,
ID_EMUSTATE,