RIP "Optimize Quantizers" option. Now using the safe quantizer code from JITIL in all builds.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4854 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
1a25dfe279
commit
1848e93790
|
@ -107,6 +107,7 @@ void XEmitter::ABI_CallFunctionR(void *func, X64Reg reg1) {
|
|||
ABI_RestoreStack(1 * 4);
|
||||
}
|
||||
|
||||
// Pass two registers as parameters.
|
||||
void XEmitter::ABI_CallFunctionRR(void *func, Gen::X64Reg reg1, Gen::X64Reg reg2)
|
||||
{
|
||||
ABI_AlignStack(2 * 4);
|
||||
|
@ -216,18 +217,18 @@ void XEmitter::ABI_CallFunctionR(void *func, X64Reg reg1) {
|
|||
CALL(func);
|
||||
}
|
||||
|
||||
// Pass a register as a parameter.
|
||||
// Pass two registers as parameters.
|
||||
void XEmitter::ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2) {
|
||||
if (reg2 != ABI_PARAM1) {
|
||||
if (reg1 != ABI_PARAM1)
|
||||
MOV(32, R(ABI_PARAM1), R(reg1));
|
||||
MOV(64, R(ABI_PARAM1), R(reg1));
|
||||
if (reg2 != ABI_PARAM2)
|
||||
MOV(32, R(ABI_PARAM2), R(reg2));
|
||||
MOV(64, R(ABI_PARAM2), R(reg2));
|
||||
} else {
|
||||
if (reg2 != ABI_PARAM2)
|
||||
MOV(32, R(ABI_PARAM2), R(reg2));
|
||||
MOV(64, R(ABI_PARAM2), R(reg2));
|
||||
if (reg1 != ABI_PARAM1)
|
||||
MOV(32, R(ABI_PARAM1), R(reg1));
|
||||
MOV(64, R(ABI_PARAM1), R(reg1));
|
||||
}
|
||||
CALL(func);
|
||||
}
|
||||
|
|
|
@ -1938,6 +1938,18 @@
|
|||
RelativePath=".\Src\PowerPC\JitCommon\Jit_Util.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\Src\PowerPC\JitCommon\Jit_Util.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\Src\PowerPC\JitCommon\JitAsmCommon.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\Src\PowerPC\JitCommon\JitAsmCommon.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\Src\PowerPC\JitCommon\JitBackpatch.cpp"
|
||||
>
|
||||
|
|
|
@ -108,7 +108,6 @@ void SConfig::SaveSettings()
|
|||
ini.Set("Core", "DefaultGCM", m_LocalCoreStartupParameter.m_strDefaultGCM);
|
||||
ini.Set("Core", "DVDRoot", m_LocalCoreStartupParameter.m_strDVDRoot);
|
||||
ini.Set("Core", "Apploader", m_LocalCoreStartupParameter.m_strApploader);
|
||||
ini.Set("Core", "OptimizeQuantizers", m_LocalCoreStartupParameter.bOptimizeQuantizers);
|
||||
ini.Set("Core", "EnableCheats", m_LocalCoreStartupParameter.bEnableCheats);
|
||||
ini.Set("Core", "SelectedLanguage", m_LocalCoreStartupParameter.SelectedLanguage);
|
||||
ini.Set("Core", "MemcardA", m_strMemoryCardA);
|
||||
|
@ -225,7 +224,6 @@ void SConfig::LoadSettings()
|
|||
ini.Get("Core", "DefaultGCM", &m_LocalCoreStartupParameter.m_strDefaultGCM);
|
||||
ini.Get("Core", "DVDRoot", &m_LocalCoreStartupParameter.m_strDVDRoot);
|
||||
ini.Get("Core", "Apploader", &m_LocalCoreStartupParameter.m_strApploader);
|
||||
ini.Get("Core", "OptimizeQuantizers", &m_LocalCoreStartupParameter.bOptimizeQuantizers, true);
|
||||
ini.Get("Core", "EnableCheats", &m_LocalCoreStartupParameter.bEnableCheats, false);
|
||||
ini.Get("Core", "SelectedLanguage", &m_LocalCoreStartupParameter.SelectedLanguage, 0);
|
||||
ini.Get("Core", "MemcardA", &m_strMemoryCardA);
|
||||
|
|
|
@ -63,7 +63,6 @@ struct SCoreStartupParameter
|
|||
bool bHLE_BS2;
|
||||
bool bUseFastMem;
|
||||
bool bLockThreads;
|
||||
bool bOptimizeQuantizers;
|
||||
bool bEnableCheats;
|
||||
bool bEnableIsoCache;
|
||||
|
||||
|
|
|
@ -126,7 +126,7 @@ inline void hwWriteIOBridge(u32 var, u32 addr) {WII_IOBridge::Write32(var, addr)
|
|||
inline void hwWriteIOBridge(u64 var, u32 addr) {PanicAlert("hwWriteIOBridge: There's no 64-bit HW write. %08x", addr);}
|
||||
|
||||
template <class T>
|
||||
void ReadFromHardware(T &_var, u32 em_address, u32 effective_address, Memory::XCheckTLBFlag flag)
|
||||
inline void ReadFromHardware(T &_var, u32 em_address, u32 effective_address, Memory::XCheckTLBFlag flag)
|
||||
{
|
||||
// TODO: Figure out the fastest order of tests for both read and write (they are probably different).
|
||||
if ((em_address & 0xC8000000) == 0xC8000000)
|
||||
|
@ -204,7 +204,7 @@ void ReadFromHardware(T &_var, u32 em_address, u32 effective_address, Memory::XC
|
|||
|
||||
|
||||
template <class T>
|
||||
void WriteToHardware(u32 em_address, const T data, u32 effective_address, Memory::XCheckTLBFlag flag)
|
||||
inline void WriteToHardware(u32 em_address, const T data, u32 effective_address, Memory::XCheckTLBFlag flag)
|
||||
{
|
||||
/* Debugging: CheckForBadAddresses##_type(em_address, data, false);*/
|
||||
if ((em_address & 0xC8000000) == 0xC8000000)
|
||||
|
@ -343,13 +343,6 @@ u16 Read_U16(const u32 _Address)
|
|||
|
||||
u32 Read_U32(const u32 _Address)
|
||||
{
|
||||
/*#if MAX_LOGLEVEL >= 4
|
||||
if (_Address == 0x00000000)
|
||||
{
|
||||
//PanicAlert("Program tried to read from [00000000]");
|
||||
//return 0x00000000;
|
||||
}
|
||||
#endif*/
|
||||
u32 _var = 0;
|
||||
ReadFromHardware<u32>(_var, _Address, _Address, FLAG_READ);
|
||||
#ifdef ENABLE_MEM_CHECK
|
||||
|
|
|
@ -2751,7 +2751,6 @@ DEFINE_LUA_FUNCTION(emulua_loadrom, "filename")
|
|||
// General settings
|
||||
game_ini.Get("Core", "CPUOnThread", &StartUp.bCPUThread, StartUp.bCPUThread);
|
||||
game_ini.Get("Core", "SkipIdle", &StartUp.bSkipIdle, StartUp.bSkipIdle);
|
||||
game_ini.Get("Core", "OptimizeQuantizers", &StartUp.bOptimizeQuantizers, StartUp.bOptimizeQuantizers);
|
||||
game_ini.Get("Core", "EnableFPRF", &StartUp.bEnableFPRF, StartUp.bEnableFPRF);
|
||||
game_ini.Get("Core", "TLBHack", &StartUp.iTLBHack, StartUp.iTLBHack);
|
||||
// Wii settings
|
||||
|
|
|
@ -42,6 +42,7 @@
|
|||
|
||||
#include "../PPCAnalyst.h"
|
||||
#include "../JitCommon/JitCache.h"
|
||||
#include "../JitCommon/Jit_Util.h"
|
||||
#include "JitRegCache.h"
|
||||
#include "x64Emitter.h"
|
||||
#include "x64Analyzer.h"
|
||||
|
@ -93,7 +94,7 @@ public:
|
|||
};
|
||||
|
||||
|
||||
class Jit64 : public Gen::XCodeBlock
|
||||
class Jit64 : public EmuCodeBlock
|
||||
{
|
||||
private:
|
||||
struct JitState
|
||||
|
@ -182,26 +183,14 @@ public:
|
|||
void WriteRfiExitDestInEAX();
|
||||
void WriteCallInterpreter(UGeckoInstruction _inst);
|
||||
void Cleanup();
|
||||
|
||||
void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false);
|
||||
void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0);
|
||||
void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset, bool signExtend = false);
|
||||
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset);
|
||||
|
||||
void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address);
|
||||
void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);
|
||||
void GenerateCarry(Gen::X64Reg temp_reg);
|
||||
|
||||
void ForceSinglePrecisionS(Gen::X64Reg xmm);
|
||||
void ForceSinglePrecisionP(Gen::X64Reg xmm);
|
||||
void JitClearCA();
|
||||
void JitSetCA();
|
||||
void tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg));
|
||||
typedef u32 (*Operation)(u32 a, u32 b);
|
||||
void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false);
|
||||
void fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg));
|
||||
|
||||
|
||||
// OPCODES
|
||||
void unknown_instruction(UGeckoInstruction _inst);
|
||||
void Default(UGeckoInstruction _inst);
|
||||
|
|
|
@ -216,61 +216,6 @@ void AsmRoutineManager::Generate()
|
|||
GenerateCommon();
|
||||
}
|
||||
|
||||
|
||||
// Emits a routine that byte-swaps the value in ABI_PARAM1 and stores it into
// GPFifo::m_gatherPipe at byte offset GPFifo::m_gatherPipeCount, then advances
// that count by size >> 3.
// size: operand width handed to BSWAP and to the store MOV.
// ESI (and EDX when size != 32) are pushed and popped around the body; EAX is
// overwritten with the pipe address and not restored.
void AsmRoutineManager::GenFifoWrite(int size)
|
||||
{
|
||||
// Assume value in ABI_PARAM1
|
||||
PUSH(ESI);
|
||||
if (size != 32)
|
||||
PUSH(EDX);
|
||||
BSWAP(size, ABI_PARAM1);
|
||||
// NOTE(review): the pipe address is truncated to 32 bits by the cast below —
// confirm it always fits.
MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
|
||||
// ESI = current write offset into the pipe.
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
|
||||
// Non-32-bit sizes are stored from EDX instead of ABI_PARAM1 — presumably so
// the store uses a register addressable at that width; TODO confirm.
if (size != 32) {
|
||||
MOV(32, R(EDX), R(ABI_PARAM1));
|
||||
MOV(size, MComplex(RAX, RSI, 1, 0), R(EDX));
|
||||
} else {
|
||||
MOV(size, MComplex(RAX, RSI, 1, 0), R(ABI_PARAM1));
|
||||
}
|
||||
// Advance the offset by the number of bytes written and save it back.
ADD(32, R(ESI), Imm8(size >> 3));
|
||||
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
|
||||
if (size != 32)
|
||||
POP(EDX);
|
||||
POP(ESI);
|
||||
RET();
|
||||
}
|
||||
|
||||
// Emits a routine that takes the single-precision value in XMM0, moves it
// through the temp32 scratch variable into EDX, byte-swaps it and stores it
// into GPFifo::m_gatherPipe at byte offset GPFifo::m_gatherPipeCount, then
// advances that count by 4.
// ESI and EDX are pushed and popped around the body; EAX is overwritten.
void AsmRoutineManager::GenFifoFloatWrite()
|
||||
{
|
||||
// Assume value in XMM0
|
||||
PUSH(ESI);
|
||||
PUSH(EDX);
|
||||
// Spill the float to memory so its bits can be reloaded into EDX.
MOVSS(M(&temp32), XMM0);
|
||||
MOV(32, R(EDX), M(&temp32));
|
||||
BSWAP(32, EDX);
|
||||
// NOTE(review): the pipe address is truncated to 32 bits by the cast below —
// confirm it always fits.
MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
|
||||
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
|
||||
MOV(32, MComplex(RAX, RSI, 1, 0), R(EDX));
|
||||
// Four bytes were written; save the new offset.
ADD(32, R(ESI), Imm8(4));
|
||||
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
|
||||
POP(EDX);
|
||||
POP(ESI);
|
||||
RET();
|
||||
}
|
||||
|
||||
// Emits a routine that stores XMM0 with MOVQ_xmm into GPFifo::m_gatherPipe at
// byte offset GPFifo::m_gatherPipeCount and advances that count by 8.
// No byte swap is emitted here; the caller must supply the data already swapped.
// ESI is pushed and popped around the body; EAX is overwritten.
void AsmRoutineManager::GenFifoXmm64Write()
|
||||
{
|
||||
// Assume value in XMM0. Assume pre-byteswapped (unlike the others here!)
|
||||
PUSH(ESI);
|
||||
// NOTE(review): the pipe address is truncated to 32 bits by the cast below —
// confirm it always fits.
MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
|
||||
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
|
||||
MOVQ_xmm(MComplex(RAX, RSI, 1, 0), XMM0);
|
||||
// Eight bytes were written; save the new offset.
ADD(32, R(ESI), Imm8(8));
|
||||
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
|
||||
POP(ESI);
|
||||
RET();
|
||||
}
|
||||
|
||||
void AsmRoutineManager::GenerateCommon()
|
||||
{
|
||||
// USES_CR
|
||||
|
@ -298,7 +243,9 @@ void AsmRoutineManager::GenerateCommon()
|
|||
fifoDirectWriteXmm64 = AlignCode4();
|
||||
GenFifoXmm64Write();
|
||||
|
||||
computeRcFp = AlignCode16();
|
||||
GenQuantizedLoads();
|
||||
GenQuantizedStores();
|
||||
|
||||
//CMPSD(R(XMM0), M(&zero),
|
||||
// TODO
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#define _JITASM_H
|
||||
|
||||
#include "x64Emitter.h"
|
||||
#include "../JitCommon/JitAsmCommon.h"
|
||||
|
||||
// In Dolphin, we don't use inline assembly. Instead, we generate all machine-near
|
||||
// code at runtime. In the case of fixed code like this, after writing it, we write
|
||||
|
@ -34,14 +35,11 @@
|
|||
// To add a new asm routine, just add another const here, and add the code to Generate.
|
||||
// Also, possibly increase the size of the code buffer.
|
||||
|
||||
class AsmRoutineManager : public Gen::XCodeBlock
|
||||
class AsmRoutineManager : public CommonAsmRoutines
|
||||
{
|
||||
private:
|
||||
void Generate();
|
||||
void GenerateCommon();
|
||||
void GenFifoWrite(int size);
|
||||
void GenFifoFloatWrite();
|
||||
void GenFifoXmm64Write();
|
||||
|
||||
public:
|
||||
void Init() {
|
||||
|
@ -65,7 +63,6 @@ public:
|
|||
|
||||
const u8 *fpException;
|
||||
const u8 *computeRc;
|
||||
const u8 *computeRcFp;
|
||||
const u8 *testExceptions;
|
||||
const u8 *dispatchPcInEAX;
|
||||
const u8 *doTiming;
|
||||
|
|
|
@ -70,7 +70,6 @@ protected:
|
|||
PPCCachedReg saved_regs[32];
|
||||
X64CachedReg saved_xregs[NUMXREGS];
|
||||
|
||||
void DiscardRegContentsIfCached(int preg);
|
||||
virtual const int *GetAllocationOrder(int &count) = 0;
|
||||
|
||||
XEmitter *emit;
|
||||
|
@ -79,6 +78,7 @@ public:
|
|||
virtual ~RegCache() {}
|
||||
virtual void Start(PPCAnalyst::BlockRegStats &stats) = 0;
|
||||
|
||||
void DiscardRegContentsIfCached(int preg);
|
||||
void SetEmitter(XEmitter *emitter) {emit = emitter;}
|
||||
|
||||
void FlushR(X64Reg reg);
|
||||
|
|
|
@ -39,7 +39,7 @@ const u8 GC_ALIGNED16(pbswapShuffleNoop[16]) = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
|
|||
|
||||
static double GC_ALIGNED16(psTemp[2]) = {1.0, 1.0};
|
||||
static u64 GC_ALIGNED16(temp64);
|
||||
|
||||
|
||||
// TODO(ector): Improve 64-bit version
|
||||
static void WriteDual32(u64 value, u32 address)
|
||||
{
|
||||
|
@ -95,27 +95,23 @@ void Jit64::psq_st(UGeckoInstruction inst)
|
|||
JITDISABLE(LoadStorePaired)
|
||||
js.block_flags |= BLOCK_USE_GQR0 << inst.I;
|
||||
|
||||
if (js.blockSetsQuantizers || !Core::GetStartupParameter().bOptimizeQuantizers)
|
||||
if (js.blockSetsQuantizers || !inst.RA)
|
||||
{
|
||||
Default(inst);
|
||||
return;
|
||||
}
|
||||
if (!inst.RA)
|
||||
{
|
||||
// This really should never happen. Unless we change this to also support stwux
|
||||
// TODO: Support these cases if it becomes necessary.
|
||||
Default(inst);
|
||||
return;
|
||||
}
|
||||
|
||||
const UGQR gqr(rSPR(SPR_GQR0 + inst.I));
|
||||
const EQuantizeType stType = static_cast<EQuantizeType>(gqr.ST_TYPE);
|
||||
int stScale = gqr.ST_SCALE;
|
||||
bool update = inst.OPCD == 61;
|
||||
|
||||
int offset = inst.SIMM_12;
|
||||
int a = inst.RA;
|
||||
int s = inst.RS; // Fp numbers
|
||||
|
||||
const UGQR gqr(rSPR(SPR_GQR0 + inst.I));
|
||||
const EQuantizeType stType = static_cast<EQuantizeType>(gqr.ST_TYPE);
|
||||
int stScale = gqr.ST_SCALE;
|
||||
|
||||
if (inst.W) {
|
||||
// PanicAlert("W=1: stType %i stScale %i update %i", (int)stType, (int)stScale, (int)update);
|
||||
// It's fairly common that games write stuff to the pipe using this. Then, it's pretty much only
|
||||
|
@ -165,9 +161,11 @@ void Jit64::psq_st(UGeckoInstruction inst)
|
|||
Default(inst);
|
||||
return;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Is this specialization still worth it? Let's keep it for now. It's probably
|
||||
// not very risky since a game most likely wouldn't use the same code to process
|
||||
// floats as integers (but you never know....).
|
||||
if (stType == QUANTIZE_FLOAT)
|
||||
{
|
||||
if (gpr.R(a).IsImm() && !update && cpu_info.bSSSE3)
|
||||
|
@ -182,115 +180,30 @@ void Jit64::psq_st(UGeckoInstruction inst)
|
|||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
|
||||
gpr.Lock(a);
|
||||
fpr.Lock(s);
|
||||
if (update)
|
||||
gpr.LoadToX64(a, true, true);
|
||||
MOV(32, R(ABI_PARAM2), gpr.R(a));
|
||||
if (offset)
|
||||
ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
|
||||
TEST(32, R(ABI_PARAM2), Imm32(0x0C000000));
|
||||
if (update && offset)
|
||||
MOV(32, gpr.R(a), R(ABI_PARAM2));
|
||||
CVTPD2PS(XMM0, fpr.R(s));
|
||||
SHUFPS(XMM0, R(XMM0), 1);
|
||||
MOVQ_xmm(M(&temp64), XMM0);
|
||||
#ifdef _M_X64
|
||||
MOV(64, R(ABI_PARAM1), M(&temp64));
|
||||
FixupBranch argh = J_CC(CC_NZ);
|
||||
BSWAP(64, ABI_PARAM1);
|
||||
MOV(64, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1));
|
||||
FixupBranch arg2 = J();
|
||||
SetJumpTarget(argh);
|
||||
CALL(thunks.ProtectFunction((void *)&WriteDual32, 0));
|
||||
gpr.FlushLockX(EAX, EDX);
|
||||
gpr.FlushLockX(ECX);
|
||||
if (update)
|
||||
gpr.LoadToX64(inst.RA, true, true);
|
||||
fpr.LoadToX64(inst.RS, true);
|
||||
MOV(32, R(ECX), gpr.R(inst.RA));
|
||||
if (offset)
|
||||
ADD(32, R(ECX), Imm32((u32)offset));
|
||||
if (update && offset)
|
||||
MOV(32, gpr.R(a), R(ECX));
|
||||
MOVZX(32, 16, EAX, M(&PowerPC::ppcState.spr[SPR_GQR0 + inst.I]));
|
||||
MOVZX(32, 8, EDX, R(AL));
|
||||
// FIXME: Fix ModR/M encoding to allow [EDX*4+disp32]!
|
||||
#ifdef _M_IX86
|
||||
SHL(32, R(EDX), Imm8(2));
|
||||
#else
|
||||
FixupBranch argh = J_CC(CC_NZ);
|
||||
MOV(32, R(ABI_PARAM1), M(((char*)&temp64) + 4));
|
||||
BSWAP(32, ABI_PARAM1);
|
||||
AND(32, R(ABI_PARAM2), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(32, MDisp(ABI_PARAM2, (u32)Memory::base), R(ABI_PARAM1));
|
||||
MOV(32, R(ABI_PARAM1), M(&temp64));
|
||||
BSWAP(32, ABI_PARAM1);
|
||||
MOV(32, MDisp(ABI_PARAM2, 4+(u32)Memory::base), R(ABI_PARAM1));
|
||||
FixupBranch arg2 = J();
|
||||
SetJumpTarget(argh);
|
||||
MOV(32, R(ABI_PARAM1), M(((char*)&temp64) + 4));
|
||||
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2);
|
||||
MOV(32, R(ABI_PARAM1), M(((char*)&temp64)));
|
||||
ADD(32, R(ABI_PARAM2), Imm32(4));
|
||||
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2);
|
||||
SHL(32, R(EDX), Imm8(3));
|
||||
#endif
|
||||
SetJumpTarget(arg2);
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
else if (stType == QUANTIZE_U8)
|
||||
{
|
||||
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
|
||||
gpr.Lock(a);
|
||||
fpr.Lock(s);
|
||||
if (update)
|
||||
gpr.LoadToX64(a, true, update);
|
||||
MOV(32, R(ABI_PARAM2), gpr.R(a));
|
||||
if (offset)
|
||||
ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
|
||||
if (update && offset)
|
||||
MOV(32, gpr.R(a), R(ABI_PARAM2));
|
||||
MOVAPD(XMM0, fpr.R(s));
|
||||
MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale]));
|
||||
MULPD(XMM0, R(XMM1));
|
||||
CVTPD2DQ(XMM0, R(XMM0));
|
||||
PACKSSDW(XMM0, R(XMM0));
|
||||
PACKUSWB(XMM0, R(XMM0));
|
||||
MOVD_xmm(M(&temp64), XMM0);
|
||||
MOV(16, R(ABI_PARAM1), M(&temp64));
|
||||
#ifdef _M_X64
|
||||
MOV(16, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1));
|
||||
#else
|
||||
MOV(32, R(EAX), R(ABI_PARAM2));
|
||||
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(16, MDisp(EAX, (u32)Memory::base), R(ABI_PARAM1));
|
||||
#endif
|
||||
if (update)
|
||||
MOV(32, gpr.R(a), R(ABI_PARAM2));
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
else if (stType == QUANTIZE_S16)
|
||||
{
|
||||
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
|
||||
gpr.Lock(a);
|
||||
fpr.Lock(s);
|
||||
if (update)
|
||||
gpr.LoadToX64(a, true, update);
|
||||
MOV(32, R(ABI_PARAM2), gpr.R(a));
|
||||
if (offset)
|
||||
ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
|
||||
if (update)
|
||||
MOV(32, gpr.R(a), R(ABI_PARAM2));
|
||||
MOVAPD(XMM0, fpr.R(s));
|
||||
MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale]));
|
||||
MULPD(XMM0, R(XMM1));
|
||||
SHUFPD(XMM0, R(XMM0), 1);
|
||||
CVTPD2DQ(XMM0, R(XMM0));
|
||||
PACKSSDW(XMM0, R(XMM0));
|
||||
MOVD_xmm(M(&temp64), XMM0);
|
||||
MOV(32, R(ABI_PARAM1), M(&temp64));
|
||||
SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, 32, 0);
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
else {
|
||||
// Dodger uses this.
|
||||
// mario tennis
|
||||
//PanicAlert("st %i:%i", stType, inst.W);
|
||||
Default(inst);
|
||||
}
|
||||
CVTPD2PS(XMM0, fpr.R(s));
|
||||
CALLptr(MDisp(EDX, (u32)(u64)asm_routines.pairedStoreQuantized));
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
||||
void Jit64::psq_l(UGeckoInstruction inst)
|
||||
|
@ -300,144 +213,35 @@ void Jit64::psq_l(UGeckoInstruction inst)
|
|||
|
||||
js.block_flags |= BLOCK_USE_GQR0 << inst.I;
|
||||
|
||||
if (js.blockSetsQuantizers || !Core::GetStartupParameter().bOptimizeQuantizers)
|
||||
if (js.blockSetsQuantizers || !inst.RA || inst.W)
|
||||
{
|
||||
Default(inst);
|
||||
return;
|
||||
}
|
||||
|
||||
const UGQR gqr(rSPR(SPR_GQR0 + inst.I));
|
||||
const EQuantizeType ldType = static_cast<EQuantizeType>(gqr.LD_TYPE);
|
||||
int ldScale = gqr.LD_SCALE;
|
||||
bool update = inst.OPCD == 57;
|
||||
if (!inst.RA || inst.W)
|
||||
{
|
||||
// 0 1 during load
|
||||
//PanicAlert("ld:%i %i", ldType, (int)inst.W);
|
||||
Default(inst);
|
||||
return;
|
||||
}
|
||||
int offset = inst.SIMM_12;
|
||||
switch (ldType) {
|
||||
case QUANTIZE_FLOAT: // We know this is from RAM, so we don't need to check the address.
|
||||
{
|
||||
#ifdef _M_X64
|
||||
gpr.LoadToX64(inst.RA, true, update);
|
||||
fpr.LoadToX64(inst.RS, false);
|
||||
if (cpu_info.bSSSE3) {
|
||||
X64Reg xd = fpr.R(inst.RS).GetSimpleReg();
|
||||
MOVQ_xmm(xd, MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
|
||||
PSHUFB(xd, M((void *)pbswapShuffle2x4));
|
||||
CVTPS2PD(xd, R(xd));
|
||||
} else {
|
||||
MOV(64, R(RAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
|
||||
BSWAP(64, RAX);
|
||||
MOV(64, M(&psTemp[0]), R(RAX));
|
||||
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
|
||||
CVTPS2PD(r, M(&psTemp[0]));
|
||||
SHUFPD(r, R(r), 1);
|
||||
}
|
||||
if (update && offset != 0)
|
||||
ADD(32, gpr.R(inst.RA), Imm32(offset));
|
||||
break;
|
||||
#else
|
||||
if (cpu_info.bSSSE3) {
|
||||
gpr.LoadToX64(inst.RA, true, update);
|
||||
fpr.LoadToX64(inst.RS, false);
|
||||
X64Reg xd = fpr.R(inst.RS).GetSimpleReg();
|
||||
MOV(32, R(EAX), gpr.R(inst.RA));
|
||||
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOVQ_xmm(xd, MDisp(EAX, (u32)Memory::base + offset));
|
||||
PSHUFB(xd, M((void *)pbswapShuffle2x4));
|
||||
CVTPS2PD(xd, R(xd));
|
||||
} else {
|
||||
gpr.FlushLockX(ECX);
|
||||
gpr.LoadToX64(inst.RA, true, update);
|
||||
// This can probably be optimized somewhat.
|
||||
LEA(32, ECX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset));
|
||||
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base));
|
||||
BSWAP(32, RAX);
|
||||
MOV(32, M(&psTemp[0]), R(RAX));
|
||||
MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base + 4));
|
||||
BSWAP(32, RAX);
|
||||
MOV(32, M(((float *)&psTemp[0]) + 1), R(RAX));
|
||||
fpr.LoadToX64(inst.RS, false, true);
|
||||
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
|
||||
CVTPS2PD(r, M(&psTemp[0]));
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
if (update && offset != 0)
|
||||
ADD(32, gpr.R(inst.RA), Imm32(offset));
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
case QUANTIZE_U8:
|
||||
{
|
||||
gpr.LoadToX64(inst.RA, true, update);
|
||||
#ifdef _M_X64
|
||||
MOVZX(32, 16, EAX, MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
|
||||
#else
|
||||
LEA(32, EAX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset));
|
||||
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOVZX(32, 16, EAX, MDisp(EAX, (u32)Memory::base));
|
||||
#endif
|
||||
MOV(32, M(&temp64), R(EAX));
|
||||
MOVD_xmm(XMM0, M(&temp64));
|
||||
// SSE4 optimization opportunity here.
|
||||
PXOR(XMM1, R(XMM1));
|
||||
PUNPCKLBW(XMM0, R(XMM1));
|
||||
PUNPCKLWD(XMM0, R(XMM1));
|
||||
CVTDQ2PD(XMM0, R(XMM0));
|
||||
fpr.LoadToX64(inst.RS, false, true);
|
||||
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
|
||||
MOVDDUP(r, M((void *)&m_dequantizeTableD[ldScale]));
|
||||
MULPD(r, R(XMM0));
|
||||
if (update && offset != 0)
|
||||
ADD(32, gpr.R(inst.RA), Imm32(offset));
|
||||
}
|
||||
break;
|
||||
case QUANTIZE_S16:
|
||||
{
|
||||
gpr.LoadToX64(inst.RA, true, update);
|
||||
#ifdef _M_X64
|
||||
MOV(32, R(EAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
|
||||
#else
|
||||
LEA(32, EAX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset));
|
||||
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(32, R(EAX), MDisp(EAX, (u32)Memory::base));
|
||||
#endif
|
||||
BSWAP(32, EAX);
|
||||
MOV(32, M(&temp64), R(EAX));
|
||||
fpr.LoadToX64(inst.RS, false, true);
|
||||
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
|
||||
MOVD_xmm(XMM0, M(&temp64));
|
||||
PUNPCKLWD(XMM0, R(XMM0)); // unpack to higher word in each dword..
|
||||
PSRAD(XMM0, 16); // then use this signed shift to sign extend. clever eh? :P
|
||||
CVTDQ2PD(XMM0, R(XMM0));
|
||||
MOVDDUP(r, M((void*)&m_dequantizeTableD[ldScale]));
|
||||
MULPD(r, R(XMM0));
|
||||
SHUFPD(r, R(r), 1);
|
||||
if (update && offset != 0)
|
||||
ADD(32, gpr.R(inst.RA), Imm32(offset));
|
||||
}
|
||||
break;
|
||||
|
||||
/*
|
||||
Dynamic quantizer. Todo when we have a test set.
|
||||
MOVZX(32, 8, EAX, M(((char *)&PowerPC::ppcState.spr[SPR_GQR0 + inst.I]) + 3)); // it's in the high byte.
|
||||
AND(32, R(EAX), Imm8(0x3F));
|
||||
MOV(32, R(ECX), Imm32((u32)&m_dequantizeTableD));
|
||||
MOVDDUP(r, MComplex(RCX, EAX, 8, 0));
|
||||
*/
|
||||
default:
|
||||
// 4 0
|
||||
// 6 0 //power tennis
|
||||
// 5 0
|
||||
// PanicAlert("ld:%i %i", ldType, (int)inst.W);
|
||||
Default(inst);
|
||||
return;
|
||||
}
|
||||
|
||||
//u32 EA = (m_GPR[_inst.RA] + _inst.SIMM_12) : _inst.SIMM_12;
|
||||
gpr.FlushLockX(EAX, EDX);
|
||||
gpr.FlushLockX(ECX);
|
||||
gpr.LoadToX64(inst.RA, true, true);
|
||||
fpr.LoadToX64(inst.RS, false, true);
|
||||
if (offset)
|
||||
LEA(32, ECX, MDisp(gpr.RX(inst.RA), offset));
|
||||
else
|
||||
MOV(32, R(ECX), gpr.R(inst.RA));
|
||||
if (update && offset)
|
||||
MOV(32, gpr.R(inst.RA), R(ECX));
|
||||
MOVZX(32, 16, EAX, M(((char *)&GQR(inst.I)) + 2));
|
||||
MOVZX(32, 8, EDX, R(AL));
|
||||
// FIXME: Fix ModR/M encoding to allow [EDX*4+disp32]! (MComplex can do this, no?)
|
||||
#ifdef _M_IX86
|
||||
SHL(32, R(EDX), Imm8(2));
|
||||
#else
|
||||
SHL(32, R(EDX), Imm8(3));
|
||||
#endif
|
||||
CALLptr(MDisp(EDX, (u32)(u64)asm_routines.pairedLoadQuantized));
|
||||
CVTPS2PD(fpr.RX(inst.RS), R(XMM0));
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
|
||||
#include "../PPCAnalyst.h"
|
||||
#include "../JitCommon/JitCache.h"
|
||||
#include "../JitCommon/Jit_Util.h"
|
||||
#include "x64Emitter.h"
|
||||
#include "x64Analyzer.h"
|
||||
#include "IR.h"
|
||||
|
@ -85,7 +86,7 @@ public:
|
|||
};
|
||||
|
||||
|
||||
class Jit64 : public Gen::XCodeBlock
|
||||
class Jit64 : public EmuCodeBlock
|
||||
{
|
||||
private:
|
||||
struct JitState
|
||||
|
@ -175,19 +176,10 @@ public:
|
|||
void WriteCallInterpreter(UGeckoInstruction _inst);
|
||||
void Cleanup();
|
||||
|
||||
void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false);
|
||||
void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0);
|
||||
void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset, bool signExtend = false);
|
||||
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset);
|
||||
|
||||
void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address);
|
||||
void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);
|
||||
void GenerateCarry(Gen::X64Reg temp_reg);
|
||||
|
||||
void ForceSinglePrecisionS(Gen::X64Reg xmm);
|
||||
void ForceSinglePrecisionP(Gen::X64Reg xmm);
|
||||
void JitClearCA();
|
||||
void JitSetCA();
|
||||
void tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg));
|
||||
typedef u32 (*Operation)(u32 a, u32 b);
|
||||
void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false);
|
||||
|
|
|
@ -215,403 +215,6 @@ void AsmRoutineManager::Generate()
|
|||
GenerateCommon();
|
||||
}
|
||||
|
||||
const u8 GC_ALIGNED16(pbswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||
|
||||
// Scale factors a value is multiplied by before being converted to an integer.
// Layout (64 entries):
//   [0..31]  -> 2^i
//   [32..63] -> 2^-(64 - i), i.e. 2^-32 up to 2^-1
// Every entry must be positive. Terms involving bit 31 use 1ULL because
// (1 << 31) does not fit in a 32-bit int and evaluates to a negative number,
// which would flip the sign of the scale.
const float m_quantizeTableS[] =
{
	(1 <<  0),	(1 <<  1),	(1 <<  2),	(1 <<  3),
	(1 <<  4),	(1 <<  5),	(1 <<  6),	(1 <<  7),
	(1 <<  8),	(1 <<  9),	(1 << 10),	(1 << 11),
	(1 << 12),	(1 << 13),	(1 << 14),	(1 << 15),
	(1 << 16),	(1 << 17),	(1 << 18),	(1 << 19),
	(1 << 20),	(1 << 21),	(1 << 22),	(1 << 23),
	(1 << 24),	(1 << 25),	(1 << 26),	(1 << 27),
	(1 << 28),	(1 << 29),	(1 << 30),	(1ULL << 31),
	1.0 / (1ULL << 32),	1.0 / (1ULL << 31),	1.0 / (1 << 30),	1.0 / (1 << 29),
	1.0 / (1 << 28),	1.0 / (1 << 27),	1.0 / (1 << 26),	1.0 / (1 << 25),
	1.0 / (1 << 24),	1.0 / (1 << 23),	1.0 / (1 << 22),	1.0 / (1 << 21),
	1.0 / (1 << 20),	1.0 / (1 << 19),	1.0 / (1 << 18),	1.0 / (1 << 17),
	1.0 / (1 << 16),	1.0 / (1 << 15),	1.0 / (1 << 14),	1.0 / (1 << 13),
	1.0 / (1 << 12),	1.0 / (1 << 11),	1.0 / (1 << 10),	1.0 / (1 <<  9),
	1.0 / (1 <<  8),	1.0 / (1 <<  7),	1.0 / (1 <<  6),	1.0 / (1 <<  5),
	1.0 / (1 <<  4),	1.0 / (1 <<  3),	1.0 / (1 <<  2),	1.0 / (1 <<  1),
};
|
||||
|
||||
// Reciprocal scale factors: each entry is the inverse of the entry at the same
// index in the quantize table.
// Layout (64 entries):
//   [0..31]  -> 2^-i
//   [32..63] -> 2^(64 - i), i.e. 2^32 down to 2^1
// Every entry must be positive. Terms involving bit 31 use 1ULL because
// (1 << 31) does not fit in a 32-bit int and evaluates to a negative number,
// which would flip the sign of the scale.
const float m_dequantizeTableS[] =
{
	1.0 / (1 <<  0),	1.0 / (1 <<  1),	1.0 / (1 <<  2),	1.0 / (1 <<  3),
	1.0 / (1 <<  4),	1.0 / (1 <<  5),	1.0 / (1 <<  6),	1.0 / (1 <<  7),
	1.0 / (1 <<  8),	1.0 / (1 <<  9),	1.0 / (1 << 10),	1.0 / (1 << 11),
	1.0 / (1 << 12),	1.0 / (1 << 13),	1.0 / (1 << 14),	1.0 / (1 << 15),
	1.0 / (1 << 16),	1.0 / (1 << 17),	1.0 / (1 << 18),	1.0 / (1 << 19),
	1.0 / (1 << 20),	1.0 / (1 << 21),	1.0 / (1 << 22),	1.0 / (1 << 23),
	1.0 / (1 << 24),	1.0 / (1 << 25),	1.0 / (1 << 26),	1.0 / (1 << 27),
	1.0 / (1 << 28),	1.0 / (1 << 29),	1.0 / (1 << 30),	1.0 / (1ULL << 31),
	(1ULL << 32),	(1ULL << 31),	(1 << 30),	(1 << 29),
	(1 << 28),	(1 << 27),	(1 << 26),	(1 << 25),
	(1 << 24),	(1 << 23),	(1 << 22),	(1 << 21),
	(1 << 20),	(1 << 19),	(1 << 18),	(1 << 17),
	(1 << 16),	(1 << 15),	(1 << 14),	(1 << 13),
	(1 << 12),	(1 << 11),	(1 << 10),	(1 <<  9),
	(1 <<  8),	(1 <<  7),	(1 <<  6),	(1 <<  5),
	(1 <<  4),	(1 <<  3),	(1 <<  2),	(1 <<  1),
};
|
||||
|
||||
float psTemp[2];
|
||||
|
||||
const float m_65535 = 65535.0f;
|
||||
|
||||
|
||||
#define QUANTIZE_OVERFLOW_SAFE
|
||||
|
||||
// according to Intel Docs CVTPS2DQ writes 0x80000000 if the source floating point value is out of int32 range
|
||||
// while it's OK for large negatives, it isn't for positives
|
||||
// I don't know whether the overflow actually happens in any games
|
||||
// but it potentially can cause problems, so we need some clamping
|
||||
|
||||
// TODO(ector): Improve 64-bit version
|
||||
// Writes a 64-bit value as two consecutive 32-bit stores through
// Memory::Write_U32: the upper half goes to `address`, the lower half to
// `address + 4`.
static void WriteDual32(u64 value, u32 address)
{
	const u32 upperHalf = (u32)(value >> 32);
	const u32 lowerHalf = (u32)value;

	Memory::Write_U32(upperHalf, address);
	Memory::Write_U32(lowerHalf, address + 4);
}
|
||||
|
||||
void AsmRoutineManager::GenQuantizedStores() {
|
||||
const u8* storePairedIllegal = AlignCode4();
|
||||
UD2();
|
||||
const u8* storePairedFloat = AlignCode4();
|
||||
// IN: value = XMM0, two singles in bottom. PPC address = ECX.
|
||||
#ifdef _M_X64
|
||||
// INT3();
|
||||
MOVQ_xmm(M(&psTemp[0]), XMM0);
|
||||
MOV(64, R(RAX), M(&psTemp[0]));
|
||||
//INT3();
|
||||
//MOVQ_xmm(R(RAX), XMM0);
|
||||
//INT3();
|
||||
ROL(64, R(RAX), Imm8(32)); // Swap the two - the big BSWAP will unswap.
|
||||
TEST(32, R(ECX), Imm32(0x0C000000));
|
||||
FixupBranch argh = J_CC(CC_NZ);
|
||||
BSWAP(64, RAX);
|
||||
MOV(64, MComplex(RBX, RCX, SCALE_1, 0), R(RAX));
|
||||
FixupBranch arg2 = J();
|
||||
SetJumpTarget(argh);
|
||||
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&WriteDual32, 2), RAX, RCX);
|
||||
SetJumpTarget(arg2);
|
||||
#else
|
||||
MOVQ_xmm(M(&psTemp[0]), XMM0);
|
||||
TEST(32, R(ECX), Imm32(0x0C000000));
|
||||
FixupBranch argh = J_CC(CC_NZ);
|
||||
MOV(32, R(EAX), M(&psTemp));
|
||||
BSWAP(32, EAX);
|
||||
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(32, MDisp(ECX, (u32)Memory::base), R(EAX));
|
||||
MOV(32, R(EAX), M(((char*)&psTemp) + 4));
|
||||
BSWAP(32, EAX);
|
||||
MOV(32, MDisp(ECX, 4+(u32)Memory::base), R(EAX));
|
||||
FixupBranch arg2 = J();
|
||||
SetJumpTarget(argh);
|
||||
MOV(32, R(EAX), M(((char*)&psTemp)));
|
||||
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX);
|
||||
MOV(32, R(EAX), M(((char*)&psTemp)+4));
|
||||
ADD(32, R(ECX), Imm32(4));
|
||||
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX);
|
||||
SetJumpTarget(arg2);
|
||||
#endif
|
||||
RET();
|
||||
|
||||
const u8* storePairedU8 = AlignCode4();
|
||||
//INT3();
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
#ifdef QUANTIZE_OVERFLOW_SAFE
|
||||
MOVSS(XMM1, M((void *)&m_65535));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MINPS(XMM0, R(XMM1));
|
||||
#endif
|
||||
CVTPS2DQ(XMM0, R(XMM0));
|
||||
PACKSSDW(XMM0, R(XMM0));
|
||||
PACKUSWB(XMM0, R(XMM0));
|
||||
MOVD_xmm(R(EAX), XMM0);
|
||||
#ifdef _M_X64
|
||||
MOV(16, MComplex(RBX, RCX, 1, 0), R(AX));
|
||||
#else
|
||||
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(16, MDisp(ECX, (u32)Memory::base), R(AX));
|
||||
#endif
|
||||
RET();
|
||||
|
||||
const u8* storePairedS8 = AlignCode4();
|
||||
//INT3();
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
#ifdef QUANTIZE_OVERFLOW_SAFE
|
||||
MOVSS(XMM1, M((void *)&m_65535));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MINPS(XMM0, R(XMM1));
|
||||
#endif
|
||||
CVTPS2DQ(XMM0, R(XMM0));
|
||||
PACKSSDW(XMM0, R(XMM0));
|
||||
PACKSSWB(XMM0, R(XMM0));
|
||||
MOVD_xmm(R(EAX), XMM0);
|
||||
#ifdef _M_X64
|
||||
MOV(16, MComplex(RBX, RCX, 1, 0), R(AX));
|
||||
#else
|
||||
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(16, MDisp(ECX, (u32)Memory::base), R(AX));
|
||||
#endif
|
||||
RET();
|
||||
|
||||
const u8* storePairedU16 = AlignCode4();
|
||||
//INT3();
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
|
||||
// PACKUSDW is available only in SSE4
|
||||
PXOR(XMM1, R(XMM1));
|
||||
MAXPS(XMM0, R(XMM1));
|
||||
MOVSS(XMM1, M((void *)&m_65535));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MINPS(XMM0, R(XMM1));
|
||||
|
||||
CVTPS2DQ(XMM0, R(XMM0));
|
||||
MOVQ_xmm(M(psTemp), XMM0);
|
||||
// place ps[0] into the higher word, ps[1] into the lower
|
||||
// so no need in ROL after BSWAP
|
||||
MOVZX(32, 16, EAX, M((char*)psTemp + 0));
|
||||
SHL(32, R(EAX), Imm8(16));
|
||||
MOV(16, R(AX), M((char*)psTemp + 4));
|
||||
|
||||
BSWAP(32, EAX);
|
||||
//ROL(32, R(EAX), Imm8(16));
|
||||
#ifdef _M_X64
|
||||
MOV(32, MComplex(RBX, RCX, 1, 0), R(EAX));
|
||||
#else
|
||||
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(32, MDisp(ECX, (u32)Memory::base), R(EAX));
|
||||
#endif
|
||||
RET();
|
||||
|
||||
const u8* storePairedS16 = AlignCode4();
|
||||
//INT3();
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
#ifdef QUANTIZE_OVERFLOW_SAFE
|
||||
MOVSS(XMM1, M((void *)&m_65535));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MINPS(XMM0, R(XMM1));
|
||||
#endif
|
||||
CVTPS2DQ(XMM0, R(XMM0));
|
||||
PACKSSDW(XMM0, R(XMM0));
|
||||
MOVD_xmm(R(EAX), XMM0);
|
||||
BSWAP(32, EAX);
|
||||
ROL(32, R(EAX), Imm8(16));
|
||||
#ifdef _M_X64
|
||||
MOV(32, MComplex(RBX, RCX, 1, 0), R(EAX));
|
||||
#else
|
||||
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(32, MDisp(ECX, (u32)Memory::base), R(EAX));
|
||||
#endif
|
||||
RET();
|
||||
|
||||
pairedStoreQuantized[0] = storePairedFloat;
|
||||
pairedStoreQuantized[1] = storePairedIllegal;
|
||||
pairedStoreQuantized[2] = storePairedIllegal;
|
||||
pairedStoreQuantized[3] = storePairedIllegal;
|
||||
pairedStoreQuantized[4] = storePairedU8;
|
||||
pairedStoreQuantized[5] = storePairedU16;
|
||||
pairedStoreQuantized[6] = storePairedS8;
|
||||
pairedStoreQuantized[7] = storePairedS16;
|
||||
}
|
||||
|
||||
void AsmRoutineManager::GenQuantizedLoads() {
|
||||
const u8* loadPairedIllegal = AlignCode4();
|
||||
UD2();
|
||||
const u8* loadPairedFloat = AlignCode4();
|
||||
if (cpu_info.bSSSE3) {
|
||||
#ifdef _M_X64
|
||||
MOVQ_xmm(XMM0, MComplex(RBX, RCX, 1, 0));
|
||||
#else
|
||||
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOVQ_xmm(XMM0, MDisp(ECX, (u32)Memory::base));
|
||||
#endif
|
||||
PSHUFB(XMM0, M((void *)pbswapShuffle2x4));
|
||||
} else {
|
||||
#ifdef _M_X64
|
||||
MOV(64, R(RCX), MComplex(RBX, RCX, 1, 0));
|
||||
BSWAP(64, RCX);
|
||||
ROL(64, R(RCX), Imm8(32));
|
||||
MOVQ_xmm(XMM0, R(RCX));
|
||||
#else
|
||||
#if 0
|
||||
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOVQ_xmm(XMM0, MDisp(ECX, (u32)Memory::base));
|
||||
PXOR(XMM1, R(XMM1));
|
||||
PSHUFLW(XMM0, R(XMM0), 0xB1);
|
||||
MOVAPD(XMM1, R(XMM0));
|
||||
PSRLW(XMM0, 8);
|
||||
PSLLW(XMM1, 8);
|
||||
POR(XMM0, R(XMM1));
|
||||
#else
|
||||
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base));
|
||||
BSWAP(32, EAX);
|
||||
MOV(32, M(&psTemp[0]), R(RAX));
|
||||
MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base + 4));
|
||||
BSWAP(32, EAX);
|
||||
MOV(32, M(((float *)&psTemp[0]) + 1), R(RAX));
|
||||
MOVQ_xmm(XMM0, M(&psTemp[0]));
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
RET();
|
||||
|
||||
const u8* loadPairedU8 = AlignCode4();
|
||||
#ifdef _M_X64
|
||||
MOVZX(32, 16, ECX, MComplex(RBX, RCX, 1, 0));
|
||||
#else
|
||||
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOVZX(32, 16, ECX, MDisp(ECX, (u32)Memory::base));
|
||||
#endif
|
||||
MOVD_xmm(XMM0, R(ECX));
|
||||
PXOR(XMM1, R(XMM1));
|
||||
PUNPCKLBW(XMM0, R(XMM1));
|
||||
PUNPCKLWD(XMM0, R(XMM1));
|
||||
CVTDQ2PS(XMM0, R(XMM0));
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
RET();
|
||||
|
||||
const u8* loadPairedS8 = AlignCode4();
|
||||
#ifdef _M_X64
|
||||
MOVZX(32, 16, ECX, MComplex(RBX, RCX, 1, 0));
|
||||
#else
|
||||
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOVZX(32, 16, ECX, MDisp(ECX, (u32)Memory::base));
|
||||
#endif
|
||||
MOVD_xmm(XMM0, R(ECX));
|
||||
PUNPCKLBW(XMM0, R(XMM0));
|
||||
PUNPCKLWD(XMM0, R(XMM0));
|
||||
PSRAD(XMM0, 24);
|
||||
CVTDQ2PS(XMM0, R(XMM0));
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
RET();
|
||||
|
||||
const u8* loadPairedU16 = AlignCode4();
|
||||
#ifdef _M_X64
|
||||
MOV(32, R(ECX), MComplex(RBX, RCX, 1, 0));
|
||||
#else
|
||||
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(32, R(ECX), MDisp(ECX, (u32)Memory::base));
|
||||
#endif
|
||||
BSWAP(32, ECX);
|
||||
ROL(32, R(ECX), Imm8(16));
|
||||
MOVD_xmm(XMM0, R(ECX));
|
||||
PXOR(XMM1, R(XMM1));
|
||||
PUNPCKLWD(XMM0, R(XMM1));
|
||||
CVTDQ2PS(XMM0, R(XMM0));
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
RET();
|
||||
|
||||
const u8* loadPairedS16 = AlignCode4();
|
||||
#ifdef _M_X64
|
||||
MOV(32, R(ECX), MComplex(RBX, RCX, 1, 0));
|
||||
#else
|
||||
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(32, R(ECX), MDisp(ECX, (u32)Memory::base));
|
||||
#endif
|
||||
BSWAP(32, ECX);
|
||||
ROL(32, R(ECX), Imm8(16));
|
||||
MOVD_xmm(XMM0, R(ECX));
|
||||
PUNPCKLWD(XMM0, R(XMM0));
|
||||
PSRAD(XMM0, 16);
|
||||
CVTDQ2PS(XMM0, R(XMM0));
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
AND(32, R(EAX), Imm32(0xFC));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
RET();
|
||||
|
||||
pairedLoadQuantized[0] = loadPairedFloat;
|
||||
pairedLoadQuantized[1] = loadPairedIllegal;
|
||||
pairedLoadQuantized[2] = loadPairedIllegal;
|
||||
pairedLoadQuantized[3] = loadPairedIllegal;
|
||||
pairedLoadQuantized[4] = loadPairedU8;
|
||||
pairedLoadQuantized[5] = loadPairedU16;
|
||||
pairedLoadQuantized[6] = loadPairedS8;
|
||||
pairedLoadQuantized[7] = loadPairedS16;
|
||||
}
|
||||
|
||||
void AsmRoutineManager::GenFifoWrite(int size)
|
||||
{
|
||||
// Assume value in ABI_PARAM1
|
||||
PUSH(ESI);
|
||||
if (size != 32)
|
||||
PUSH(EDX);
|
||||
BSWAP(size, ABI_PARAM1);
|
||||
MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
|
||||
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
|
||||
if (size != 32) {
|
||||
MOV(32, R(EDX), R(ABI_PARAM1));
|
||||
MOV(size, MComplex(RAX, RSI, 1, 0), R(EDX));
|
||||
} else {
|
||||
MOV(size, MComplex(RAX, RSI, 1, 0), R(ABI_PARAM1));
|
||||
}
|
||||
ADD(32, R(ESI), Imm8(size >> 3));
|
||||
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
|
||||
if (size != 32)
|
||||
POP(EDX);
|
||||
POP(ESI);
|
||||
RET();
|
||||
}
|
||||
|
||||
void AsmRoutineManager::GenFifoFloatWrite()
|
||||
{
|
||||
// Assume value in XMM0
|
||||
PUSH(ESI);
|
||||
PUSH(EDX);
|
||||
MOVSS(M(&temp32), XMM0);
|
||||
MOV(32, R(EDX), M(&temp32));
|
||||
BSWAP(32, EDX);
|
||||
MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
|
||||
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
|
||||
MOV(32, MComplex(RAX, RSI, 1, 0), R(EDX));
|
||||
ADD(32, R(ESI), Imm8(4));
|
||||
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
|
||||
POP(EDX);
|
||||
POP(ESI);
|
||||
RET();
|
||||
}
|
||||
|
||||
void AsmRoutineManager::GenFifoXmm64Write()
|
||||
{
|
||||
// Assume value in XMM0. Assume pre-byteswapped (unlike the others here!)
|
||||
PUSH(ESI);
|
||||
MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
|
||||
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
|
||||
MOVQ_xmm(MComplex(RAX, RSI, 1, 0), XMM0);
|
||||
ADD(32, R(ESI), Imm8(8));
|
||||
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
|
||||
POP(ESI);
|
||||
RET();
|
||||
}
|
||||
|
||||
void AsmRoutineManager::GenerateCommon()
|
||||
{
|
||||
// USES_CR
|
||||
|
@ -649,7 +252,6 @@ void AsmRoutineManager::GenerateCommon()
|
|||
GenQuantizedLoads();
|
||||
GenQuantizedStores();
|
||||
|
||||
computeRcFp = AlignCode16();
|
||||
//CMPSD(R(XMM0), M(&zero),
|
||||
// TODO
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#define _JITASM_H
|
||||
|
||||
#include "x64Emitter.h"
|
||||
#include "../JitCommon/JitAsmCommon.h"
|
||||
|
||||
// In Dolphin, we don't use inline assembly. Instead, we generate all machine-near
|
||||
// code at runtime. In the case of fixed code like this, after writing it, we write
|
||||
|
@ -34,16 +35,11 @@
|
|||
// To add a new asm routine, just add another const here, and add the code to Generate.
|
||||
// Also, possibly increase the size of the code buffer.
|
||||
|
||||
class AsmRoutineManager : public Gen::XCodeBlock
|
||||
class AsmRoutineManager : public CommonAsmRoutines
|
||||
{
|
||||
private:
|
||||
void Generate();
|
||||
void GenerateCommon();
|
||||
void GenFifoWrite(int size);
|
||||
void GenFifoFloatWrite();
|
||||
void GenFifoXmm64Write(); // yes, 32 & 64-bit compatible
|
||||
void GenQuantizedLoads();
|
||||
void GenQuantizedStores();
|
||||
|
||||
public:
|
||||
void Init() {
|
||||
|
@ -67,7 +63,6 @@ public:
|
|||
|
||||
const u8 *fpException;
|
||||
const u8 *computeRc;
|
||||
const u8 *computeRcFp;
|
||||
const u8 *testExceptions;
|
||||
const u8 *dispatchPcInEAX;
|
||||
const u8 *doTiming;
|
||||
|
@ -82,8 +77,6 @@ public:
|
|||
|
||||
const u8 *doReJit;
|
||||
|
||||
const u8 *pairedLoadQuantized[8];
|
||||
const u8 *pairedStoreQuantized[8];
|
||||
|
||||
bool compareEnabled;
|
||||
};
|
||||
|
|
|
@ -15,9 +15,6 @@
|
|||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
// TODO(ector): Tons of pshufb optimization of the loads/stores, for SSSE3+, possibly SSE4, only.
|
||||
// Should give a very noticable speed boost to paired single heavy code.
|
||||
|
||||
#include "Common.h"
|
||||
|
||||
#include "Thunk.h"
|
||||
|
@ -39,9 +36,8 @@
|
|||
void Jit64::psq_st(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
DISABLE64
|
||||
JITDISABLE(LoadStorePaired)
|
||||
if (inst.W || !Core::GetStartupParameter().bOptimizeQuantizers) {Default(inst); return;}
|
||||
if (inst.W) {Default(inst); return;}
|
||||
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_12), val;
|
||||
if (inst.RA)
|
||||
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
|
||||
|
@ -55,9 +51,8 @@ void Jit64::psq_st(UGeckoInstruction inst)
|
|||
void Jit64::psq_l(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
DISABLE64
|
||||
JITDISABLE(LoadStorePaired)
|
||||
if (inst.W || !Core::GetStartupParameter().bOptimizeQuantizers) {Default(inst); return;}
|
||||
if (inst.W) {Default(inst); return;}
|
||||
IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_12), val;
|
||||
if (inst.RA)
|
||||
addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
|
||||
|
|
|
@ -0,0 +1,394 @@
|
|||
// Copyright (C) 2003 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#include "ABI.h"
|
||||
#include "Thunk.h"
|
||||
#include "CPUDetect.h"
|
||||
#include "x64Emitter.h"
|
||||
|
||||
#include "../../HW/Memmap.h"
|
||||
|
||||
#include "../PowerPC.h"
|
||||
#include "../../CoreTiming.h"
|
||||
#include "MemoryUtil.h"
|
||||
|
||||
#include "ABI.h"
|
||||
#include "../JitCommon/JitCache.h"
|
||||
|
||||
#include "../../HW/GPFifo.h"
|
||||
#include "../../Core.h"
|
||||
#include "JitAsmCommon.h"
|
||||
|
||||
using namespace Gen;
|
||||
|
||||
// Scratch slot used by GenFifoFloatWrite to move a float from XMM0 into a
// general-purpose register (MOVSS to memory, then a 32-bit MOV back).
static int temp32;
|
||||
|
||||
void CommonAsmRoutines::GenFifoWrite(int size)
|
||||
{
|
||||
// Assume value in ABI_PARAM1
|
||||
PUSH(ESI);
|
||||
if (size != 32)
|
||||
PUSH(EDX);
|
||||
BSWAP(size, ABI_PARAM1);
|
||||
MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
|
||||
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
|
||||
if (size != 32) {
|
||||
MOV(32, R(EDX), R(ABI_PARAM1));
|
||||
MOV(size, MComplex(RAX, RSI, 1, 0), R(EDX));
|
||||
} else {
|
||||
MOV(size, MComplex(RAX, RSI, 1, 0), R(ABI_PARAM1));
|
||||
}
|
||||
ADD(32, R(ESI), Imm8(size >> 3));
|
||||
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
|
||||
if (size != 32)
|
||||
POP(EDX);
|
||||
POP(ESI);
|
||||
RET();
|
||||
}
|
||||
|
||||
void CommonAsmRoutines::GenFifoFloatWrite()
|
||||
{
|
||||
// Assume value in XMM0
|
||||
PUSH(ESI);
|
||||
PUSH(EDX);
|
||||
MOVSS(M(&temp32), XMM0);
|
||||
MOV(32, R(EDX), M(&temp32));
|
||||
BSWAP(32, EDX);
|
||||
MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
|
||||
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
|
||||
MOV(32, MComplex(RAX, RSI, 1, 0), R(EDX));
|
||||
ADD(32, R(ESI), Imm8(4));
|
||||
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
|
||||
POP(EDX);
|
||||
POP(ESI);
|
||||
RET();
|
||||
}
|
||||
|
||||
void CommonAsmRoutines::GenFifoXmm64Write()
|
||||
{
|
||||
// Assume value in XMM0. Assume pre-byteswapped (unlike the others here!)
|
||||
PUSH(ESI);
|
||||
MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
|
||||
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
|
||||
MOVQ_xmm(MComplex(RAX, RSI, 1, 0), XMM0);
|
||||
ADD(32, R(ESI), Imm8(8));
|
||||
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
|
||||
POP(ESI);
|
||||
RET();
|
||||
}
|
||||
|
||||
// Safe + Fast Quantizers, originally from JITIL by magumagu
|
||||
|
||||
// PSHUFB control mask used by loadPairedFloat: reverses the byte order inside
// each of the two low 32-bit lanes (3,2,1,0 and 7,6,5,4) and leaves the upper
// eight bytes in place.
static const u8 GC_ALIGNED16(pbswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||
|
||||
// Scale factors applied (MULPS) before converting floats to integers in the
// quantized store routines. 64 entries: index s in 0..31 holds 2^s, index
// 32..63 holds 2^(s - 64), i.e. 2^-32 .. 2^-1, so the table reads as 2^s for
// a 6-bit two's-complement s (presumably the GQR store-scale field - confirm
// against the callers that set up EAX).
//
// Every shift is done on 1ULL: a plain `1 << 31` is evaluated as a signed int
// and comes out as INT_MIN (and is undefined before C++14), which made the
// 2^31 and 2^-31 entries negative.
static const float GC_ALIGNED16(m_quantizeTableS[]) =
{
	(1ULL <<  0),	(1ULL <<  1),	(1ULL <<  2),	(1ULL <<  3),
	(1ULL <<  4),	(1ULL <<  5),	(1ULL <<  6),	(1ULL <<  7),
	(1ULL <<  8),	(1ULL <<  9),	(1ULL << 10),	(1ULL << 11),
	(1ULL << 12),	(1ULL << 13),	(1ULL << 14),	(1ULL << 15),
	(1ULL << 16),	(1ULL << 17),	(1ULL << 18),	(1ULL << 19),
	(1ULL << 20),	(1ULL << 21),	(1ULL << 22),	(1ULL << 23),
	(1ULL << 24),	(1ULL << 25),	(1ULL << 26),	(1ULL << 27),
	(1ULL << 28),	(1ULL << 29),	(1ULL << 30),	(1ULL << 31),
	1.0 / (1ULL << 32),	1.0 / (1ULL << 31),	1.0 / (1ULL << 30),	1.0 / (1ULL << 29),
	1.0 / (1ULL << 28),	1.0 / (1ULL << 27),	1.0 / (1ULL << 26),	1.0 / (1ULL << 25),
	1.0 / (1ULL << 24),	1.0 / (1ULL << 23),	1.0 / (1ULL << 22),	1.0 / (1ULL << 21),
	1.0 / (1ULL << 20),	1.0 / (1ULL << 19),	1.0 / (1ULL << 18),	1.0 / (1ULL << 17),
	1.0 / (1ULL << 16),	1.0 / (1ULL << 15),	1.0 / (1ULL << 14),	1.0 / (1ULL << 13),
	1.0 / (1ULL << 12),	1.0 / (1ULL << 11),	1.0 / (1ULL << 10),	1.0 / (1ULL <<  9),
	1.0 / (1ULL <<  8),	1.0 / (1ULL <<  7),	1.0 / (1ULL <<  6),	1.0 / (1ULL <<  5),
	1.0 / (1ULL <<  4),	1.0 / (1ULL <<  3),	1.0 / (1ULL <<  2),	1.0 / (1ULL <<  1),
};
|
||||
|
||||
// Scale factors applied (MULPS) after converting integers to floats in the
// quantized load routines; the reciprocal of m_quantizeTableS entry by entry.
// 64 entries: index s in 0..31 holds 2^-s, index 32..63 holds 2^(64 - s),
// i.e. 2^32 .. 2^1 (presumably indexed by the 6-bit GQR load-scale field -
// confirm against the callers that set up EAX).
//
// Every shift is done on 1ULL: a plain `1 << 31` is evaluated as a signed int
// and comes out as INT_MIN (and is undefined before C++14), which made the
// 2^-31 and 2^31 entries negative.
static const float GC_ALIGNED16(m_dequantizeTableS[]) =
{
	1.0 / (1ULL <<  0),	1.0 / (1ULL <<  1),	1.0 / (1ULL <<  2),	1.0 / (1ULL <<  3),
	1.0 / (1ULL <<  4),	1.0 / (1ULL <<  5),	1.0 / (1ULL <<  6),	1.0 / (1ULL <<  7),
	1.0 / (1ULL <<  8),	1.0 / (1ULL <<  9),	1.0 / (1ULL << 10),	1.0 / (1ULL << 11),
	1.0 / (1ULL << 12),	1.0 / (1ULL << 13),	1.0 / (1ULL << 14),	1.0 / (1ULL << 15),
	1.0 / (1ULL << 16),	1.0 / (1ULL << 17),	1.0 / (1ULL << 18),	1.0 / (1ULL << 19),
	1.0 / (1ULL << 20),	1.0 / (1ULL << 21),	1.0 / (1ULL << 22),	1.0 / (1ULL << 23),
	1.0 / (1ULL << 24),	1.0 / (1ULL << 25),	1.0 / (1ULL << 26),	1.0 / (1ULL << 27),
	1.0 / (1ULL << 28),	1.0 / (1ULL << 29),	1.0 / (1ULL << 30),	1.0 / (1ULL << 31),
	(1ULL << 32),	(1ULL << 31),	(1ULL << 30),	(1ULL << 29),
	(1ULL << 28),	(1ULL << 27),	(1ULL << 26),	(1ULL << 25),
	(1ULL << 24),	(1ULL << 23),	(1ULL << 22),	(1ULL << 21),
	(1ULL << 20),	(1ULL << 19),	(1ULL << 18),	(1ULL << 17),
	(1ULL << 16),	(1ULL << 15),	(1ULL << 14),	(1ULL << 13),
	(1ULL << 12),	(1ULL << 11),	(1ULL << 10),	(1ULL <<  9),
	(1ULL <<  8),	(1ULL <<  7),	(1ULL <<  6),	(1ULL <<  5),
	(1ULL <<  4),	(1ULL <<  3),	(1ULL <<  2),	(1ULL <<  1),
};
|
||||
|
||||
// Scratch buffer the generated routines use to bounce a pair of 32-bit values
// between XMM0 and general-purpose registers (MOVQ_xmm to/from memory).
// Shared static storage, so the generated code is not reentrant with respect
// to this buffer.
static float GC_ALIGNED16(psTemp[4]);
|
||||
|
||||
// Upper clamp applied with MINPS before CVTPS2DQ in the quantized stores.
static const float m_65535 = 65535.0f;
|
||||
|
||||
|
||||
#define QUANTIZE_OVERFLOW_SAFE
|
||||
|
||||
// according to Intel Docs CVTPS2DQ writes 0x80000000 if the source floating point value is out of int32 range
|
||||
// while it's OK for large negatives, it isn't for positives
|
||||
// I don't know whether the overflow actually happens in any games
|
||||
// but it potentially can cause problems, so we need some clamping
|
||||
|
||||
// TODO(ector): Improve 64-bit version
|
||||
// Slow-path helper called from the generated x64 storePairedFloat routine:
// writes the upper 32 bits of `value` to `address` and the lower 32 bits to
// `address + 4`, each through Memory::Write_U32.
static void WriteDual32(u64 value, u32 address)
{
	const u32 upper_half = (u32)(value >> 32);
	const u32 lower_half = (u32)value;

	Memory::Write_U32(upper_half, address);
	Memory::Write_U32(lower_half, address + 4);
}
|
||||
|
||||
// See comment in header for in/outs.
|
||||
// Emits the paired-single store routines and records their entry points in
// pairedStoreQuantized[]:
//   [0] float pair, [1..3] trap (UD2), [4] u8, [5] u16, [6] s8, [7] s16.
// Register contract of the generated code, as visible in this function:
//   ECX  - destination address
//   XMM0 - the two floats to store in its low two lanes
//   EAX  - for the integer formats: shifted right by 6 and then used as a
//          byte offset into m_quantizeTableS (the caller must have prepared
//          it accordingly; that setup is not visible here)
// Each routine ends with RET.
void CommonAsmRoutines::GenQuantizedStores() {
|
||||
// Entry used for the unsupported type slots: executes UD2.
const u8* storePairedIllegal = AlignCode4();
|
||||
UD2();
|
||||
// ---- Float pair: no scaling, just byte-swap and store 8 bytes ----
const u8* storePairedFloat = AlignCode4();
|
||||
|
||||
#ifdef _M_X64
|
||||
// Shuffle immediate 1 puts lane 1 into lane 0 and lane 0 into lane 1, so
// after the 64-bit BSWAP below the first float ends up at the lower address.
SHUFPS(XMM0, R(XMM0), 1);
|
||||
MOVQ_xmm(M(&psTemp[0]), XMM0);
|
||||
MOV(64, R(RAX), M(&psTemp[0]));
|
||||
// Addresses with any of these bits set take the slow path through the C++
// write function. Presumably these are non-RAM ranges - confirm against the
// memory map.
TEST(32, R(ECX), Imm32(0x0C000000));
|
||||
FixupBranch too_complex = J_CC(CC_NZ);
|
||||
// Fast path: direct store relative to RBX.
BSWAP(64, RAX);
|
||||
MOV(64, MComplex(RBX, RCX, SCALE_1, 0), R(RAX));
|
||||
FixupBranch skip_complex = J();
|
||||
SetJumpTarget(too_complex);
|
||||
// Slow path: RAX is passed unswapped; WriteDual32 writes its upper half to
// the address and its lower half to address + 4.
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&WriteDual32, 2), RAX, RCX);
|
||||
SetJumpTarget(skip_complex);
|
||||
RET();
|
||||
#else
|
||||
MOVQ_xmm(M(&psTemp[0]), XMM0);
|
||||
// Same address test as the 64-bit build selects between fast and slow path.
TEST(32, R(ECX), Imm32(0x0C000000));
|
||||
FixupBranch argh = J_CC(CC_NZ);
|
||||
// Fast path: two byte-swapped 32-bit stores at Memory::base + masked address.
MOV(32, R(EAX), M(&psTemp));
|
||||
BSWAP(32, EAX);
|
||||
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(32, MDisp(ECX, (u32)Memory::base), R(EAX));
|
||||
MOV(32, R(EAX), M(((char*)&psTemp) + 4));
|
||||
BSWAP(32, EAX);
|
||||
MOV(32, MDisp(ECX, 4+(u32)Memory::base), R(EAX));
|
||||
FixupBranch arg2 = J();
|
||||
SetJumpTarget(argh);
|
||||
// Slow path: two Memory::Write_U32 calls, the second at address + 4.
// NOTE(review): ECX is reused after the first call, so this relies on the
// thunk from ProtectFunction preserving it - confirm.
MOV(32, R(EAX), M(((char*)&psTemp)));
|
||||
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX);
|
||||
MOV(32, R(EAX), M(((char*)&psTemp)+4));
|
||||
ADD(32, R(ECX), Imm32(4));
|
||||
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX);
|
||||
SetJumpTarget(arg2);
|
||||
RET();
|
||||
#endif
|
||||
|
||||
// ---- Unsigned 8-bit pair ----
const u8* storePairedU8 = AlignCode4();
|
||||
// Load the scale factor selected by EAX, duplicate it into both low lanes
// and multiply.
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
#ifdef QUANTIZE_OVERFLOW_SAFE
|
||||
// Clamp from above to 65535 so CVTPS2DQ cannot overflow on large positives.
MOVSS(XMM1, M((void *)&m_65535));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MINPS(XMM0, R(XMM1));
|
||||
#endif
|
||||
// float -> int32 -> int16 (signed saturation) -> uint8 (unsigned saturation).
CVTPS2DQ(XMM0, R(XMM0));
|
||||
PACKSSDW(XMM0, R(XMM0));
|
||||
PACKUSWB(XMM0, R(XMM0));
|
||||
MOVD_xmm(R(EAX), XMM0);
|
||||
// Two bytes stored without a swap: the first value is already the low byte.
SafeWriteRegToReg(AX, ECX, 16, 0, false);
|
||||
|
||||
RET();
|
||||
|
||||
// ---- Signed 8-bit pair: as U8, but the final pack saturates as signed ----
const u8* storePairedS8 = AlignCode4();
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
#ifdef QUANTIZE_OVERFLOW_SAFE
|
||||
MOVSS(XMM1, M((void *)&m_65535));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MINPS(XMM0, R(XMM1));
|
||||
#endif
|
||||
CVTPS2DQ(XMM0, R(XMM0));
|
||||
PACKSSDW(XMM0, R(XMM0));
|
||||
PACKSSWB(XMM0, R(XMM0));
|
||||
MOVD_xmm(R(EAX), XMM0);
|
||||
|
||||
SafeWriteRegToReg(AX, ECX, 16, 0, false);
|
||||
|
||||
RET();
|
||||
|
||||
// ---- Unsigned 16-bit pair ----
const u8* storePairedU16 = AlignCode4();
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
|
||||
// PACKUSDW is available only in SSE4, so clamp to [0, 65535] in float
// (MAXPS against zero, MINPS against 65535) and take the low 16 bits of each
// converted lane by hand below.
PXOR(XMM1, R(XMM1));
|
||||
MAXPS(XMM0, R(XMM1));
|
||||
MOVSS(XMM1, M((void *)&m_65535));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MINPS(XMM0, R(XMM1));
|
||||
|
||||
CVTPS2DQ(XMM0, R(XMM0));
|
||||
MOVQ_xmm(M(psTemp), XMM0);
|
||||
// place ps[0] into the higher word, ps[1] into the lower
|
||||
// so no need in ROL after BSWAP
|
||||
MOVZX(32, 16, EAX, M((char*)psTemp + 0));
|
||||
SHL(32, R(EAX), Imm8(16));
|
||||
MOV(16, R(AX), M((char*)psTemp + 4));
|
||||
|
||||
BSWAP(32, EAX);
|
||||
// Already swapped above, so the write helper is told not to swap again.
SafeWriteRegToReg(EAX, ECX, 32, 0, false);
|
||||
|
||||
RET();
|
||||
|
||||
// ---- Signed 16-bit pair ----
const u8* storePairedS16 = AlignCode4();
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
|
||||
// SHUFPS or UNPCKLPS might be a better choice here. The last one might just be an alias though.
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
#ifdef QUANTIZE_OVERFLOW_SAFE
|
||||
MOVSS(XMM1, M((void *)&m_65535));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MINPS(XMM0, R(XMM1));
|
||||
#endif
|
||||
CVTPS2DQ(XMM0, R(XMM0));
|
||||
// Signed saturation to 16 bits leaves the first value in the low word.
PACKSSDW(XMM0, R(XMM0));
|
||||
MOVD_xmm(R(EAX), XMM0);
|
||||
// BSWAP reverses all four bytes; the 16-bit rotate then puts the two
// (now byte-swapped) halves back in their original order.
BSWAP(32, EAX);
|
||||
ROL(32, R(EAX), Imm8(16));
|
||||
SafeWriteRegToReg(EAX, ECX, 32, 0, false);
|
||||
|
||||
RET();
|
||||
|
||||
// Dispatch table, indexed by quantization type.
pairedStoreQuantized[0] = storePairedFloat;
|
||||
pairedStoreQuantized[1] = storePairedIllegal;
|
||||
pairedStoreQuantized[2] = storePairedIllegal;
|
||||
pairedStoreQuantized[3] = storePairedIllegal;
|
||||
pairedStoreQuantized[4] = storePairedU8;
|
||||
pairedStoreQuantized[5] = storePairedU16;
|
||||
pairedStoreQuantized[6] = storePairedS8;
|
||||
pairedStoreQuantized[7] = storePairedS16;
|
||||
}
|
||||
|
||||
// Emits the paired-single load routines and records their entry points in
// pairedLoadQuantized[]:
//   [0] float pair, [1..3] trap (UD2), [4] u8, [5] u16, [6] s8, [7] s16.
// Register contract of the generated code, as visible in this function:
//   ECX  - source address (overwritten)
//   EAX  - for the integer formats: shifted right by 6 and then used as a
//          byte offset into m_dequantizeTableS (the caller must have prepared
//          it accordingly; that setup is not visible here)
//   XMM0 - result: the two loaded values as floats in its low two lanes
// XMM1 is used as scratch. All memory reads here are direct; no slow-path
// address check is emitted in this function.
void CommonAsmRoutines::GenQuantizedLoads() {
|
||||
// Entry used for the unsupported type slots: executes UD2.
const u8* loadPairedIllegal = AlignCode4();
|
||||
UD2();
|
||||
// ---- Float pair: read 8 bytes and byte-swap each 32-bit half ----
const u8* loadPairedFloat = AlignCode4();
|
||||
// Decided once, at code-generation time: only one variant is emitted.
if (cpu_info.bSSSE3) {
|
||||
#ifdef _M_X64
|
||||
MOVQ_xmm(XMM0, MComplex(RBX, RCX, 1, 0));
|
||||
#else
|
||||
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOVQ_xmm(XMM0, MDisp(ECX, (u32)Memory::base));
|
||||
#endif
|
||||
// One shuffle swaps the bytes inside both low 32-bit lanes.
PSHUFB(XMM0, M((void *)pbswapShuffle2x4));
|
||||
} else {
|
||||
#ifdef _M_X64
|
||||
// The 64-bit BSWAP also exchanges the two halves; rotating by 32 puts them
// back in memory order.
MOV(64, R(RCX), MComplex(RBX, RCX, 1, 0));
|
||||
BSWAP(64, RCX);
|
||||
ROL(64, R(RCX), Imm8(32));
|
||||
MOVQ_xmm(XMM0, R(RCX));
|
||||
#else
|
||||
// Disabled SSE2-only byte-swap variant, kept for reference.
#if 0
|
||||
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOVQ_xmm(XMM0, MDisp(ECX, (u32)Memory::base));
|
||||
PXOR(XMM1, R(XMM1));
|
||||
PSHUFLW(XMM0, R(XMM0), 0xB1);
|
||||
MOVAPD(XMM1, R(XMM0));
|
||||
PSRLW(XMM0, 8);
|
||||
PSLLW(XMM1, 8);
|
||||
POR(XMM0, R(XMM1));
|
||||
#else
|
||||
// Swap each 32-bit half in EAX and assemble the pair in psTemp before
// loading it into XMM0.
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base));
|
||||
BSWAP(32, EAX);
|
||||
MOV(32, M(&psTemp[0]), R(RAX));
|
||||
MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base + 4));
|
||||
BSWAP(32, EAX);
|
||||
MOV(32, M(((float *)&psTemp[0]) + 1), R(RAX));
|
||||
MOVQ_xmm(XMM0, M(&psTemp[0]));
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
RET();
|
||||
|
||||
// ---- Unsigned 8-bit pair ----
const u8* loadPairedU8 = AlignCode4();
|
||||
// Read both bytes with one zero-extended 16-bit load, no swap needed.
UnsafeLoadRegToRegNoSwap(ECX, ECX, 16, 0);
|
||||
MOVD_xmm(XMM0, R(ECX));
|
||||
// Interleaving with zero widens u8 -> u16 -> u32 in the two low lanes.
PXOR(XMM1, R(XMM1));
|
||||
PUNPCKLBW(XMM0, R(XMM1));
|
||||
PUNPCKLWD(XMM0, R(XMM1));
|
||||
CVTDQ2PS(XMM0, R(XMM0));
|
||||
// Multiply both lanes by the dequantize factor selected by EAX.
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
RET();
|
||||
|
||||
// ---- Signed 8-bit pair ----
const u8* loadPairedS8 = AlignCode4();
|
||||
UnsafeLoadRegToRegNoSwap(ECX, ECX, 16, 0);
|
||||
MOVD_xmm(XMM0, R(ECX));
|
||||
// Interleaving the register with itself places each byte in the top byte of
// its 32-bit lane; the arithmetic shift by 24 then sign-extends it.
PUNPCKLBW(XMM0, R(XMM0));
|
||||
PUNPCKLWD(XMM0, R(XMM0));
|
||||
PSRAD(XMM0, 24);
|
||||
CVTDQ2PS(XMM0, R(XMM0));
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
RET();
|
||||
|
||||
// ---- Unsigned 16-bit pair ----
const u8* loadPairedU16 = AlignCode4();
|
||||
// 32-bit load through the shared helper (presumably byte-swapped by it - its
// body is outside this view), then rotate so the first value is the low word.
UnsafeLoadRegToReg(ECX, ECX, 32, 0, false);
|
||||
ROL(32, R(ECX), Imm8(16));
|
||||
MOVD_xmm(XMM0, R(ECX));
|
||||
// Interleaving with zero widens u16 -> u32.
PXOR(XMM1, R(XMM1));
|
||||
PUNPCKLWD(XMM0, R(XMM1));
|
||||
CVTDQ2PS(XMM0, R(XMM0));
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
RET();
|
||||
|
||||
// ---- Signed 16-bit pair ----
const u8* loadPairedS16 = AlignCode4();
|
||||
UnsafeLoadRegToReg(ECX, ECX, 32, 0, false);
|
||||
ROL(32, R(ECX), Imm8(16));
|
||||
MOVD_xmm(XMM0, R(ECX));
|
||||
// Self-interleave puts each word in the top half of its lane; the arithmetic
// shift by 16 sign-extends it.
PUNPCKLWD(XMM0, R(XMM0));
|
||||
PSRAD(XMM0, 16);
|
||||
CVTDQ2PS(XMM0, R(XMM0));
|
||||
SHR(32, R(EAX), Imm8(6));
|
||||
// Only this routine masks the table offset to 0..0xFC (64 float entries).
// NOTE(review): the other integer loaders index the table without this mask -
// confirm whether callers guarantee the range.
AND(32, R(EAX), Imm32(0xFC));
|
||||
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_dequantizeTableS));
|
||||
PUNPCKLDQ(XMM1, R(XMM1));
|
||||
MULPS(XMM0, R(XMM1));
|
||||
RET();
|
||||
|
||||
// Dispatch table, indexed by quantization type.
pairedLoadQuantized[0] = loadPairedFloat;
|
||||
pairedLoadQuantized[1] = loadPairedIllegal;
|
||||
pairedLoadQuantized[2] = loadPairedIllegal;
|
||||
pairedLoadQuantized[3] = loadPairedIllegal;
|
||||
pairedLoadQuantized[4] = loadPairedU8;
|
||||
pairedLoadQuantized[5] = loadPairedU16;
|
||||
pairedLoadQuantized[6] = loadPairedS8;
|
||||
pairedLoadQuantized[7] = loadPairedS16;
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
// Copyright (C) 2003 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#ifndef _JITASMCOMMON_H
|
||||
#define _JITASMCOMMON_H
|
||||
|
||||
#include "../JitCommon/Jit_Util.h"
|
||||
|
||||
// Code-emitting base class holding the gather-pipe write routines and the
// quantized paired-single load/store routines, together with the entry-point
// tables the quantized generators fill in.
class CommonAsmRoutines : public EmuCodeBlock {
|
||||
protected:
|
||||
// Emit the load routines and fill pairedLoadQuantized[].
void GenQuantizedLoads();
|
||||
// Emit the store routines and fill pairedStoreQuantized[].
void GenQuantizedStores();
|
||||
public:
|
||||
// Emit a routine writing a `size`-bit value from ABI_PARAM1 to the gather pipe.
void GenFifoWrite(int size);
|
||||
// Emit a routine writing the low 64 bits of XMM0 (already byte-swapped) to the gather pipe.
void GenFifoXmm64Write();
|
||||
// Emit a routine writing the low float of XMM0 to the gather pipe.
void GenFifoFloatWrite();
|
||||
|
||||
// In: array index: quantization type (0 = float, 4 = u8, 5 = u16, 6 = s8,
|
||||
//     7 = s16; entries 1-3 trap with UD2).
// In: EAX: for the integer types, shifted right by 6 by the routine and used
//     as a byte offset into the dequantize table.
// In: ECX: Address to read from.
|
||||
// Out: XMM0: Bottom two 32-bit slots hold the read value,
|
||||
// converted to a pair of floats.
|
||||
// Trashes: EAX ECX EDX
|
||||
const u8 GC_ALIGNED16(*pairedLoadQuantized[8]);
|
||||
|
||||
// In: array index: quantization type (0 = float, 4 = u8, 5 = u16, 6 = s8,
|
||||
//     7 = s16; entries 1-3 trap with UD2).
// In: EAX: for the integer types, shifted right by 6 by the routine and used
//     as a byte offset into the quantize table.
// In: ECX: Address to write to.
|
||||
// In: XMM0: Bottom two 32-bit slots hold the pair of floats to be written.
|
||||
// Out: Nothing.
|
||||
// Trashes: EAX ECX EDX
|
||||
const u8 GC_ALIGNED16(*pairedStoreQuantized[8]);
|
||||
};
|
||||
|
||||
#endif
|
|
@ -288,7 +288,7 @@ bool JitBlock::ContainsAddress(u32 em_address)
|
|||
block_numbers->push_back(i);
|
||||
}
|
||||
|
||||
u32 JitBlockCache::GetOriginalFirstOp(u32 block_num)
|
||||
u32 JitBlockCache::GetOriginalFirstOp(int block_num)
|
||||
{
|
||||
if (block_num >= num_blocks)
|
||||
{
|
||||
|
@ -298,9 +298,9 @@ bool JitBlock::ContainsAddress(u32 em_address)
|
|||
return blocks[block_num].originalFirstOpcode;
|
||||
}
|
||||
|
||||
CompiledCode JitBlockCache::GetCompiledCodeFromBlock(int blockNumber)
|
||||
CompiledCode JitBlockCache::GetCompiledCodeFromBlock(int block_num)
|
||||
{
|
||||
return (CompiledCode)blockCodePointers[blockNumber];
|
||||
return (CompiledCode)blockCodePointers[block_num];
|
||||
}
|
||||
|
||||
//Block linker
|
||||
|
@ -351,25 +351,25 @@ bool JitBlock::ContainsAddress(u32 em_address)
|
|||
}
|
||||
}
|
||||
|
||||
void JitBlockCache::DestroyBlock(int blocknum, bool invalidate)
|
||||
void JitBlockCache::DestroyBlock(int block_num, bool invalidate)
|
||||
{
|
||||
if (blocknum < 0 || blocknum >= num_blocks)
|
||||
if (block_num < 0 || block_num >= num_blocks)
|
||||
{
|
||||
PanicAlert("DestroyBlock: Invalid block number %d", blocknum);
|
||||
PanicAlert("DestroyBlock: Invalid block number %d", block_num);
|
||||
return;
|
||||
}
|
||||
JitBlock &b = blocks[blocknum];
|
||||
JitBlock &b = blocks[block_num];
|
||||
if (b.invalid)
|
||||
{
|
||||
if (invalidate)
|
||||
PanicAlert("Invalidating invalid block %d", blocknum);
|
||||
PanicAlert("Invalidating invalid block %d", block_num);
|
||||
return;
|
||||
}
|
||||
b.invalid = true;
|
||||
#ifdef JIT_UNLIMITED_ICACHE
|
||||
Memory::Write_Opcode_JIT(b.originalAddress, b.originalFirstOpcode);
|
||||
#else
|
||||
if (Memory::ReadFast32(b.originalAddress) == blocknum)
|
||||
if (Memory::ReadFast32(b.originalAddress) == block_num)
|
||||
Memory::WriteUnchecked_U32(b.originalFirstOpcode, b.originalAddress);
|
||||
#endif
|
||||
|
||||
|
|
|
@ -130,12 +130,12 @@ public:
|
|||
// This one is slow so should only be used for one-shots from the debugger UI, not for anything during runtime.
|
||||
void GetBlockNumbersFromAddress(u32 em_address, std::vector<int> *block_numbers);
|
||||
|
||||
u32 GetOriginalFirstOp(u32 block_num);
|
||||
CompiledCode GetCompiledCodeFromBlock(int blockNumber);
|
||||
u32 GetOriginalFirstOp(int block_num);
|
||||
CompiledCode GetCompiledCodeFromBlock(int block_num);
|
||||
|
||||
// DOES NOT WORK CORRECTLY WITH INLINING
|
||||
void InvalidateICache(u32 em_address);
|
||||
void DestroyBlock(int blocknum, bool invalidate);
|
||||
void DestroyBlock(int block_num, bool invalidate);
|
||||
|
||||
// Not currently used
|
||||
//void DestroyBlocksWithFlag(BlockFlag death_flag);
|
||||
|
|
|
@ -39,17 +39,17 @@
|
|||
|
||||
using namespace Gen;
|
||||
|
||||
void Jit64::JitClearCA()
|
||||
// Emits a single AND on the in-memory XER register that clears the bits in
// XER_CA_MASK (the carry flag) and leaves every other bit untouched.
void EmuCodeBlock::JitClearCA()
|
||||
{
|
||||
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0
|
||||
}
|
||||
|
||||
void Jit64::JitSetCA()
|
||||
// Emits a single OR on the in-memory XER register that sets the bits in
// XER_CA_MASK (the carry flag) and leaves every other bit untouched.
void EmuCodeBlock::JitSetCA()
|
||||
{
|
||||
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1
|
||||
}
|
||||
|
||||
void Jit64::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend)
|
||||
void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend)
|
||||
{
|
||||
#ifdef _M_IX86
|
||||
AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
|
||||
|
@ -74,7 +74,17 @@ void Jit64::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize
|
|||
}
|
||||
}
|
||||
|
||||
void Jit64::SafeLoadRegToEAX(X64Reg reg, int accessSize, s32 offset, bool signExtend)
|
||||
// Emits a zero-extending load of `accessSize` bits from emulated memory at
// reg_addr + offset into reg_value, without any byte swap and without any
// address check ("unsafe").
//
// reg_addr   - register holding the emulated address; on 32-bit builds it is
//              masked in place with Memory::MEMVIEW32_MASK.
// reg_value  - destination register (may be the same as reg_addr).
// accessSize - width of the memory operand in bits; the load is emitted as
//              MOVZX to 32 bits.
// offset     - constant displacement added to the address.
void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset)
{
#ifndef _M_IX86
	// 64-bit build: address relative to RBX.
	MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset));
#else
	// 32-bit build: mask the address, then address relative to Memory::base.
	AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
	MOVZX(32, accessSize, reg_value, MDisp(reg_addr, (u32)Memory::base + offset));
#endif
}
|
||||
|
||||
void EmuCodeBlock::SafeLoadRegToEAX(X64Reg reg, int accessSize, s32 offset, bool signExtend)
|
||||
{
|
||||
if (offset)
|
||||
ADD(32, R(reg), Imm32((u32)offset));
|
||||
|
@ -96,12 +106,12 @@ void Jit64::SafeLoadRegToEAX(X64Reg reg, int accessSize, s32 offset, bool signEx
|
|||
SetJumpTarget(arg2);
|
||||
}
|
||||
|
||||
void Jit64::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset)
|
||||
void EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap)
|
||||
{
|
||||
if (accessSize == 8 && reg_value >= 4) {
|
||||
PanicAlert("WARNING: likely incorrect use of UnsafeWriteRegToReg!");
|
||||
}
|
||||
BSWAP(accessSize, reg_value);
|
||||
if (swap) BSWAP(accessSize, reg_value);
|
||||
#ifdef _M_IX86
|
||||
AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(accessSize, MDisp(reg_addr, (u32)Memory::base + offset), R(reg_value));
|
||||
|
@ -111,7 +121,7 @@ void Jit64::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSiz
|
|||
}
|
||||
|
||||
// Destroys both arg registers
|
||||
void Jit64::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset)
|
||||
void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap)
|
||||
{
|
||||
if (offset)
|
||||
ADD(32, R(reg_addr), Imm32(offset));
|
||||
|
@ -125,11 +135,11 @@ void Jit64::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize,
|
|||
}
|
||||
FixupBranch arg2 = J();
|
||||
SetJumpTarget(argh);
|
||||
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0);
|
||||
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);
|
||||
SetJumpTarget(arg2);
|
||||
}
|
||||
|
||||
void Jit64::WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address)
|
||||
void EmuCodeBlock::WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address)
|
||||
{
|
||||
#ifdef _M_X64
|
||||
MOV(accessSize, MDisp(RBX, address & 0x3FFFFFFF), arg);
|
||||
|
@ -138,7 +148,7 @@ void Jit64::WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 ad
|
|||
#endif
|
||||
}
|
||||
|
||||
void Jit64::WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address)
|
||||
void EmuCodeBlock::WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address)
|
||||
{
|
||||
#ifdef _M_X64
|
||||
MOV(32, R(RAX), Imm32(address));
|
||||
|
@ -148,18 +158,18 @@ void Jit64::WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address)
|
|||
#endif
|
||||
}
|
||||
|
||||
// Scalar variant: when the accurateSinglePrecision JIT option is set, emits a
// double -> single -> double conversion round trip on xmm (CVTSD2SS followed by
// CVTSS2SD), in place. When the option is off, nothing is emitted.
void Jit64::ForceSinglePrecisionS(X64Reg xmm) {
|
||||
void EmuCodeBlock::ForceSinglePrecisionS(X64Reg xmm) {
|
||||
// Most games don't need these. Zelda requires it though - some platforms get stuck without them.
|
||||
if (jo.accurateSinglePrecision)
|
||||
if (jit.jo.accurateSinglePrecision)
|
||||
{
|
||||
CVTSD2SS(xmm, R(xmm));
|
||||
CVTSS2SD(xmm, R(xmm));
|
||||
}
|
||||
}
|
||||
|
||||
void Jit64::ForceSinglePrecisionP(X64Reg xmm) {
|
||||
void EmuCodeBlock::ForceSinglePrecisionP(X64Reg xmm) {
|
||||
// Most games don't need these. Zelda requires it though - some platforms get stuck without them.
|
||||
if (jo.accurateSinglePrecision)
|
||||
if (jit.jo.accurateSinglePrecision)
|
||||
{
|
||||
CVTPD2PS(xmm, R(xmm));
|
||||
CVTPS2PD(xmm, R(xmm));
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
// Copyright (C) 2003 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#ifndef _JITUTIL_H
|
||||
#define _JITUTIL_H
|
||||
|
||||
#include "x64Emitter.h"
|
||||
|
||||
// Like XCodeBlock but has some utilities for memory access.
|
||||
// Code-emitting helpers shared by the JIT backends. Each method appends x86
// instructions to this code block; none of them runs the access itself.
class EmuCodeBlock : public Gen::XCodeBlock {
|
||||
public:
|
||||
// Load of accessSize bits from the address in reg_addr (+ offset) into reg_value.
// On 32-bit builds reg_addr is masked in place with Memory::MEMVIEW32_MASK.
// NOTE(review): "Unsafe" presumably means no address check is emitted - confirm.
void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false);
|
||||
// Zero-extending load like the above, but no byte swap is emitted.
void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset);
|
||||
// Store of reg_value to the address in reg_addr (+ offset). When swap is true
// (the default) reg_value is byte-swapped in place with BSWAP before the store.
// Raises a PanicAlert at emit time if accessSize == 8 and reg_value >= 4.
void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true);
|
||||
// Load via the address in reg; a non-zero offset is added to reg in place.
// NOTE(review): result presumably ends up in EAX, per the name - confirm.
void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset, bool signExtend = false);
|
||||
// Store counterpart of SafeLoadRegToEAX. Destroys both argument registers.
// A non-zero offset is added to reg_addr in place; swap is forwarded to
// UnsafeWriteRegToReg on the direct-write path.
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, bool swap = true);
|
||||
|
||||
// Store arg to a compile-time-known emulated address. On _M_X64 the target is
// RBX + (address & 0x3FFFFFFF).
void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address);
|
||||
// Store involving xmm_reg and a compile-time-known emulated address. On _M_X64
// the emitted code starts by loading the address into RAX, so RAX is clobbered.
void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);
|
||||
// Clear the carry (CA) bit in the emulated XER register.
void JitClearCA();
|
||||
// Set the carry (CA) bit in the emulated XER register.
void JitSetCA();
|
||||
|
||||
// When jit.jo.accurateSinglePrecision is set, round xmm through single
// precision with a scalar convert pair (CVTSD2SS then CVTSS2SD).
void ForceSinglePrecisionS(Gen::X64Reg xmm);
|
||||
// Packed variant of the above (CVTPD2PS then CVTPS2PD).
void ForceSinglePrecisionP(Gen::X64Reg xmm);
|
||||
};
|
||||
|
||||
#endif // _JITUTIL_H
|
|
@ -85,7 +85,8 @@ files = ["ActionReplay.cpp",
|
|||
"PowerPC/Interpreter/Interpreter_LoadStore.cpp",
|
||||
"PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp",
|
||||
"PowerPC/Interpreter/Interpreter_SystemRegisters.cpp",
|
||||
"PowerPC/Interpreter/Interpreter_Tables.cpp",
|
||||
"PowerPC/Interpreter/Interpreter_Tables.cpp",
|
||||
"PowerPC/JitCommon/JitAsmCommon.cpp",
|
||||
"PowerPC/JitCommon/JitCache.cpp",
|
||||
"PowerPC/JitCommon/JitBackpatch.cpp",
|
||||
"PowerPC/JitCommon/Jit_Util.cpp",
|
||||
|
|
|
@ -122,7 +122,6 @@ bool BootCore(const std::string& _rFilename)
|
|||
// General settings
|
||||
game_ini.Get("Core", "CPUOnThread", &StartUp.bCPUThread, StartUp.bCPUThread);
|
||||
game_ini.Get("Core", "SkipIdle", &StartUp.bSkipIdle, StartUp.bSkipIdle);
|
||||
game_ini.Get("Core", "OptimizeQuantizers", &StartUp.bOptimizeQuantizers, StartUp.bOptimizeQuantizers);
|
||||
game_ini.Get("Core", "EnableFPRF", &StartUp.bEnableFPRF, StartUp.bEnableFPRF);
|
||||
game_ini.Get("Core", "TLBHack", &StartUp.iTLBHack, StartUp.iTLBHack);
|
||||
// Wii settings
|
||||
|
|
|
@ -60,13 +60,12 @@ EVT_CHECKBOX(ID_INTERFACE_WIIMOTE_LEDS, CConfigMain::CoreSettingsChanged)
|
|||
EVT_CHECKBOX(ID_INTERFACE_WIIMOTE_SPEAKERS, CConfigMain::CoreSettingsChanged)
|
||||
EVT_CHOICE(ID_INTERFACE_LANG, CConfigMain::CoreSettingsChanged)
|
||||
|
||||
EVT_CHECKBOX(ID_ALLWAYS_HLE_BS2, CConfigMain::CoreSettingsChanged)
|
||||
EVT_CHECKBOX(ID_ALWAYS_HLE_BS2, CConfigMain::CoreSettingsChanged)
|
||||
EVT_RADIOBUTTON(ID_RADIOJIT, CConfigMain::CoreSettingsChanged)
|
||||
EVT_RADIOBUTTON(ID_RADIOINT, CConfigMain::CoreSettingsChanged)
|
||||
EVT_CHECKBOX(ID_CPUTHREAD, CConfigMain::CoreSettingsChanged)
|
||||
EVT_CHECKBOX(ID_DSPTHREAD, CConfigMain::CoreSettingsChanged)
|
||||
EVT_CHECKBOX(ID_LOCKTHREADS, CConfigMain::CoreSettingsChanged)
|
||||
EVT_CHECKBOX(ID_OPTIMIZEQUANTIZERS, CConfigMain::CoreSettingsChanged)
|
||||
EVT_CHECKBOX(ID_IDLESKIP, CConfigMain::CoreSettingsChanged)
|
||||
EVT_CHECKBOX(ID_ENABLECHEATS, CConfigMain::CoreSettingsChanged)
|
||||
EVT_CHOICE(ID_FRAMELIMIT, CConfigMain::CoreSettingsChanged)
|
||||
|
@ -142,7 +141,6 @@ void CConfigMain::UpdateGUI()
|
|||
CPUThread->Disable();
|
||||
DSPThread->Disable();
|
||||
LockThreads->Disable();
|
||||
OptimizeQuantizers->Disable();
|
||||
SkipIdle->Disable();
|
||||
EnableCheats->Disable();
|
||||
|
||||
|
@ -222,15 +220,13 @@ void CConfigMain::CreateGUIControls()
|
|||
|
||||
// Core Settings - Advanced
|
||||
//
|
||||
AlwaysHLE_BS2 = new wxCheckBox(GeneralPage, ID_ALLWAYS_HLE_BS2, wxT("HLE the IPL (recommended)"), wxDefaultPosition, wxDefaultSize, 0, wxDefaultValidator);
|
||||
AlwaysHLE_BS2 = new wxCheckBox(GeneralPage, ID_ALWAYS_HLE_BS2, wxT("HLE the IPL (recommended)"), wxDefaultPosition, wxDefaultSize, 0, wxDefaultValidator);
|
||||
AlwaysHLE_BS2->SetValue(SConfig::GetInstance().m_LocalCoreStartupParameter.bHLE_BS2);
|
||||
m_RadioJIT = new wxRadioButton(GeneralPage, ID_RADIOJIT, wxT("JIT Recompiler (recommended)"));
|
||||
m_RadioInt = new wxRadioButton(GeneralPage, ID_RADIOINT, wxT("Interpreter (very slow)"));
|
||||
SConfig::GetInstance().m_LocalCoreStartupParameter.bUseJIT ? m_RadioJIT->SetValue(true) : m_RadioInt->SetValue(true);
|
||||
LockThreads = new wxCheckBox(GeneralPage, ID_LOCKTHREADS, wxT("Lock threads to cores"), wxDefaultPosition, wxDefaultSize, 0, wxDefaultValidator);
|
||||
LockThreads->SetValue(SConfig::GetInstance().m_LocalCoreStartupParameter.bLockThreads);
|
||||
OptimizeQuantizers = new wxCheckBox(GeneralPage, ID_OPTIMIZEQUANTIZERS, wxT("Optimize Quantizers (speedup)"), wxDefaultPosition, wxDefaultSize, 0, wxDefaultValidator);
|
||||
OptimizeQuantizers->SetValue(SConfig::GetInstance().m_LocalCoreStartupParameter.bOptimizeQuantizers);
|
||||
DSPThread = new wxCheckBox(GeneralPage, ID_DSPTHREAD, wxT("DSP on thread (recommended)"), wxDefaultPosition, wxDefaultSize, 0, wxDefaultValidator);
|
||||
DSPThread->SetValue(SConfig::GetInstance().m_LocalCoreStartupParameter.bDSPThread);
|
||||
|
||||
|
@ -317,7 +313,6 @@ void CConfigMain::CreateGUIControls()
|
|||
sizerCoreType->Add(m_RadioInt, 0, wxALL | wxEXPAND, 5);
|
||||
sbAdvanced->Add(sizerCoreType, 0, wxALL, 5);
|
||||
sbAdvanced->Add(LockThreads, 0, wxALL, 5);
|
||||
sbAdvanced->Add(OptimizeQuantizers, 0, wxALL, 5);
|
||||
sbAdvanced->Add(DSPThread, 0, wxALL, 5);
|
||||
sCore->Add(sbBasic, 0, wxEXPAND);
|
||||
sCore->AddStretchSpacer();
|
||||
|
@ -690,7 +685,7 @@ void CConfigMain::CoreSettingsChanged(wxCommandEvent& event)
|
|||
case ID_FRAMELIMIT:
|
||||
SConfig::GetInstance().m_Framelimit = (u32)Framelimit->GetSelection();
|
||||
break;
|
||||
case ID_ALLWAYS_HLE_BS2: // Core
|
||||
case ID_ALWAYS_HLE_BS2: // Core
|
||||
SConfig::GetInstance().m_LocalCoreStartupParameter.bHLE_BS2 = AlwaysHLE_BS2->IsChecked();
|
||||
break;
|
||||
case ID_RADIOJIT:
|
||||
|
@ -710,9 +705,6 @@ void CConfigMain::CoreSettingsChanged(wxCommandEvent& event)
|
|||
case ID_LOCKTHREADS:
|
||||
SConfig::GetInstance().m_LocalCoreStartupParameter.bLockThreads = LockThreads->IsChecked();
|
||||
break;
|
||||
case ID_OPTIMIZEQUANTIZERS:
|
||||
SConfig::GetInstance().m_LocalCoreStartupParameter.bOptimizeQuantizers = OptimizeQuantizers->IsChecked();
|
||||
break;
|
||||
case ID_IDLESKIP:
|
||||
SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle = SkipIdle->IsChecked();
|
||||
break;
|
||||
|
|
|
@ -68,7 +68,6 @@ private:
|
|||
wxCheckBox* CPUThread;
|
||||
wxCheckBox* DSPThread;
|
||||
wxCheckBox* LockThreads;
|
||||
wxCheckBox* OptimizeQuantizers;
|
||||
wxCheckBox* SkipIdle;
|
||||
wxCheckBox* EnableCheats;
|
||||
|
||||
|
@ -159,13 +158,12 @@ private:
|
|||
ID_PATHSPAGE,
|
||||
ID_PLUGINPAGE,
|
||||
|
||||
ID_ALLWAYS_HLE_BS2,
|
||||
ID_ALWAYS_HLE_BS2,
|
||||
ID_RADIOJIT,
|
||||
ID_RADIOINT,
|
||||
ID_CPUTHREAD,
|
||||
ID_DSPTHREAD,
|
||||
ID_LOCKTHREADS,
|
||||
ID_OPTIMIZEQUANTIZERS,
|
||||
ID_IDLESKIP,
|
||||
ID_ENABLECHEATS,
|
||||
|
||||
|
|
|
@ -290,7 +290,6 @@ void CISOProperties::CreateGUIControls(bool IsWad)
|
|||
sbCoreOverrides = new wxStaticBoxSizer(wxVERTICAL, m_GameConfig, _("Core"));
|
||||
CPUThread = new wxCheckBox(m_GameConfig, ID_USEDUALCORE, _("Enable Dual Core"), wxDefaultPosition, wxDefaultSize, wxCHK_3STATE|wxCHK_ALLOW_3RD_STATE_FOR_USER, wxDefaultValidator);
|
||||
SkipIdle = new wxCheckBox(m_GameConfig, ID_IDLESKIP, _("Enable Idle Skipping"), wxDefaultPosition, wxDefaultSize, wxCHK_3STATE|wxCHK_ALLOW_3RD_STATE_FOR_USER, wxDefaultValidator);
|
||||
OptimizeQuantizers = new wxCheckBox(m_GameConfig, ID_OPTIMIZEQUANTIZERS, _("Optimize Quantizers"), wxDefaultPosition, wxDefaultSize, wxCHK_3STATE|wxCHK_ALLOW_3RD_STATE_FOR_USER, wxDefaultValidator);
|
||||
TLBHack = new wxCheckBox(m_GameConfig, ID_TLBHACK, _("TLB Hack"), wxDefaultPosition, wxDefaultSize, wxCHK_3STATE|wxCHK_ALLOW_3RD_STATE_FOR_USER, wxDefaultValidator);
|
||||
// Wii Console
|
||||
sbWiiOverrides = new wxStaticBoxSizer(wxVERTICAL, m_GameConfig, _("Wii Console"));
|
||||
|
@ -347,7 +346,6 @@ void CISOProperties::CreateGUIControls(bool IsWad)
|
|||
sbCoreOverrides->Add(CPUThread, 0, wxEXPAND|wxLEFT, 5);
|
||||
sbCoreOverrides->Add(SkipIdle, 0, wxEXPAND|wxLEFT, 5);
|
||||
sbCoreOverrides->Add(TLBHack, 0, wxEXPAND|wxLEFT, 5);
|
||||
sbCoreOverrides->Add(OptimizeQuantizers, 0, wxEXPAND|wxLEFT, 5);
|
||||
sbWiiOverrides->Add(EnableProgressiveScan, 0, wxEXPAND|wxLEFT, 5);
|
||||
sbWiiOverrides->Add(EnableWideScreen, 0, wxEXPAND|wxLEFT, 5);
|
||||
sbVideoOverrides->Add(ForceFiltering, 0, wxEXPAND|wxLEFT, 5);
|
||||
|
@ -806,11 +804,6 @@ void CISOProperties::LoadGameConfig()
|
|||
else
|
||||
SkipIdle->Set3StateValue(wxCHK_UNDETERMINED);
|
||||
|
||||
if (GameIni.Get("Core", "OptimizeQuantizers", &bTemp))
|
||||
OptimizeQuantizers->Set3StateValue((wxCheckBoxState)bTemp);
|
||||
else
|
||||
OptimizeQuantizers->Set3StateValue(wxCHK_UNDETERMINED);
|
||||
|
||||
if (GameIni.Get("Core", "TLBHack", &bTemp))
|
||||
TLBHack->Set3StateValue((wxCheckBoxState)bTemp);
|
||||
else
|
||||
|
@ -896,11 +889,6 @@ bool CISOProperties::SaveGameConfig()
|
|||
else
|
||||
GameIni.Set("Core", "SkipIdle", SkipIdle->Get3StateValue());
|
||||
|
||||
if (OptimizeQuantizers->Get3StateValue() == wxCHK_UNDETERMINED)
|
||||
GameIni.DeleteKey("Core", "OptimizeQuantizers");
|
||||
else
|
||||
GameIni.Set("Core", "OptimizeQuantizers", OptimizeQuantizers->Get3StateValue());
|
||||
|
||||
if (TLBHack->Get3StateValue() == wxCHK_UNDETERMINED)
|
||||
GameIni.DeleteKey("Core", "TLBHack");
|
||||
else
|
||||
|
|
|
@ -81,7 +81,7 @@ class CISOProperties : public wxDialog
|
|||
|
||||
wxStaticText *OverrideText;
|
||||
// Core
|
||||
wxCheckBox *CPUThread, *SkipIdle, *OptimizeQuantizers, *TLBHack, *BPHack;
|
||||
wxCheckBox *CPUThread, *SkipIdle, *TLBHack, *BPHack;
|
||||
// Wii
|
||||
wxCheckBox *EnableProgressiveScan, *EnableWideScreen;
|
||||
// Video
|
||||
|
@ -172,7 +172,6 @@ class CISOProperties : public wxDialog
|
|||
ID_RE0FIX,
|
||||
ID_ENABLEPROGRESSIVESCAN,
|
||||
ID_ENABLEWIDESCREEN,
|
||||
ID_OPTIMIZEQUANTIZERS,
|
||||
ID_EDITCONFIG,
|
||||
ID_EMUSTATE_TEXT,
|
||||
ID_EMUSTATE,
|
||||
|
|
Loading…
Reference in New Issue