Merge pull request #1830 from FioraAeterna/gqropts
JIT: optimize for the common case of unquantized psq_l/st
This commit is contained in:
commit
33047c9536
|
@ -21,6 +21,18 @@ static inline int CountSetBits(T v)
|
||||||
v = (v + (v >> 4)) & (T)~(T)0/255*15;
|
v = (v + (v >> 4)) & (T)~(T)0/255*15;
|
||||||
return (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * 8;
|
return (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * 8;
|
||||||
}
|
}
|
||||||
|
static inline int LeastSignificantSetBit(u8 val)
|
||||||
|
{
|
||||||
|
unsigned long index;
|
||||||
|
_BitScanForward(&index, val);
|
||||||
|
return (int)index;
|
||||||
|
}
|
||||||
|
static inline int LeastSignificantSetBit(u16 val)
|
||||||
|
{
|
||||||
|
unsigned long index;
|
||||||
|
_BitScanForward(&index, val);
|
||||||
|
return (int)index;
|
||||||
|
}
|
||||||
static inline int LeastSignificantSetBit(u32 val)
|
static inline int LeastSignificantSetBit(u32 val)
|
||||||
{
|
{
|
||||||
unsigned long index;
|
unsigned long index;
|
||||||
|
@ -34,8 +46,12 @@ static inline int LeastSignificantSetBit(u64 val)
|
||||||
return (int)index;
|
return (int)index;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
static inline int CountSetBits(u8 val) { return __builtin_popcount(val); }
|
||||||
|
static inline int CountSetBits(u16 val) { return __builtin_popcount(val); }
|
||||||
static inline int CountSetBits(u32 val) { return __builtin_popcount(val); }
|
static inline int CountSetBits(u32 val) { return __builtin_popcount(val); }
|
||||||
static inline int CountSetBits(u64 val) { return __builtin_popcountll(val); }
|
static inline int CountSetBits(u64 val) { return __builtin_popcountll(val); }
|
||||||
|
static inline int LeastSignificantSetBit(u8 val) { return __builtin_ctz(val); }
|
||||||
|
static inline int LeastSignificantSetBit(u16 val) { return __builtin_ctz(val); }
|
||||||
static inline int LeastSignificantSetBit(u32 val) { return __builtin_ctz(val); }
|
static inline int LeastSignificantSetBit(u32 val) { return __builtin_ctz(val); }
|
||||||
static inline int LeastSignificantSetBit(u64 val) { return __builtin_ctzll(val); }
|
static inline int LeastSignificantSetBit(u64 val) { return __builtin_ctzll(val); }
|
||||||
#endif
|
#endif
|
||||||
|
@ -163,5 +179,7 @@ public:
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
typedef BS::BitSet<u8> BitSet8;
|
||||||
|
typedef BS::BitSet<u16> BitSet16;
|
||||||
typedef BS::BitSet<u32> BitSet32;
|
typedef BS::BitSet<u32> BitSet32;
|
||||||
typedef BS::BitSet<u64> BitSet64;
|
typedef BS::BitSet<u64> BitSet64;
|
||||||
|
|
|
@ -137,10 +137,10 @@ static GekkoOPTemplate table4_2[] =
|
||||||
|
|
||||||
static GekkoOPTemplate table4_3[] =
|
static GekkoOPTemplate table4_3[] =
|
||||||
{
|
{
|
||||||
{6, Interpreter::psq_lx, {"psq_lx", OPTYPE_PS, FL_OUT_FLOAT_S | FL_IN_A0B | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}},
|
{6, Interpreter::psq_lx, {"psq_lx", OPTYPE_LOADPS, FL_OUT_FLOAT_S | FL_IN_A0B | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}},
|
||||||
{7, Interpreter::psq_stx, {"psq_stx", OPTYPE_PS, FL_IN_FLOAT_S | FL_IN_A0B | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}},
|
{7, Interpreter::psq_stx, {"psq_stx", OPTYPE_STOREPS, FL_IN_FLOAT_S | FL_IN_A0B | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}},
|
||||||
{38, Interpreter::psq_lux, {"psq_lux", OPTYPE_PS, FL_OUT_FLOAT_S | FL_OUT_A | FL_IN_AB | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}},
|
{38, Interpreter::psq_lux, {"psq_lux", OPTYPE_LOADPS, FL_OUT_FLOAT_S | FL_OUT_A | FL_IN_AB | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}},
|
||||||
{39, Interpreter::psq_stux, {"psq_stux", OPTYPE_PS, FL_IN_FLOAT_S | FL_OUT_A | FL_IN_AB | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}},
|
{39, Interpreter::psq_stux, {"psq_stux", OPTYPE_STOREPS, FL_IN_FLOAT_S | FL_OUT_A | FL_IN_AB | FL_USE_FPU | FL_LOADSTORE, 1, 0, 0, 0}},
|
||||||
};
|
};
|
||||||
|
|
||||||
static GekkoOPTemplate table19[] =
|
static GekkoOPTemplate table19[] =
|
||||||
|
|
|
@ -15,6 +15,7 @@
|
||||||
#include "Core/PatchEngine.h"
|
#include "Core/PatchEngine.h"
|
||||||
#include "Core/HLE/HLE.h"
|
#include "Core/HLE/HLE.h"
|
||||||
#include "Core/HW/ProcessorInterface.h"
|
#include "Core/HW/ProcessorInterface.h"
|
||||||
|
#include "Core/PowerPC/JitInterface.h"
|
||||||
#include "Core/PowerPC/Profiler.h"
|
#include "Core/PowerPC/Profiler.h"
|
||||||
#include "Core/PowerPC/Jit64/Jit.h"
|
#include "Core/PowerPC/Jit64/Jit.h"
|
||||||
#include "Core/PowerPC/Jit64/Jit64_Tables.h"
|
#include "Core/PowerPC/Jit64/Jit64_Tables.h"
|
||||||
|
@ -605,6 +606,35 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
|
||||||
js.skipnext = false;
|
js.skipnext = false;
|
||||||
js.carryFlagSet = false;
|
js.carryFlagSet = false;
|
||||||
js.carryFlagInverted = false;
|
js.carryFlagInverted = false;
|
||||||
|
js.assumeNoPairedQuantize = false;
|
||||||
|
|
||||||
|
// If the block only uses one GQR and the GQR is zero at compile time, make a guess that the block
|
||||||
|
// never uses quantized loads/stores. Many paired-heavy games use largely float loads and stores,
|
||||||
|
// which are significantly faster when inlined (especially in MMU mode, where this lets them use
|
||||||
|
// fastmem).
|
||||||
|
// Insert a check that the GQR is still zero at the start of the block in case our guess turns out
|
||||||
|
// wrong.
|
||||||
|
// TODO: support any other constant GQR value, not merely zero/unquantized: we can optimize quantized
|
||||||
|
// loadstores too, it'd just be more code.
|
||||||
|
if (code_block.m_gqr_used.Count() == 1 && js.pairedQuantizeAddresses.find(js.blockStart) == js.pairedQuantizeAddresses.end())
|
||||||
|
{
|
||||||
|
int gqr = *code_block.m_gqr_used.begin();
|
||||||
|
if (!code_block.m_gqr_modified[gqr] && !GQR(gqr))
|
||||||
|
{
|
||||||
|
CMP(32, PPCSTATE(spr[SPR_GQR0 + gqr]), Imm8(0));
|
||||||
|
FixupBranch failure = J_CC(CC_NZ, true);
|
||||||
|
SwitchToFarCode();
|
||||||
|
SetJumpTarget(failure);
|
||||||
|
MOV(32, PPCSTATE(pc), Imm32(js.blockStart));
|
||||||
|
ABI_PushRegistersAndAdjustStack({}, 0);
|
||||||
|
ABI_CallFunctionC((void *)&JitInterface::CompileExceptionCheck, (u32)JitInterface::ExceptionType::EXCEPTIONS_PAIRED_QUANTIZE);
|
||||||
|
ABI_PopRegistersAndAdjustStack({}, 0);
|
||||||
|
JMP(asm_routines.dispatcher, true);
|
||||||
|
SwitchToNearCode();
|
||||||
|
js.assumeNoPairedQuantize = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Translate instructions
|
// Translate instructions
|
||||||
for (u32 i = 0; i < code_block.m_num_instructions; i++)
|
for (u32 i = 0; i < code_block.m_num_instructions; i++)
|
||||||
{
|
{
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
#include "Core/PowerPC/Jit64/Jit.h"
|
#include "Core/PowerPC/Jit64/Jit.h"
|
||||||
#include "Core/PowerPC/Jit64/JitAsm.h"
|
#include "Core/PowerPC/Jit64/JitAsm.h"
|
||||||
#include "Core/PowerPC/Jit64/JitRegCache.h"
|
#include "Core/PowerPC/Jit64/JitRegCache.h"
|
||||||
|
#include "Core/PowerPC/JitCommon/JitAsmCommon.h"
|
||||||
|
|
||||||
using namespace Gen;
|
using namespace Gen;
|
||||||
|
|
||||||
|
@ -20,7 +21,6 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(bJITLoadStorePairedOff);
|
JITDISABLE(bJITLoadStorePairedOff);
|
||||||
FALLBACK_IF(!inst.RA);
|
|
||||||
|
|
||||||
s32 offset = inst.SIMM_12;
|
s32 offset = inst.SIMM_12;
|
||||||
bool indexed = inst.OPCD == 4;
|
bool indexed = inst.OPCD == 4;
|
||||||
|
@ -30,12 +30,75 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
|
||||||
int s = inst.FS;
|
int s = inst.FS;
|
||||||
int i = indexed ? inst.Ix : inst.I;
|
int i = indexed ? inst.Ix : inst.I;
|
||||||
int w = indexed ? inst.Wx : inst.W;
|
int w = indexed ? inst.Wx : inst.W;
|
||||||
|
FALLBACK_IF(!a);
|
||||||
|
|
||||||
gpr.Lock(a, b);
|
gpr.Lock(a, b);
|
||||||
|
if (js.assumeNoPairedQuantize)
|
||||||
|
{
|
||||||
|
int storeOffset = 0;
|
||||||
|
gpr.BindToRegister(a, true, update);
|
||||||
|
X64Reg addr = gpr.RX(a);
|
||||||
|
if (update && js.memcheck)
|
||||||
|
{
|
||||||
|
addr = RSCRATCH2;
|
||||||
|
MOV(32, R(addr), gpr.R(a));
|
||||||
|
}
|
||||||
|
if (indexed)
|
||||||
|
{
|
||||||
|
if (update)
|
||||||
|
{
|
||||||
|
ADD(32, R(addr), gpr.R(b));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
addr = RSCRATCH2;
|
||||||
|
if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
|
||||||
|
{
|
||||||
|
LEA(32, addr, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
MOV(32, R(addr), gpr.R(b));
|
||||||
|
if (a)
|
||||||
|
ADD(32, R(addr), gpr.R(a));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (update)
|
||||||
|
ADD(32, R(addr), Imm32(offset));
|
||||||
|
else
|
||||||
|
storeOffset = offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
fpr.Lock(s);
|
||||||
|
if (w)
|
||||||
|
{
|
||||||
|
CVTSD2SS(XMM0, fpr.R(s));
|
||||||
|
MOVD_xmm(R(RSCRATCH), XMM0);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
CVTPD2PS(XMM0, fpr.R(s));
|
||||||
|
MOVQ_xmm(R(RSCRATCH), XMM0);
|
||||||
|
ROL(64, R(RSCRATCH), Imm8(32));
|
||||||
|
}
|
||||||
|
|
||||||
|
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
||||||
|
if (update && js.memcheck)
|
||||||
|
registersInUse[addr] = true;
|
||||||
|
SafeWriteRegToReg(RSCRATCH, addr, w ? 32 : 64, storeOffset, registersInUse);
|
||||||
|
MemoryExceptionCheck();
|
||||||
|
if (update && js.memcheck)
|
||||||
|
MOV(32, gpr.R(a), R(addr));
|
||||||
|
gpr.UnlockAll();
|
||||||
|
fpr.UnlockAll();
|
||||||
|
return;
|
||||||
|
}
|
||||||
gpr.FlushLockX(RSCRATCH_EXTRA);
|
gpr.FlushLockX(RSCRATCH_EXTRA);
|
||||||
if (update)
|
if (update)
|
||||||
gpr.BindToRegister(a, true, true);
|
gpr.BindToRegister(a, true, true);
|
||||||
fpr.BindToRegister(s, true, false);
|
|
||||||
if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg() && (indexed || offset))
|
if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg() && (indexed || offset))
|
||||||
{
|
{
|
||||||
if (indexed)
|
if (indexed)
|
||||||
|
@ -92,7 +155,6 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(bJITLoadStorePairedOff);
|
JITDISABLE(bJITLoadStorePairedOff);
|
||||||
FALLBACK_IF(!inst.RA);
|
|
||||||
|
|
||||||
s32 offset = inst.SIMM_12;
|
s32 offset = inst.SIMM_12;
|
||||||
bool indexed = inst.OPCD == 4;
|
bool indexed = inst.OPCD == 4;
|
||||||
|
@ -102,8 +164,116 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
|
||||||
int s = inst.FS;
|
int s = inst.FS;
|
||||||
int i = indexed ? inst.Ix : inst.I;
|
int i = indexed ? inst.Ix : inst.I;
|
||||||
int w = indexed ? inst.Wx : inst.W;
|
int w = indexed ? inst.Wx : inst.W;
|
||||||
|
FALLBACK_IF(!a);
|
||||||
|
|
||||||
gpr.Lock(a, b);
|
gpr.Lock(a, b);
|
||||||
|
if (js.assumeNoPairedQuantize)
|
||||||
|
{
|
||||||
|
s32 loadOffset = 0;
|
||||||
|
gpr.BindToRegister(a, true, update);
|
||||||
|
X64Reg addr = gpr.RX(a);
|
||||||
|
if (update && js.memcheck)
|
||||||
|
{
|
||||||
|
addr = RSCRATCH2;
|
||||||
|
MOV(32, R(addr), gpr.R(a));
|
||||||
|
}
|
||||||
|
if (indexed)
|
||||||
|
{
|
||||||
|
if (update)
|
||||||
|
{
|
||||||
|
ADD(32, R(addr), gpr.R(b));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
addr = RSCRATCH2;
|
||||||
|
if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
|
||||||
|
{
|
||||||
|
LEA(32, addr, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
MOV(32, R(addr), gpr.R(b));
|
||||||
|
if (a)
|
||||||
|
ADD(32, R(addr), gpr.R(a));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (update)
|
||||||
|
ADD(32, R(addr), Imm32(offset));
|
||||||
|
else
|
||||||
|
loadOffset = offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
fpr.Lock(s);
|
||||||
|
if (js.memcheck)
|
||||||
|
{
|
||||||
|
fpr.StoreFromRegister(s);
|
||||||
|
js.revertFprLoad = s;
|
||||||
|
}
|
||||||
|
fpr.BindToRegister(s, false);
|
||||||
|
|
||||||
|
// Let's mirror the JitAsmCommon code and assume all non-MMU loads go to RAM.
|
||||||
|
if (!js.memcheck)
|
||||||
|
{
|
||||||
|
if (w)
|
||||||
|
{
|
||||||
|
if (cpu_info.bSSSE3)
|
||||||
|
{
|
||||||
|
MOVD_xmm(XMM0, MComplex(RMEM, addr, SCALE_1, loadOffset));
|
||||||
|
PSHUFB(XMM0, M(pbswapShuffle1x4));
|
||||||
|
UNPCKLPS(XMM0, M(m_one));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
LoadAndSwap(32, RSCRATCH, MComplex(RMEM, addr, SCALE_1, loadOffset));
|
||||||
|
MOVD_xmm(XMM0, R(RSCRATCH));
|
||||||
|
UNPCKLPS(XMM0, M(m_one));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (cpu_info.bSSSE3)
|
||||||
|
{
|
||||||
|
MOVQ_xmm(XMM0, MComplex(RMEM, addr, SCALE_1, loadOffset));
|
||||||
|
PSHUFB(XMM0, M(pbswapShuffle2x4));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
LoadAndSwap(64, RSCRATCH, MComplex(RMEM, addr, SCALE_1, loadOffset));
|
||||||
|
ROL(64, R(RSCRATCH), Imm8(32));
|
||||||
|
MOVQ_xmm(XMM0, R(RSCRATCH));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
CVTPS2PD(fpr.RX(s), R(XMM0));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
||||||
|
registersInUse[fpr.RX(s) << 16] = false;
|
||||||
|
if (update)
|
||||||
|
registersInUse[addr] = true;
|
||||||
|
SafeLoadToReg(RSCRATCH, R(addr), w ? 32 : 64, loadOffset, registersInUse, false);
|
||||||
|
MemoryExceptionCheck();
|
||||||
|
if (w)
|
||||||
|
{
|
||||||
|
MOVD_xmm(XMM0, R(RSCRATCH));
|
||||||
|
UNPCKLPS(XMM0, M(m_one));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ROL(64, R(RSCRATCH), Imm8(32));
|
||||||
|
MOVQ_xmm(XMM0, R(RSCRATCH));
|
||||||
|
}
|
||||||
|
CVTPS2PD(fpr.RX(s), R(XMM0));
|
||||||
|
if (update)
|
||||||
|
MOV(32, gpr.R(a), R(addr));
|
||||||
|
}
|
||||||
|
gpr.UnlockAll();
|
||||||
|
fpr.UnlockAll();
|
||||||
|
return;
|
||||||
|
}
|
||||||
gpr.FlushLockX(RSCRATCH_EXTRA);
|
gpr.FlushLockX(RSCRATCH_EXTRA);
|
||||||
gpr.BindToRegister(a, true, update);
|
gpr.BindToRegister(a, true, update);
|
||||||
fpr.BindToRegister(s, false, true);
|
fpr.BindToRegister(s, false, true);
|
||||||
|
|
|
@ -191,8 +191,8 @@ void CommonAsmRoutines::GenMfcr()
|
||||||
|
|
||||||
// Safe + Fast Quantizers, originally from JITIL by magumagu
|
// Safe + Fast Quantizers, originally from JITIL by magumagu
|
||||||
|
|
||||||
static const u8 GC_ALIGNED16(pbswapShuffle1x4[16]) = { 3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
|
const u8 GC_ALIGNED16(pbswapShuffle1x4[16]) = { 3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
|
||||||
static const u8 GC_ALIGNED16(pbswapShuffle2x4[16]) = { 3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15 };
|
const u8 GC_ALIGNED16(pbswapShuffle2x4[16]) = { 3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15 };
|
||||||
|
|
||||||
static const float GC_ALIGNED16(m_quantizeTableS[]) =
|
static const float GC_ALIGNED16(m_quantizeTableS[]) =
|
||||||
{
|
{
|
||||||
|
@ -257,7 +257,7 @@ static const float GC_ALIGNED16(m_255) = 255.0f;
|
||||||
static const float GC_ALIGNED16(m_127) = 127.0f;
|
static const float GC_ALIGNED16(m_127) = 127.0f;
|
||||||
static const float GC_ALIGNED16(m_m128) = -128.0f;
|
static const float GC_ALIGNED16(m_m128) = -128.0f;
|
||||||
|
|
||||||
static const float GC_ALIGNED16(m_one[]) = {1.0f, 0.0f, 0.0f, 0.0f};
|
const float GC_ALIGNED16(m_one[]) = { 1.0f, 0.0f, 0.0f, 0.0f };
|
||||||
|
|
||||||
#define QUANTIZE_OVERFLOW_SAFE
|
#define QUANTIZE_OVERFLOW_SAFE
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,10 @@
|
||||||
|
|
||||||
#include "Core/PowerPC/JitCommon/Jit_Util.h"
|
#include "Core/PowerPC/JitCommon/Jit_Util.h"
|
||||||
|
|
||||||
|
extern const u8 GC_ALIGNED16(pbswapShuffle1x4[16]);
|
||||||
|
extern const u8 GC_ALIGNED16(pbswapShuffle2x4[16]);
|
||||||
|
extern const float GC_ALIGNED16(m_one[]);
|
||||||
|
|
||||||
class CommonAsmRoutinesBase
|
class CommonAsmRoutinesBase
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
|
|
@ -84,6 +84,7 @@ protected:
|
||||||
int revertGprLoad;
|
int revertGprLoad;
|
||||||
int revertFprLoad;
|
int revertFprLoad;
|
||||||
|
|
||||||
|
bool assumeNoPairedQuantize;
|
||||||
bool firstFPInstructionFound;
|
bool firstFPInstructionFound;
|
||||||
bool isLastInstruction;
|
bool isLastInstruction;
|
||||||
bool memcheck;
|
bool memcheck;
|
||||||
|
@ -104,6 +105,7 @@ protected:
|
||||||
JitBlock *curBlock;
|
JitBlock *curBlock;
|
||||||
|
|
||||||
std::unordered_set<u32> fifoWriteAddresses;
|
std::unordered_set<u32> fifoWriteAddresses;
|
||||||
|
std::unordered_set<u32> pairedQuantizeAddresses;
|
||||||
};
|
};
|
||||||
|
|
||||||
PPCAnalyst::CodeBlock code_block;
|
PPCAnalyst::CodeBlock code_block;
|
||||||
|
|
|
@ -65,6 +65,7 @@ using namespace Gen;
|
||||||
Core::DisplayMessage("Clearing code cache.", 3000);
|
Core::DisplayMessage("Clearing code cache.", 3000);
|
||||||
#endif
|
#endif
|
||||||
jit->js.fifoWriteAddresses.clear();
|
jit->js.fifoWriteAddresses.clear();
|
||||||
|
jit->js.pairedQuantizeAddresses.clear();
|
||||||
for (int i = 0; i < num_blocks; i++)
|
for (int i = 0; i < num_blocks; i++)
|
||||||
{
|
{
|
||||||
DestroyBlock(i, false);
|
DestroyBlock(i, false);
|
||||||
|
@ -311,7 +312,10 @@ using namespace Gen;
|
||||||
if (!forced)
|
if (!forced)
|
||||||
{
|
{
|
||||||
for (u32 i = address; i < address + length; i += 4)
|
for (u32 i = address; i < address + length; i += 4)
|
||||||
|
{
|
||||||
jit->js.fifoWriteAddresses.erase(i);
|
jit->js.fifoWriteAddresses.erase(i);
|
||||||
|
jit->js.pairedQuantizeAddresses.erase(i);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -240,20 +240,26 @@ namespace JitInterface
|
||||||
case ExceptionType::EXCEPTIONS_FIFO_WRITE:
|
case ExceptionType::EXCEPTIONS_FIFO_WRITE:
|
||||||
exception_addresses = &jit->js.fifoWriteAddresses;
|
exception_addresses = &jit->js.fifoWriteAddresses;
|
||||||
break;
|
break;
|
||||||
|
case ExceptionType::EXCEPTIONS_PAIRED_QUANTIZE:
|
||||||
|
exception_addresses = &jit->js.pairedQuantizeAddresses;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (PC != 0 && (exception_addresses->find(PC)) == (exception_addresses->end()))
|
if (PC != 0 && (exception_addresses->find(PC)) == (exception_addresses->end()))
|
||||||
{
|
{
|
||||||
int optype = GetOpInfo(Memory::ReadUnchecked_U32(PC))->type;
|
if (type == ExceptionType::EXCEPTIONS_FIFO_WRITE)
|
||||||
if (optype == OPTYPE_STORE || optype == OPTYPE_STOREFP || (optype == OPTYPE_STOREPS))
|
|
||||||
{
|
{
|
||||||
|
// Check in case the code has been replaced since: do we need to do this?
|
||||||
|
int optype = GetOpInfo(Memory::ReadUnchecked_U32(PC))->type;
|
||||||
|
if (optype != OPTYPE_STORE && optype != OPTYPE_STOREFP && (optype != OPTYPE_STOREPS))
|
||||||
|
return;
|
||||||
|
}
|
||||||
exception_addresses->insert(PC);
|
exception_addresses->insert(PC);
|
||||||
|
|
||||||
// Invalidate the JIT block so that it gets recompiled with the external exception check included.
|
// Invalidate the JIT block so that it gets recompiled with the external exception check included.
|
||||||
jit->GetBlockCache()->InvalidateICache(PC, 4, true);
|
jit->GetBlockCache()->InvalidateICache(PC, 4, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
void Shutdown()
|
void Shutdown()
|
||||||
{
|
{
|
||||||
|
|
|
@ -13,7 +13,8 @@ namespace JitInterface
|
||||||
{
|
{
|
||||||
enum class ExceptionType
|
enum class ExceptionType
|
||||||
{
|
{
|
||||||
EXCEPTIONS_FIFO_WRITE
|
EXCEPTIONS_FIFO_WRITE,
|
||||||
|
EXCEPTIONS_PAIRED_QUANTIZE
|
||||||
};
|
};
|
||||||
|
|
||||||
void DoState(PointerWrap &p);
|
void DoState(PointerWrap &p);
|
||||||
|
|
|
@ -638,6 +638,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32
|
||||||
block->m_broken = false;
|
block->m_broken = false;
|
||||||
block->m_memory_exception = false;
|
block->m_memory_exception = false;
|
||||||
block->m_num_instructions = 0;
|
block->m_num_instructions = 0;
|
||||||
|
block->m_gqr_used = BitSet8(0);
|
||||||
|
|
||||||
if (address == 0)
|
if (address == 0)
|
||||||
{
|
{
|
||||||
|
@ -865,6 +866,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32
|
||||||
|
|
||||||
// Forward scan, for flags that need the other direction for calculation.
|
// Forward scan, for flags that need the other direction for calculation.
|
||||||
BitSet32 fprIsSingle, fprIsDuplicated, fprIsStoreSafe;
|
BitSet32 fprIsSingle, fprIsDuplicated, fprIsStoreSafe;
|
||||||
|
BitSet8 gqrUsed, gqrModified;
|
||||||
for (u32 i = 0; i < block->m_num_instructions; i++)
|
for (u32 i = 0; i < block->m_num_instructions; i++)
|
||||||
{
|
{
|
||||||
code[i].fprIsSingle = fprIsSingle;
|
code[i].fprIsSingle = fprIsSingle;
|
||||||
|
@ -903,7 +905,22 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32
|
||||||
if (!strncmp(code[i].opinfo->opname, "mtfs", 4))
|
if (!strncmp(code[i].opinfo->opname, "mtfs", 4))
|
||||||
fprIsStoreSafe = BitSet32(0);
|
fprIsStoreSafe = BitSet32(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (code[i].opinfo->type == OPTYPE_STOREPS || code[i].opinfo->type == OPTYPE_LOADPS)
|
||||||
|
{
|
||||||
|
int gqr = code[i].inst.OPCD == 4 ? code[i].inst.Ix : code[i].inst.I;
|
||||||
|
gqrUsed[gqr] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (code[i].inst.OPCD == 31 && code[i].inst.SUBOP10 == 467) // mtspr
|
||||||
|
{
|
||||||
|
int gqr = ((code[i].inst.SPRU << 5) | code[i].inst.SPRL) - SPR_GQR0;
|
||||||
|
if (gqr >= 0 && gqr <= 7)
|
||||||
|
gqrModified[gqr] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
block->m_gqr_used = gqrUsed;
|
||||||
|
block->m_gqr_modified = gqrModified;
|
||||||
return address;
|
return address;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -154,6 +154,12 @@ struct CodeBlock
|
||||||
|
|
||||||
// Did we have a memory_exception?
|
// Did we have a memory_exception?
|
||||||
bool m_memory_exception;
|
bool m_memory_exception;
|
||||||
|
|
||||||
|
// Which GQRs this block uses, if any.
|
||||||
|
BitSet8 m_gqr_used;
|
||||||
|
|
||||||
|
// Which GQRs this block modifies, if any.
|
||||||
|
BitSet8 m_gqr_modified;
|
||||||
};
|
};
|
||||||
|
|
||||||
class PPCAnalyzer
|
class PPCAnalyzer
|
||||||
|
|
Loading…
Reference in New Issue