mirror of https://github.com/PCSX2/pcsx2.git
EE/IOP/VU: x86-64 recompiler support
This commit is contained in:
Parent: eeca29b6d3
Commit: dc57270fb8
@@ -66,6 +66,7 @@ struct xImpl_FastCall
     void operator()(void *f, u32 a1, const xRegister32 &a2) const;
     void operator()(void *f, const xIndirect32 &a1) const;
     void operator()(void *f, u32 a1, u32 a2) const;
     void operator()(void *f, void *a1) const;

 #ifdef __M_X86_64
+    void operator()(void *f, const xRegisterLong &a1, const xRegisterLong &a2 = xEmptyReg) const;

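Note on usage (my illustration, not part of the diff): xImpl_FastCall is exposed through the xFastCall(...) helper, which loads its operands into the platform's argument registers before emitting the call. A call site from later in this same commit shows the ptr32 overload in action:

    // Emits: mov arg1regd, [&psxRegs.pc] ; call iopRecRecompile
    xFastCall((void*)iopRecRecompile, ptr32[&psxRegs.pc]);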
@@ -145,6 +145,8 @@ extern void xBSWAP(const xRegister32or64 &to);
 extern void xLEA(xRegister64 to, const xIndirectVoid &src, bool preserve_flags = false);
 extern void xLEA(xRegister32 to, const xIndirectVoid &src, bool preserve_flags = false);
 extern void xLEA(xRegister16 to, const xIndirectVoid &src, bool preserve_flags = false);
+/// LEA with a target that will be decided later, guarantees that no optimizations are performed that could change what needs to be written in
+extern u32* xLEA_Writeback(xAddressReg to);

 // ----- Push / Pop Instructions -----
 // Note: pushad/popad implementations are intentionally left out. The instructions are

@@ -198,6 +200,27 @@ public:
     ~xScopedStackFrame();
 };

+//////////////////////////////////////////////////////////////////////////////////////////
+/// Helper object to save some temporary registers before the call
+class xScopedSavedRegisters
+{
+    std::vector<std::reference_wrapper<const xAddressReg>> regs;
+public:
+    xScopedSavedRegisters(std::initializer_list<std::reference_wrapper<const xAddressReg>> regs);
+    ~xScopedSavedRegisters();
+};
+
+//////////////////////////////////////////////////////////////////////////////////////////
+/// Helper function to calculate base+offset taking into account the limitations of x86-64's RIP-relative addressing
+/// (Will either return `base+offset` or LEA `base` into `tmpRegister` and return `tmpRegister+offset`)
+xAddressVoid xComplexAddress(const xAddressReg& tmpRegister, void *base, const xAddressVoid& offset);
+
+//////////////////////////////////////////////////////////////////////////////////////////
+/// Helper function to load addresses that may be far from the current instruction pointer
+/// On i386, resolves to `mov dst, (sptr)addr`
+/// On x86-64, resolves to either `mov dst, (sptr)addr` or `lea dst, [addr]` depending on the distance from RIP
+void xLoadFarAddr(const xAddressReg& dst, void *addr);
+
 //////////////////////////////////////////////////////////////////////////////////////////
 // JMP / Jcc Instructions!

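A usage sketch for the new RAII helper (my own example; `someFunc` is a hypothetical callee): the constructor pushes the listed registers and pads the stack so the total adjustment stays 16-byte aligned; the destructor undoes both, popping in reverse order.

    {
        xScopedSavedRegisters saved({ rax, rdx });  // emits push rax; push rdx (+ alignment pad if needed)
        xFastCall((void*)someFunc);
    }   // emits the matching alignment restore, then pop rdx; pop rax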
@@ -445,8 +468,8 @@ extern void xMOVNTDQA(const xIndirectVoid &to, const xRegisterSSE &from);
 extern void xMOVNTPD(const xIndirectVoid &to, const xRegisterSSE &from);
 extern void xMOVNTPS(const xIndirectVoid &to, const xRegisterSSE &from);

-extern void xMOVMSKPS(const xRegister32or64 &to, const xRegisterSSE &from);
-extern void xMOVMSKPD(const xRegister32or64 &to, const xRegisterSSE &from);
+extern void xMOVMSKPS(const xRegister32 &to, const xRegisterSSE &from);
+extern void xMOVMSKPD(const xRegister32 &to, const xRegisterSSE &from);

 extern void xMASKMOV(const xRegisterSSE &to, const xRegisterSSE &from);
 extern void xPMOVMSKB(const xRegister32or64 &to, const xRegisterSSE &from);

@@ -489,6 +489,7 @@ public:
 #else
     #define xRegisterLong xRegister32
 #endif
+static const int wordsize = sizeof(sptr);

 class xAddressReg : public xRegisterLong
 {

@@ -648,14 +649,9 @@ extern const xAddressReg
     r8, r9, r10, r11,
     r12, r13, r14, r15;

-extern const xAddressReg
-    eax, ebx, ecx, edx,
-    esi, edi, ebp, esp;
-
-// Temporary registers to aid the move to x86-64
 extern const xRegister32
-    eaxd, ebxd, ecxd, edxd,
-    esid, edid, ebpd, espd,
+    eax, ebx, ecx, edx,
+    esi, edi, ebp, esp,
     r8d, r9d, r10d, r11d,
     r12d, r13d, r14d, r15d;

@@ -104,6 +104,11 @@ void xImpl_FastCall::operator()(void *f, u32 a1, const xRegisterLong &a2) const
 }
 #endif

+void xImpl_FastCall::operator()(void *f, void *a1) const {
+    xLEA(arg1reg, ptr[a1]);
+    (*this)(f, arg1reg, arg2reg);
+}
+
 void xImpl_FastCall::operator()(void *f, u32 a1, const xRegister32 &a2) const {
     if (!a2.IsEmpty()) { xMOV(arg2regd, a2); }
     xMOV(arg1regd, a1);

@@ -711,8 +711,8 @@ __fi void xMOVNTPS(const xIndirectVoid &to, const xRegisterSSE &from) { xOpWrite

 // ------------------------------------------------------------------------

-__fi void xMOVMSKPS(const xRegister32or64 &to, const xRegisterSSE &from) { xOpWrite0F(0x50, to, from); }
-__fi void xMOVMSKPD(const xRegister32or64 &to, const xRegisterSSE &from) { xOpWrite0F(0x66, 0x50, to, from, true); }
+__fi void xMOVMSKPS(const xRegister32 &to, const xRegisterSSE &from) { xOpWrite0F(0x50, to, from); }
+__fi void xMOVMSKPD(const xRegister32 &to, const xRegisterSSE &from) { xOpWrite0F(0x66, 0x50, to, from, true); }

 // xMASKMOV:
 // Selectively write bytes from mm1/xmm1 to memory location using the byte mask in mm2/xmm2.

@@ -130,17 +130,11 @@ const xAddressReg
     r12(12), r13(13),
     r14(14), r15(15);

-const xAddressReg
+const xRegister32
     eax(0), ebx(3),
     ecx(1), edx(2),
     esp(4), ebp(5),
-    esi(6), edi(7);
-
-const xRegister32
-    eaxd(0), ebxd(3),
-    ecxd(1), edxd(2),
-    espd(4), ebpd(5),
-    esid(6), edid(7),
+    esi(6), edi(7),
     r8d(8), r9d(9),
     r10d(10), r11d(11),
     r12d(12), r13d(13),

@@ -173,10 +167,10 @@ const xAddressReg
     calleeSavedReg2 = rsi;

 const xRegister32
-    arg1regd = ecxd,
-    arg2regd = edxd,
-    calleeSavedReg1d = edid,
-    calleeSavedReg2d = esid;
+    arg1regd = ecx,
+    arg2regd = edx,
+    calleeSavedReg1d = edi,
+    calleeSavedReg2d = esi;
 #else
 const xAddressReg
     arg1reg = rdi,

@@ -187,8 +181,8 @@ const xAddressReg
     calleeSavedReg2 = r13;

 const xRegister32
-    arg1regd = edid,
-    arg2regd = esid,
+    arg1regd = edi,
+    arg2regd = esi,
     calleeSavedReg1d = r12d,
     calleeSavedReg2d = r13d;
 #endif

@@ -367,7 +361,7 @@ void EmitSibMagic(uint regfield, const xIndirectVoid &info, int extraRIPOffset)
         EmitSibMagic(regfield, (void *)info.Displacement, extraRIPOffset);
         return;
     } else {
-        if (info.Index == ebp && displacement_size == 0)
+        if (info.Index == rbp && displacement_size == 0)
             displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]!

         ModRM(displacement_size, regfield, info.Index.Id & 7);

@@ -385,7 +379,7 @@ void EmitSibMagic(uint regfield, const xIndirectVoid &info, int extraRIPOffset)
             xWrite<s32>(info.Displacement);
         return;
     } else {
-        if (info.Base == ebp && displacement_size == 0)
+        if (info.Base == rbp && displacement_size == 0)
             displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]!

         ModRM(displacement_size, regfield, ModRm_UseSib);

@@ -896,7 +890,7 @@ static void EmitLeaMagic(const xRegisterInt &to, const xIndirectVoid &src, bool
     } else {
         if (src.Scale == 0) {
             if (!preserve_flags) {
-                if (src.Index == esp) {
+                if (src.Index == rsp) {
                     // ESP is not encodable as an index (ix86 ignores it), thus:
                     _xMovRtoR(to, sizeMatchedBase); // will do the trick!
                     if (src.Displacement)

@@ -907,7 +901,7 @@ static void EmitLeaMagic(const xRegisterInt &to, const xIndirectVoid &src, bool
                 _g1_EmitOp(G1Type_ADD, to, sizeMatchedIndex);
                 return;
             }
-        } else if ((src.Index == esp) && (src.Displacement == 0)) {
+        } else if ((src.Index == rsp) && (src.Displacement == 0)) {
             // special case handling of ESP as Index, which is replaceable with
             // a single MOV even when preserve_flags is set! :D

@@ -937,6 +931,17 @@ __emitinline void xLEA(xRegister16 to, const xIndirectVoid &src, bool preserve_f
     EmitLeaMagic(to, src, preserve_flags);
 }

+__emitinline u32* xLEA_Writeback(xAddressReg to)
+{
+#ifdef __M_X86_64
+    xOpWrite(0, 0x8d, to, ptr[(void*)(0xdcdcdcd + (uptr)xGetPtr() + 7)]);
+#else
+    xOpAccWrite(0, 0xb8 | to.Id, 0, to);
+    xWrite32(0xcdcdcdcd);
+#endif
+    return (u32*)xGetPtr() - 1;
+}
+
 // =====================================================================================================
 // TEST / INC / DEC
 // =====================================================================================================

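How the writeback placeholder works (my reading of the code above): on x86-64 the emitted instruction is `lea to, [rip+disp32]`, and the target constant is chosen so that, after the assembler subtracts the instruction's end address, exactly 0xdcdcdcd lands in the 32-bit displacement field; on i386 it is a `mov to, imm32` of 0xcdcdcdcd. Either way the returned `u32*` points at the last four bytes emitted, so the caller can patch the real address in later:

    u32* writeback = xLEA_Writeback(rax);   // emits the instruction with a dummy 32-bit field
    // ... later, when `target` is known (sketch; the 64-bit form needs a RIP-relative displacement):
    *writeback = (u32)(target - ((uptr)writeback + 4));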
@@ -1145,6 +1150,14 @@ __emitinline void xRestoreReg(const xRegisterSSE &dest)

 #endif

+static void stackAlign(int offset, bool moveDown) {
+    int needed = (16 - (offset % 16)) % 16;
+    if (moveDown) {
+        needed = -needed;
+    }
+    ALIGN_STACK(needed);
+}
+
 xScopedStackFrame::xScopedStackFrame(bool base_frame, bool save_base_pointer, int offset)
 {
     m_base_frame = base_frame;

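Worked example of stackAlign (my illustration): for offset = 24, needed = (16 - 24 % 16) % 16 = 8, so entry emits ALIGN_STACK(-8) and exit emits ALIGN_STACK(+8); for offset = 32 the outer % 16 makes needed = 0 and nothing is adjusted. The inline expression it replaces below, ALIGN_STACK(-(16 - m_offset % 16)), emitted a redundant 16-byte adjustment in that already-aligned case.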
@@ -1188,12 +1201,12 @@ xScopedStackFrame::xScopedStackFrame(bool base_frame, bool save_base_pointer, in

 #endif

-    ALIGN_STACK(-(16 - m_offset % 16));
+    stackAlign(m_offset, true);
 }

 xScopedStackFrame::~xScopedStackFrame()
 {
-    ALIGN_STACK(16 - m_offset % 16);
+    stackAlign(m_offset, false);

 #ifdef __M_X86_64

@@ -1226,4 +1239,47 @@ xScopedStackFrame::~xScopedStackFrame()
     }
 }

+xScopedSavedRegisters::xScopedSavedRegisters(std::initializer_list<std::reference_wrapper<const xAddressReg>> regs)
+    : regs(regs)
+{
+    for (auto reg : regs)
+    {
+        const xAddressReg& regRef = reg;
+        xPUSH(regRef);
+    }
+    stackAlign(regs.size() * wordsize, true);
+}
+
+xScopedSavedRegisters::~xScopedSavedRegisters() {
+    stackAlign(regs.size() * wordsize, false);
+    for (auto it = regs.rbegin(); it < regs.rend(); ++it) {
+        const xAddressReg& regRef = *it;
+        xPOP(regRef);
+    }
+}
+
+xAddressVoid xComplexAddress(const xAddressReg& tmpRegister, void *base, const xAddressVoid& offset) {
+    if ((sptr)base == (s32)(sptr)base) {
+        return offset + base;
+    } else {
+        xLEA(tmpRegister, ptr[base]);
+        return offset + tmpRegister;
+    }
+}
+
+void xLoadFarAddr(const xAddressReg& dst, void *addr) {
+#ifdef __M_X86_64
+    sptr iaddr = (sptr)addr;
+    sptr rip = (sptr)xGetPtr() + 7; // LEA will be 7 bytes
+    sptr disp = iaddr - rip;
+    if (disp == (s32)disp) {
+        xLEA(dst, ptr[addr]);
+    } else {
+        xMOV64(dst, iaddr);
+    }
+#else
+    xMOV(dst, (sptr)addr);
+#endif
+}
+
 } // End namespace x86Emitter

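Why these helpers exist (my summary): x86-64 addressing modes and most immediates are limited to signed 32 bits, so a table whose absolute address does not fit in ±2 GiB cannot be folded into a displacement the way it could on i386. xComplexAddress folds the base when it fits and otherwise materializes it in the scratch register, which is exactly how the dispatchers later in this commit use it:

    // reads the LUT entry whether or not psxRecLUT is within a 32-bit displacement
    xMOV( rcx, ptrNative[xComplexAddress(rcx, psxRecLUT, rax*wordsize)] );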
@@ -782,7 +782,7 @@ void vtlb_Term()
     //nothing to do for now
 }

-constexpr size_t VMAP_SIZE = sizeof(sptr) * VTLB_VMAP_ITEMS;
+constexpr size_t VMAP_SIZE = sizeof(VTLBVirtual) * VTLB_VMAP_ITEMS;

 // Reserves the vtlb core allocation used by various emulation components!
 // [TODO] basemem - request allocating memory at the specified virtual location, which can allow

@@ -230,14 +230,14 @@ public:

 #define PC_GETBLOCK_(x, reclut) ((BASEBLOCK*)(reclut[((u32)(x)) >> 16] + (x)*(sizeof(BASEBLOCK)/4)))

-static void recLUT_SetPage(uptr reclut[0x10000], uptr hwlut[0x10000],
+static void recLUT_SetPage(uptr reclut[0x10000], u32 hwlut[0x10000],
     BASEBLOCK *mapbase, uint pagebase, uint pageidx, uint mappage)
 {
     // this value is in 64k pages!
     uint page = pagebase + pageidx;

     pxAssert( page < 0x10000 );
-    reclut[page] = (uptr)&mapbase[(mappage - page) << 14];
+    reclut[page] = (uptr)&mapbase[((s32)mappage - (s32)page) << 14];
     if (hwlut)
         hwlut[page] = 0u - (pagebase << 16);
 }

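The added s32 casts matter on 64-bit hosts (my reading): `mappage - page` is unsigned, so when page > mappage the subtraction wraps; indexing with that huge unsigned value happened to cancel out under 32-bit pointer arithmetic but produces a wildly wrong pointer once `uptr` is 64 bits wide. With the casts, e.g. mappage = 0 and page = 0x2000 gives ((s32)0 - (s32)0x2000) << 14, a genuinely negative index that makes &mapbase[...] point backwards as intended.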
@@ -118,12 +118,15 @@ extern _x86regs x86regs[iREGCNT_GPR], s_saveX86regs[iREGCNT_GPR];
 uptr _x86GetAddr(int type, int reg);
 void _initX86regs();
 int _getFreeX86reg(int mode);
-int _allocX86reg(x86Emitter::xRegisterLong x86reg, int type, int reg, int mode);
+[[deprecated]] int _allocX86reg(x86Emitter::xRegister64 x86reg, int type, int reg, int mode);
+int _allocX86reg(x86Emitter::xRegister32 x86reg, int type, int reg, int mode);
+// To resolve ambiguity between 32 and 64, delete once everything's on 32
+int _allocX86reg(x86Emitter::xRegisterEmpty x86reg, int type, int reg, int mode);
 void _deleteX86reg(int type, int reg, int flush);
 int _checkX86reg(int type, int reg, int mode);
 void _addNeededX86reg(int type, int reg);
 void _clearNeededX86regs();
-void _freeX86reg(const x86Emitter::xRegisterLong& x86reg);
+void _freeX86reg(const x86Emitter::xRegister32& x86reg);
 void _freeX86reg(int x86reg);
 void _freeX86regs();
 void _flushCachedRegs();

@@ -397,9 +397,9 @@ void FPU_MUL(int info, int regd, int sreg, int treg, bool acc)

     if (CHECK_FPUMULHACK)
     {
-        xMOVD(ecx, xRegisterSSE(sreg));
-        xMOVD(edx, xRegisterSSE(treg));
-        xFastCall((void*)(uptr)&FPU_MUL_HACK, ecx, edx); //returns the hacked result or 0
+        xMOVD(arg1regd, xRegisterSSE(sreg));
+        xMOVD(arg2regd, xRegisterSSE(treg));
+        xFastCall((void*)(uptr)&FPU_MUL_HACK, arg1regd, arg2regd); //returns the hacked result or 0
         xTEST(eax, eax);
         noHack = JZ8(0);
         xMOVDZX(xRegisterSSE(regd), eax);

@@ -1500,16 +1500,18 @@ void recQFSRV()
         int info = eeRecompileCodeXMM(XMMINFO_WRITED);

         xMOV(eax, ptr32[&cpuRegs.sa]);
-        xMOVDQU(xRegisterSSE(EEREC_D), ptr32[eax + &cpuRegs.GPR.r[_Rt_]]);
+        xLEA(rcx, ptr[&cpuRegs.GPR.r[_Rt_]]);
+        xMOVDQU(xRegisterSSE(EEREC_D), ptr32[rax + rcx]);
         return;
     }

     int info = eeRecompileCodeXMM( XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED );

     xMOV(eax, ptr32[&cpuRegs.sa]);
-    xMOVDQA(ptr32[&tempqw[0]], xRegisterSSE(EEREC_T));
-    xMOVDQA(ptr32[&tempqw[4]], xRegisterSSE(EEREC_S));
-    xMOVDQU(xRegisterSSE(EEREC_D), ptr32[eax + &tempqw]);
+    xLEA(rcx, ptr[tempqw]);
+    xMOVDQA(ptr32[rcx], xRegisterSSE(EEREC_T));
+    xMOVDQA(ptr32[rcx+16], xRegisterSSE(EEREC_S));
+    xMOVDQU(xRegisterSSE(EEREC_D), ptr32[rax + rcx]);

     _clearNeededXMMregs();
 }

@@ -46,7 +46,7 @@ u32 g_psxMaxRecMem = 0;
 u32 s_psxrecblocks[] = {0};

 uptr psxRecLUT[0x10000];
-uptr psxhwLUT[0x10000];
+u32 psxhwLUT[0x10000];

 static __fi u32 HWADDR(u32 mem) { return psxhwLUT[mem >> 16] + mem; }

@@ -126,13 +126,13 @@ static DynGenFunc* _DynGen_JITCompile()

     u8* retval = xGetPtr();

-    xFastCall((void*)iopRecRecompile, ptr[&psxRegs.pc] );
+    xFastCall((void*)iopRecRecompile, ptr32[&psxRegs.pc] );

     xMOV( eax, ptr[&psxRegs.pc] );
     xMOV( ebx, eax );
     xSHR( eax, 16 );
-    xMOV( ecx, ptr[psxRecLUT + (eax*4)] );
-    xJMP( ptr32[ecx+ebx] );
+    xMOV( rcx, ptrNative[xComplexAddress(rcx, psxRecLUT, rax*wordsize)] );
+    xJMP( ptrNative[rbx*(wordsize/4) + rcx] );

     return (DynGenFunc*)retval;
 }

@@ -152,8 +152,8 @@ static DynGenFunc* _DynGen_DispatcherReg()
     xMOV( eax, ptr[&psxRegs.pc] );
     xMOV( ebx, eax );
     xSHR( eax, 16 );
-    xMOV( ecx, ptr[psxRecLUT + (eax*4)] );
-    xJMP( ptr32[ecx+ebx] );
+    xMOV( rcx, ptrNative[xComplexAddress(rcx, psxRecLUT, rax*wordsize)] );
+    xJMP( ptrNative[rbx*(wordsize/4) + rcx] );

     return (DynGenFunc*)retval;
 }

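For reference, the EE versions of these dispatchers (later in this diff) carry a C-equivalent comment; the same logic applies here with the IOP's tables (my adaptation):

    u32 addr = psxRegs.pc;
    void(**base)() = (void(**)())psxRecLUT[addr >> 16];
    base[addr >> 2]();

On x86-64 each LUT entry is 8 bytes, hence the rbx*(wordsize/4) scaling of the 4-byte-aligned PC.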
@@ -391,7 +391,7 @@ void _psxDeleteReg(int reg, int flush)
     _deleteX86reg(X86TYPE_PSX, reg, flush ? 0 : 2);
 }

-void _psxMoveGPRtoR(const xRegisterLong& to, int fromgpr)
+void _psxMoveGPRtoR(const xRegister32& to, int fromgpr)
 {
     if( PSX_IS_CONST1(fromgpr) )
         xMOV(to, g_psxConstRegs[fromgpr] );

@@ -863,22 +863,22 @@ void psxSetBranchReg(u32 reg)
     psxbranch = 1;

     if( reg != 0xffffffff ) {
-        _allocX86reg(esi, X86TYPE_PCWRITEBACK, 0, MODE_WRITE);
-        _psxMoveGPRtoR(esi, reg);
+        _allocX86reg(calleeSavedReg2d, X86TYPE_PCWRITEBACK, 0, MODE_WRITE);
+        _psxMoveGPRtoR(calleeSavedReg2d, reg);

         psxRecompileNextInstruction(1);

-        if( x86regs[esi.GetId()].inuse ) {
-            pxAssert( x86regs[esi.GetId()].type == X86TYPE_PCWRITEBACK );
-            xMOV(ptr[&psxRegs.pc], esi);
-            x86regs[esi.GetId()].inuse = 0;
+        if( x86regs[calleeSavedReg2d.GetId()].inuse ) {
+            pxAssert( x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK );
+            xMOV(ptr32[&psxRegs.pc], calleeSavedReg2d);
+            x86regs[calleeSavedReg2d.GetId()].inuse = 0;
 #ifdef PCSX2_DEBUG
-            xOR( esi, esi );
+            xOR( calleeSavedReg2d, calleeSavedReg2d );
 #endif
         }
         else {
-            xMOV(eax, ptr[&g_recWriteback]);
-            xMOV(ptr[&psxRegs.pc], eax);
+            xMOV(eax, ptr32[&g_recWriteback]);
+            xMOV(ptr32[&psxRegs.pc], eax);

 #ifdef PCSX2_DEBUG
             xOR( eax, eax );

@@ -48,7 +48,7 @@ void _psxFlushCall(int flushtype);

 void _psxOnWriteReg(int reg);

-void _psxMoveGPRtoR(const x86Emitter::xRegisterLong& to, int fromgpr);
+void _psxMoveGPRtoR(const x86Emitter::xRegister32& to, int fromgpr);
 #if 0
 void _psxMoveGPRtoM(uptr to, int fromgpr);
 void _psxMoveGPRtoRm(x86IntRegType to, int fromgpr);

@@ -65,9 +65,9 @@ void rpsxADDconst(int dreg, int sreg, u32 off, int info)
         if (sreg == dreg) {
             xADD(ptr32[&psxRegs.GPR.r[dreg]], off);
         } else {
-            xMOV(eax, ptr[&psxRegs.GPR.r[sreg]]);
+            xMOV(eax, ptr32[&psxRegs.GPR.r[sreg]]);
             if (off) xADD(eax, off);
-            xMOV(ptr[&psxRegs.GPR.r[dreg]], eax);
+            xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax);
         }
     }
     else {

@@ -97,7 +97,7 @@ void rpsxSLTconst(int info, int dreg, int sreg, int imm)
     xXOR(eax, eax);
     xCMP(ptr32[&psxRegs.GPR.r[sreg]], imm);
     xSETL(al);
-    xMOV(ptr[&psxRegs.GPR.r[dreg]], eax);
+    xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax);
 }

 void rpsxSLTI_(int info) { rpsxSLTconst(info, _Rt_, _Rs_, _Imm_); }

@@ -115,7 +115,7 @@ void rpsxSLTUconst(int info, int dreg, int sreg, int imm)
     xXOR(eax, eax);
     xCMP(ptr32[&psxRegs.GPR.r[sreg]], imm);
     xSETB(al);
-    xMOV(ptr[&psxRegs.GPR.r[dreg]], eax);
+    xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax);
 }

 void rpsxSLTIU_(int info) { rpsxSLTUconst(info, _Rt_, _Rs_, (s32)_Imm_); }

@@ -134,9 +134,9 @@ void rpsxANDconst(int info, int dreg, int sreg, u32 imm)
         if (sreg == dreg) {
             xAND(ptr32[&psxRegs.GPR.r[dreg]], imm);
         } else {
-            xMOV(eax, ptr[&psxRegs.GPR.r[sreg]]);
+            xMOV(eax, ptr32[&psxRegs.GPR.r[sreg]]);
             xAND(eax, imm);
-            xMOV(ptr[&psxRegs.GPR.r[dreg]], eax);
+            xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax);
         }
     } else {
         xMOV(ptr32[&psxRegs.GPR.r[dreg]], 0);

@@ -160,15 +160,15 @@ void rpsxORconst(int info, int dreg, int sreg, u32 imm)
             xOR(ptr32[&psxRegs.GPR.r[dreg]], imm);
         }
         else {
-            xMOV(eax, ptr[&psxRegs.GPR.r[sreg]]);
+            xMOV(eax, ptr32[&psxRegs.GPR.r[sreg]]);
             xOR(eax, imm);
-            xMOV(ptr[&psxRegs.GPR.r[dreg]], eax);
+            xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax);
         }
     }
     else {
         if( dreg != sreg ) {
-            xMOV(ecx, ptr[&psxRegs.GPR.r[sreg]]);
-            xMOV(ptr[&psxRegs.GPR.r[dreg]], ecx);
+            xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]);
+            xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx);
         }
     }
 }

@@ -189,9 +189,9 @@ void rpsxXORconst(int info, int dreg, int sreg, u32 imm)
             xNOT(ptr32[&psxRegs.GPR.r[dreg]]);
         }
         else {
-            xMOV(ecx, ptr[&psxRegs.GPR.r[sreg]]);
+            xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]);
             xNOT(ecx);
-            xMOV(ptr[&psxRegs.GPR.r[dreg]], ecx);
+            xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx);
         }
     }
     else if (imm) {

@@ -200,15 +200,15 @@ void rpsxXORconst(int info, int dreg, int sreg, u32 imm)
             xXOR(ptr32[&psxRegs.GPR.r[dreg]], imm);
         }
         else {
-            xMOV(eax, ptr[&psxRegs.GPR.r[sreg]]);
+            xMOV(eax, ptr32[&psxRegs.GPR.r[sreg]]);
             xXOR(eax, imm);
-            xMOV(ptr[&psxRegs.GPR.r[dreg]], eax);
+            xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax);
         }
     }
     else {
         if( dreg != sreg ) {
-            xMOV(ecx, ptr[&psxRegs.GPR.r[sreg]]);
-            xMOV(ptr[&psxRegs.GPR.r[dreg]], ecx);
+            xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]);
+            xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx);
         }
     }
 }

@@ -241,16 +241,16 @@ void rpsxADDU_constt(int info)
 void rpsxADDU_(int info)
 {
     if (_Rs_ && _Rt_) {
-        xMOV(eax, ptr[&psxRegs.GPR.r[_Rs_]]);
-        xADD(eax, ptr[&psxRegs.GPR.r[_Rt_]]);
+        xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
+        xADD(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
     } else if (_Rs_) {
-        xMOV(eax, ptr[&psxRegs.GPR.r[_Rs_]]);
+        xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
     } else if (_Rt_) {
-        xMOV(eax, ptr[&psxRegs.GPR.r[_Rt_]]);
+        xMOV(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
     } else {
         xXOR(eax, eax);
     }
-    xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax);
+    xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
 }

 PSXRECOMPILE_CONSTCODE0(ADDU);

@@ -266,8 +266,8 @@ void rpsxSUBU_const()
 void rpsxSUBU_consts(int info)
 {
     xMOV(eax, g_psxConstRegs[_Rs_]);
-    xSUB(eax, ptr[&psxRegs.GPR.r[_Rt_]]);
-    xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax);
+    xSUB(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
+    xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
 }

 void rpsxSUBU_constt(int info) { rpsxADDconst(_Rd_, _Rs_, -(int)g_psxConstRegs[_Rt_], info); }

@@ -278,13 +278,13 @@ void rpsxSUBU_(int info)
     if (!_Rd_) return;

     if( _Rd_ == _Rs_ ) {
-        xMOV(eax, ptr[&psxRegs.GPR.r[_Rt_]]);
-        xSUB(ptr[&psxRegs.GPR.r[_Rd_]], eax);
+        xMOV(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
+        xSUB(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
     }
     else {
-        xMOV(eax, ptr[&psxRegs.GPR.r[_Rs_]]);
-        xSUB(eax, ptr[&psxRegs.GPR.r[_Rt_]]);
-        xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax);
+        xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
+        xSUB(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
+        xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
     }
 }

@@ -296,13 +296,13 @@ void rpsxLogicalOp(int info, int op)
 {
     if( _Rd_ == _Rs_ || _Rd_ == _Rt_ ) {
         int vreg = _Rd_ == _Rs_ ? _Rt_ : _Rs_;
-        xMOV(ecx, ptr[&psxRegs.GPR.r[vreg]]);
+        xMOV(ecx, ptr32[&psxRegs.GPR.r[vreg]]);

         switch(op) {
-            case 0: xAND(ptr[&psxRegs.GPR.r[_Rd_]], ecx); break;
-            case 1: xOR(ptr[&psxRegs.GPR.r[_Rd_]], ecx); break;
-            case 2: xXOR(ptr[&psxRegs.GPR.r[_Rd_]], ecx); break;
-            case 3: xOR(ptr[&psxRegs.GPR.r[_Rd_]], ecx); break;
+            case 0: xAND(ptr32[&psxRegs.GPR.r[_Rd_]], ecx); break;
+            case 1: xOR(ptr32[&psxRegs.GPR.r[_Rd_]], ecx); break;
+            case 2: xXOR(ptr32[&psxRegs.GPR.r[_Rd_]], ecx); break;
+            case 3: xOR(ptr32[&psxRegs.GPR.r[_Rd_]], ecx); break;
            default: pxAssert(0);
         }

@@ -310,19 +310,19 @@ void rpsxLogicalOp(int info, int op)
             xNOT(ptr32[&psxRegs.GPR.r[_Rd_]]);
     }
     else {
-        xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
+        xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);

         switch(op) {
-            case 0: xAND(ecx, ptr[&psxRegs.GPR.r[_Rt_]]); break;
-            case 1: xOR(ecx, ptr[&psxRegs.GPR.r[_Rt_]]); break;
-            case 2: xXOR(ecx, ptr[&psxRegs.GPR.r[_Rt_]]); break;
-            case 3: xOR(ecx, ptr[&psxRegs.GPR.r[_Rt_]]); break;
+            case 0: xAND(ecx, ptr32[&psxRegs.GPR.r[_Rt_]]); break;
+            case 1: xOR(ecx, ptr32[&psxRegs.GPR.r[_Rt_]]); break;
+            case 2: xXOR(ecx, ptr32[&psxRegs.GPR.r[_Rt_]]); break;
+            case 3: xOR(ecx, ptr32[&psxRegs.GPR.r[_Rt_]]); break;
            default: pxAssert(0);
         }

         if( op == 3 )
             xNOT(ecx);
-        xMOV(ptr[&psxRegs.GPR.r[_Rd_]], ecx);
+        xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], ecx);
     }
 }

@@ -374,10 +374,10 @@ void rpsxNORconst(int info, int dreg, int sreg, u32 imm)
             xNOT(ptr32[&psxRegs.GPR.r[dreg]]);
         }
         else {
-            xMOV(ecx, ptr[&psxRegs.GPR.r[sreg]]);
+            xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]);
             xOR(ecx, imm);
             xNOT(ecx);
-            xMOV(ptr[&psxRegs.GPR.r[dreg]], ecx);
+            xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx);
         }
     }
     else {

@@ -385,9 +385,9 @@ void rpsxNORconst(int info, int dreg, int sreg, u32 imm)
             xNOT(ptr32[&psxRegs.GPR.r[dreg]]);
         }
         else {
-            xMOV(ecx, ptr[&psxRegs.GPR.r[sreg]]);
+            xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]);
             xNOT(ecx);
-            xMOV(ptr[&psxRegs.GPR.r[dreg]], ecx);
+            xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx);
         }
     }
 }

@@ -407,19 +407,19 @@ void rpsxSLT_const()
 void rpsxSLT_consts(int info)
 {
     xXOR(eax, eax);
-    xCMP(ptr32[&psxRegs.GPR.r[_Rt_]], g_psxConstRegs[_Rs_]);
-    xSETG(al);
-    xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax);
+    xCMP(ptr32[&psxRegs.GPR.r[_Rt_]], g_psxConstRegs[_Rs_]);
+    xSETG(al);
+    xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
 }

 void rpsxSLT_constt(int info) { rpsxSLTconst(info, _Rd_, _Rs_, g_psxConstRegs[_Rt_]); }
 void rpsxSLT_(int info)
 {
-    xMOV(eax, ptr[&psxRegs.GPR.r[_Rs_]]);
-    xCMP(eax, ptr[&psxRegs.GPR.r[_Rt_]]);
-    xSETL(al);
-    xAND(eax, 0xff);
-    xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax);
+    xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
+    xCMP(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
+    xSETL(al);
+    xAND(eax, 0xff);
+    xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
 }

 PSXRECOMPILE_CONSTCODE0(SLT);

@@ -433,9 +433,9 @@ void rpsxSLTU_const()
 void rpsxSLTU_consts(int info)
 {
     xXOR(eax, eax);
-    xCMP(ptr32[&psxRegs.GPR.r[_Rt_]], g_psxConstRegs[_Rs_]);
-    xSETA(al);
-    xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax);
+    xCMP(ptr32[&psxRegs.GPR.r[_Rt_]], g_psxConstRegs[_Rs_]);
+    xSETA(al);
+    xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
 }

 void rpsxSLTU_constt(int info) { rpsxSLTUconst(info, _Rd_, _Rs_, g_psxConstRegs[_Rt_]); }

@@ -444,11 +444,11 @@ void rpsxSLTU_(int info)
     // Rd = Rs < Rt (unsigned)
     if (!_Rd_) return;

-    xMOV(eax, ptr[&psxRegs.GPR.r[_Rs_]]);
-    xCMP(eax, ptr[&psxRegs.GPR.r[_Rt_]]);
+    xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
+    xCMP(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
     xSBB(eax, eax);
     xNEG(eax);
-    xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax);
+    xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
 }

 PSXRECOMPILE_CONSTCODE0(SLTU);

@@ -468,18 +468,18 @@ void rpsxMULTsuperconst(int info, int sreg, int imm, int sign)
     xMOV(eax, imm);
     if( sign ) xMUL(ptr32[&psxRegs.GPR.r[sreg]]);
     else xUMUL(ptr32[&psxRegs.GPR.r[sreg]]);
-    xMOV(ptr[&psxRegs.GPR.n.lo], eax);
-    xMOV(ptr[&psxRegs.GPR.n.hi], edx);
+    xMOV(ptr32[&psxRegs.GPR.n.lo], eax);
+    xMOV(ptr32[&psxRegs.GPR.n.hi], edx);
 }

 void rpsxMULTsuper(int info, int sign)
 {
     // Lo/Hi = Rs * Rt (signed)
-    xMOV(eax, ptr[&psxRegs.GPR.r[_Rs_]]);
+    xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
     if( sign ) xMUL(ptr32[&psxRegs.GPR.r[_Rt_]]);
     else xUMUL(ptr32[&psxRegs.GPR.r[_Rt_]]);
-    xMOV(ptr[&psxRegs.GPR.n.lo], eax);
-    xMOV(ptr[&psxRegs.GPR.n.hi], edx);
+    xMOV(ptr32[&psxRegs.GPR.n.lo], eax);
+    xMOV(ptr32[&psxRegs.GPR.n.hi], edx);
 }

 void rpsxMULT_consts(int info) { rpsxMULTsuperconst(info, _Rt_, g_psxConstRegs[_Rs_], 1); }

@@ -542,12 +542,12 @@ void rpsxDIVsuper(int info, int sign, int process = 0)
     if( process & PROCESS_CONSTT )
         xMOV(ecx, g_psxConstRegs[_Rt_]);
     else
-        xMOV(ecx, ptr[&psxRegs.GPR.r[_Rt_]]);
+        xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rt_]]);

     if( process & PROCESS_CONSTS )
         xMOV(eax, g_psxConstRegs[_Rs_]);
     else
-        xMOV(eax, ptr[&psxRegs.GPR.r[_Rs_]]);
+        xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);

     u8 *end1;
     if (sign) //test for overflow (x86 will just throw an exception)

@@ -593,8 +593,8 @@ void rpsxDIVsuper(int info, int sign, int process = 0)
     if (sign) x86SetJ8( end1 );
     x86SetJ8( end2 );

-    xMOV(ptr[&psxRegs.GPR.n.lo], eax);
-    xMOV(ptr[&psxRegs.GPR.n.hi], edx);
+    xMOV(ptr32[&psxRegs.GPR.n.lo], eax);
+    xMOV(ptr32[&psxRegs.GPR.n.hi], edx);
 }

 void rpsxDIV_consts(int info) { rpsxDIVsuper(info, 1, PROCESS_CONSTS); }

@@ -639,12 +639,12 @@ static void rpsxLB()
     _psxOnWriteReg(_Rt_);
     _psxDeleteReg(_Rt_, 0);

-    xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
+    xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
     if (_Imm_) xADD(ecx, _Imm_);
     xFastCall((void*)iopMemRead8, ecx ); // returns value in EAX
     if (_Rt_) {
         xMOVSX(eax, al);
-        xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax);
+        xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax);
     }
     PSX_DEL_CONST(_Rt_);
 }

@@ -655,12 +655,12 @@ static void rpsxLBU()
     _psxOnWriteReg(_Rt_);
     _psxDeleteReg(_Rt_, 0);

-    xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
+    xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
     if (_Imm_) xADD(ecx, _Imm_);
     xFastCall((void*)iopMemRead8, ecx ); // returns value in EAX
     if (_Rt_) {
         xMOVZX(eax, al);
-        xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax);
+        xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax);
     }
     PSX_DEL_CONST(_Rt_);
 }

@@ -671,12 +671,12 @@ static void rpsxLH()
     _psxOnWriteReg(_Rt_);
     _psxDeleteReg(_Rt_, 0);

-    xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
+    xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
     if (_Imm_) xADD(ecx, _Imm_);
     xFastCall((void*)iopMemRead16, ecx ); // returns value in EAX
     if (_Rt_) {
         xMOVSX(eax, ax);
-        xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax);
+        xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax);
     }
     PSX_DEL_CONST(_Rt_);
 }

@@ -687,12 +687,12 @@ static void rpsxLHU()
     _psxOnWriteReg(_Rt_);
     _psxDeleteReg(_Rt_, 0);

-    xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
+    xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
     if (_Imm_) xADD(ecx, _Imm_);
     xFastCall((void*)iopMemRead16, ecx ); // returns value in EAX
     if (_Rt_) {
         xMOVZX(eax, ax);
-        xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax);
+        xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax);
     }
     PSX_DEL_CONST(_Rt_);
 }

@@ -704,7 +704,7 @@ static void rpsxLW()
     _psxDeleteReg(_Rt_, 0);

     _psxFlushCall(FLUSH_EVERYTHING);
-    xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
+    xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
     if (_Imm_) xADD(ecx, _Imm_);

     xTEST(ecx, 0x10000000);

@@ -712,18 +712,17 @@ static void rpsxLW()

     xFastCall((void*)iopMemRead32, ecx ); // returns value in EAX
     if (_Rt_) {
-        xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax);
+        xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax);
     }
     j8Ptr[1] = JMP8(0);
     x86SetJ8(j8Ptr[0]);

     // read from psM directly
     xAND(ecx, 0x1fffff);
-    xADD(ecx, (uptr)iopMem->Main);
-
-    xMOV(ecx, ptr[ecx]);
+    xMOV(ecx, ptr32[xComplexAddress(rax, iopMem->Main, rcx)]);
     if (_Rt_) {
-        xMOV(ptr[&psxRegs.GPR.r[_Rt_]], ecx);
+        xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], ecx);
     }

     x86SetJ8(j8Ptr[1]);

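The xComplexAddress call above replaces the old add-immediate because (uptr)iopMem->Main is no longer guaranteed to fit in a 32-bit displacement or immediate on x86-64 (my summary); when it does fit, the helper still folds the constant, so the emitted code degenerates to the old single-instruction form, and otherwise it first does the equivalent of `lea rax, [iopMem->Main]` and reads `[rax + rcx]`.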
@@ -735,10 +734,10 @@ static void rpsxSB()
     _psxDeleteReg(_Rs_, 1);
     _psxDeleteReg(_Rt_, 1);

-    xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
-    if (_Imm_) xADD(ecx, _Imm_);
-    xMOV( edx, ptr[&psxRegs.GPR.r[_Rt_]] );
-    xFastCall((void*)iopMemWrite8, ecx, edx );
+    xMOV(arg1regd, ptr32[&psxRegs.GPR.r[_Rs_]]);
+    if (_Imm_) xADD(arg1regd, _Imm_);
+    xMOV( arg2regd, ptr32[&psxRegs.GPR.r[_Rt_]] );
+    xFastCall((void*)iopMemWrite8, arg1regd, arg2regd );
 }

 static void rpsxSH()

@@ -746,10 +745,10 @@ static void rpsxSH()
     _psxDeleteReg(_Rs_, 1);
     _psxDeleteReg(_Rt_, 1);

-    xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
-    if (_Imm_) xADD(ecx, _Imm_);
-    xMOV( edx, ptr[&psxRegs.GPR.r[_Rt_]] );
-    xFastCall((void*)iopMemWrite16, ecx, edx );
+    xMOV(arg1regd, ptr32[&psxRegs.GPR.r[_Rs_]]);
+    if (_Imm_) xADD(arg1regd, _Imm_);
+    xMOV( arg2regd, ptr32[&psxRegs.GPR.r[_Rt_]] );
+    xFastCall((void*)iopMemWrite16, arg1regd, arg2regd );
 }

 static void rpsxSW()

@@ -757,10 +756,10 @@ static void rpsxSW()
     _psxDeleteReg(_Rs_, 1);
     _psxDeleteReg(_Rt_, 1);

-    xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
-    if (_Imm_) xADD(ecx, _Imm_);
-    xMOV( edx, ptr[&psxRegs.GPR.r[_Rt_]] );
-    xFastCall((void*)iopMemWrite32, ecx, edx );
+    xMOV(arg1regd, ptr32[&psxRegs.GPR.r[_Rs_]]);
+    if (_Imm_) xADD(arg1regd, _Imm_);
+    xMOV( arg2regd, ptr32[&psxRegs.GPR.r[_Rt_]] );
+    xFastCall((void*)iopMemWrite32, arg1regd, arg2regd );
 }

 //// SLL

@@ -782,19 +781,19 @@ void rpsxShiftConst(int info, int rdreg, int rtreg, int imm, int shifttype)
         }
     }
     else {
-        xMOV(eax, ptr[&psxRegs.GPR.r[rtreg]]);
+        xMOV(eax, ptr32[&psxRegs.GPR.r[rtreg]]);
         switch(shifttype) {
             case 0: xSHL(eax, imm); break;
             case 1: xSHR(eax, imm); break;
             case 2: xSAR(eax, imm); break;
         }
-        xMOV(ptr[&psxRegs.GPR.r[rdreg]], eax);
+        xMOV(ptr32[&psxRegs.GPR.r[rdreg]], eax);
     }
     }
     else {
         if( rdreg != rtreg ) {
-            xMOV(eax, ptr[&psxRegs.GPR.r[rtreg]]);
-            xMOV(ptr[&psxRegs.GPR.r[rdreg]], eax);
+            xMOV(eax, ptr32[&psxRegs.GPR.r[rtreg]]);
+            xMOV(ptr32[&psxRegs.GPR.r[rdreg]], eax);
         }
     }
 }

@@ -834,23 +833,23 @@ void rpsxShiftVconsts(int info, int shifttype)
 void rpsxShiftVconstt(int info, int shifttype)
 {
     xMOV(eax, g_psxConstRegs[_Rt_]);
-    xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
+    xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
     switch(shifttype) {
         case 0: xSHL(eax, cl); break;
         case 1: xSHR(eax, cl); break;
         case 2: xSAR(eax, cl); break;
     }
-    xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax);
+    xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
 }

 void rpsxSLLV_consts(int info) { rpsxShiftVconsts(info, 0); }
 void rpsxSLLV_constt(int info) { rpsxShiftVconstt(info, 0); }
 void rpsxSLLV_(int info)
 {
-    xMOV(eax, ptr[&psxRegs.GPR.r[_Rt_]]);
-    xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
+    xMOV(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
+    xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
     xSHL(eax, cl);
-    xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax);
+    xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
 }

 PSXRECOMPILE_CONSTCODE0(SLLV);

@@ -865,10 +864,10 @@ void rpsxSRLV_consts(int info) { rpsxShiftVconsts(info, 1); }
 void rpsxSRLV_constt(int info) { rpsxShiftVconstt(info, 1); }
 void rpsxSRLV_(int info)
 {
-    xMOV(eax, ptr[&psxRegs.GPR.r[_Rt_]]);
-    xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
+    xMOV(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
+    xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
     xSHR(eax, cl);
-    xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax);
+    xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
 }

 PSXRECOMPILE_CONSTCODE0(SRLV);

@@ -883,10 +882,10 @@ void rpsxSRAV_consts(int info) { rpsxShiftVconsts(info, 2); }
 void rpsxSRAV_constt(int info) { rpsxShiftVconstt(info, 2); }
 void rpsxSRAV_(int info)
 {
-    xMOV(eax, ptr[&psxRegs.GPR.r[_Rt_]]);
-    xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
+    xMOV(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
+    xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
     xSAR(eax, cl);
-    xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax);
+    xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
 }

 PSXRECOMPILE_CONSTCODE0(SRAV);

@@ -900,8 +899,8 @@ void rpsxMFHI()

     _psxOnWriteReg(_Rd_);
     _psxDeleteReg(_Rd_, 0);
-    xMOV(eax, ptr[&psxRegs.GPR.n.hi]);
-    xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax);
+    xMOV(eax, ptr32[&psxRegs.GPR.n.hi]);
+    xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
 }

 void rpsxMTHI()

@@ -911,8 +910,8 @@ void rpsxMTHI()
     }
     else {
         _psxDeleteReg(_Rs_, 1);
-        xMOV(eax, ptr[&psxRegs.GPR.r[_Rs_]]);
-        xMOV(ptr[&psxRegs.GPR.n.hi], eax);
+        xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
+        xMOV(ptr32[&psxRegs.GPR.n.hi], eax);
     }
 }

@@ -922,8 +921,8 @@ void rpsxMFLO()

     _psxOnWriteReg(_Rd_);
     _psxDeleteReg(_Rd_, 0);
-    xMOV(eax, ptr[&psxRegs.GPR.n.lo]);
-    xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax);
+    xMOV(eax, ptr32[&psxRegs.GPR.n.lo]);
+    xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
 }

 void rpsxMTLO()

@@ -933,8 +932,8 @@ void rpsxMTLO()
     }
     else {
         _psxDeleteReg(_Rs_, 1);
-        xMOV(eax, ptr[&psxRegs.GPR.r[_Rs_]]);
-        xMOV(ptr[&psxRegs.GPR.n.lo], eax);
+        xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
+        xMOV(ptr32[&psxRegs.GPR.n.lo], eax);
     }
 }

@@ -965,8 +964,8 @@ void rpsxJR()
 void rpsxJALR()
 {
     // jalr Rs
-    _allocX86reg(esi, X86TYPE_PCWRITEBACK, 0, MODE_WRITE);
-    _psxMoveGPRtoR(esi, _Rs_);
+    _allocX86reg(calleeSavedReg2d, X86TYPE_PCWRITEBACK, 0, MODE_WRITE);
+    _psxMoveGPRtoR(calleeSavedReg2d, _Rs_);

     if ( _Rd_ )
     {

@@ -977,18 +976,18 @@ void rpsxJALR()

     psxRecompileNextInstruction(1);

-    if( x86regs[esi.GetId()].inuse ) {
-        pxAssert( x86regs[esi.GetId()].type == X86TYPE_PCWRITEBACK );
-        xMOV(ptr[&psxRegs.pc], esi);
-        x86regs[esi.GetId()].inuse = 0;
+    if( x86regs[calleeSavedReg2d.GetId()].inuse ) {
+        pxAssert( x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK );
+        xMOV(ptr32[&psxRegs.pc], calleeSavedReg2d);
+        x86regs[calleeSavedReg2d.GetId()].inuse = 0;
 #ifdef PCSX2_DEBUG
-        xOR( esi, esi );
+        xOR( calleeSavedReg2d, calleeSavedReg2d );
 #endif

     }
     else {
-        xMOV(eax, ptr[&g_recWriteback]);
-        xMOV(ptr[&psxRegs.pc], eax);
+        xMOV(eax, ptr32[&g_recWriteback]);
+        xMOV(ptr32[&psxRegs.pc], eax);
 #ifdef PCSX2_DEBUG
         xOR( eax, eax );
 #endif

@@ -1016,8 +1015,8 @@ void rpsxSetBranchEQ(int info, int process)
         s_pbranchjmp = JNE32( 0 );
     }
     else {
-        xMOV(eax, ptr[&psxRegs.GPR.r[ _Rs_ ] ]);
-        xCMP(eax, ptr[&psxRegs.GPR.r[ _Rt_ ] ]);
+        xMOV(eax, ptr32[&psxRegs.GPR.r[ _Rs_ ] ]);
+        xCMP(eax, ptr32[&psxRegs.GPR.r[ _Rt_ ] ]);
         s_pbranchjmp = JNE32( 0 );
     }
 }

@@ -1342,8 +1341,8 @@ void rpsxMFC0()
     if (!_Rt_) return;

     _psxOnWriteReg(_Rt_);
-    xMOV(eax, ptr[&psxRegs.CP0.r[_Rd_]]);
-    xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax);
+    xMOV(eax, ptr32[&psxRegs.CP0.r[_Rd_]]);
+    xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax);
 }

 void rpsxCFC0()

@@ -1352,8 +1351,8 @@ void rpsxCFC0()
     if (!_Rt_) return;

     _psxOnWriteReg(_Rt_);
-    xMOV(eax, ptr[&psxRegs.CP0.r[_Rd_]]);
-    xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax);
+    xMOV(eax, ptr32[&psxRegs.CP0.r[_Rd_]]);
+    xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax);
 }

 void rpsxMTC0()

@@ -1364,8 +1363,8 @@ void rpsxMTC0()
     }
     else {
         _psxDeleteReg(_Rt_, 1);
-        xMOV(eax, ptr[&psxRegs.GPR.r[_Rt_]]);
-        xMOV(ptr[&psxRegs.CP0.r[_Rd_]], eax);
+        xMOV(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
+        xMOV(ptr32[&psxRegs.CP0.r[_Rd_]], eax);
     }
 }

@@ -1377,13 +1376,13 @@ void rpsxCTC0()

 void rpsxRFE()
 {
-    xMOV(eax, ptr[&psxRegs.CP0.n.Status]);
+    xMOV(eax, ptr32[&psxRegs.CP0.n.Status]);
     xMOV(ecx, eax);
     xAND(eax, 0xfffffff0);
     xAND(ecx, 0x3c);
     xSHR(ecx, 2);
     xOR(eax, ecx);
-    xMOV(ptr[&psxRegs.CP0.n.Status], eax);
+    xMOV(ptr32[&psxRegs.CP0.n.Status], eax);

     // Test the IOP's INTC status, so that any pending ints get raised.

@@ -104,6 +104,7 @@ extern u32 g_cpuHasConstReg, g_cpuFlushedConstReg;
 u32* _eeGetConstReg(int reg);

 // finds where the GPR is stored and moves lower 32 bits to EAX
-void _eeMoveGPRtoR(const x86Emitter::xRegisterLong& to, int fromgpr);
+void _eeMoveGPRtoR(const x86Emitter::xRegister32& to, int fromgpr);
 void _eeMoveGPRtoM(uptr to, int fromgpr);
 void _eeMoveGPRtoRm(x86IntRegType to, int fromgpr);
+void eeSignExtendTo(int gpr, bool onlyupper=false);

@@ -239,7 +239,17 @@ void _flushConstRegs()
     }
 }

-int _allocX86reg(xRegisterLong x86reg, int type, int reg, int mode)
+int _allocX86reg(xRegisterEmpty x86reg, int type, int reg, int mode)
+{
+    return _allocX86reg(xRegister32(x86reg), type, reg, mode);
+}
+
+int _allocX86reg(xRegister64 x86reg, int type, int reg, int mode)
+{
+    return _allocX86reg(xRegister32(x86reg.Id), type, reg, mode);
+}
+
+int _allocX86reg(xRegister32 x86reg, int type, int reg, int mode)
 {
     uint i;
     pxAssertDev( reg >= 0 && reg < 32, "Register index out of bounds." );

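The overload set above keeps old call sites compiling during the transition (my reading): xRegister64 arguments are accepted but forwarded to the 32-bit allocator, and the xRegisterEmpty overload exists so that callers passing xEmptyReg — meaning "let the allocator pick" — do not hit an ambiguity between the 32- and 64-bit signatures:

    // hypothetical call site: the allocator chooses a free host register
    int hostreg = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, MODE_READ | MODE_WRITE);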
@@ -313,7 +323,7 @@ int _allocX86reg(xRegisterLong x86reg, int type, int reg, int mode)
     }

     if (x86reg.IsEmpty())
-        x86reg = xRegisterLong(_getFreeX86reg(oldmode));
+        x86reg = xRegister32(_getFreeX86reg(oldmode));
     else
         _freeX86reg(x86reg);

@@ -440,7 +450,7 @@ void _deleteX86reg(int type, int reg, int flush)
 }

 // Temporary solution to support eax/ebx... type
-void _freeX86reg(const x86Emitter::xRegisterLong& x86reg)
+void _freeX86reg(const x86Emitter::xRegister32& x86reg)
 {
     _freeX86reg(x86reg.GetId());
 }

@@ -51,7 +51,7 @@ using namespace R5900;

 u32 maxrecmem = 0;
 static __aligned16 uptr recLUT[_64kb];
-static __aligned16 uptr hwLUT[_64kb];
+static __aligned16 u32 hwLUT[_64kb];

 static __fi u32 HWADDR(u32 mem) { return hwLUT[mem >> 16] + mem; }

@@ -75,7 +75,7 @@ static const int RECCONSTBUF_SIZE = 16384 * 2; // 64 bit consts in 32 bit units
 static RecompiledCodeReserve* recMem = NULL;
 static u8* recRAMCopy = NULL;
 static u8* recLutReserve_RAM = NULL;
-static const size_t recLutSize = Ps2MemSize::MainRam + Ps2MemSize::Rom + Ps2MemSize::Rom1 + Ps2MemSize::Rom2;
+static const size_t recLutSize = (Ps2MemSize::MainRam + Ps2MemSize::Rom + Ps2MemSize::Rom1 + Ps2MemSize::Rom2) * wordsize / 4;

 static uptr m_ConfiguredCacheReserve = 64;

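The recLutSize change accounts for entry width (as I read it): the LUT shadows each 4-byte MIPS instruction slot with one BASEBLOCK entry of pointer size, so the reserve scales by wordsize / 4 — a factor of 1 on 32-bit hosts and 2 on x86-64 (the 32 MiB of main RAM alone needs 64 MiB of entries there).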
@@ -153,7 +153,7 @@ u32* _eeGetConstReg(int reg)
     return &cpuRegs.GPR.r[ reg ].UL[0];
 }

-void _eeMoveGPRtoR(const xRegisterLong& to, int fromgpr)
+void _eeMoveGPRtoR(const xRegister32& to, int fromgpr)
 {
     if( fromgpr == 0 )
         xXOR(to, to); // zero register should use xor, thanks --air

@@ -346,13 +346,17 @@ static DynGenFunc* _DynGen_JITCompile()

     u8* retval = xGetAlignedCallTarget();

-    xFastCall((void*)recRecompile, ptr[&cpuRegs.pc] );
+    xFastCall((void*)recRecompile, ptr32[&cpuRegs.pc] );

+    // C equivalent:
+    // u32 addr = cpuRegs.pc;
+    // void(**base)() = (void(**)())recLUT[addr >> 16];
+    // base[addr >> 2]();
     xMOV( eax, ptr[&cpuRegs.pc] );
     xMOV( ebx, eax );
     xSHR( eax, 16 );
-    xMOV( ecx, ptr[recLUT + (eax*4)] );
-    xJMP( ptr32[ecx+ebx] );
+    xMOV( rcx, ptrNative[xComplexAddress(rcx, recLUT, rax*wordsize)] );
+    xJMP( ptrNative[rbx*(wordsize/4) + rcx] );

     return (DynGenFunc*)retval;
 }

@@ -369,11 +373,15 @@ static DynGenFunc* _DynGen_DispatcherReg()
 {
     u8* retval = xGetPtr(); // fallthrough target, can't align it!

+    // C equivalent:
+    // u32 addr = cpuRegs.pc;
+    // void(**base)() = (void(**)())recLUT[addr >> 16];
+    // base[addr >> 2]();
     xMOV( eax, ptr[&cpuRegs.pc] );
     xMOV( ebx, eax );
     xSHR( eax, 16 );
-    xMOV( ecx, ptr[recLUT + (eax*4)] );
-    xJMP( ptr32[ecx+ebx] );
+    xMOV( rcx, ptrNative[xComplexAddress(rcx, recLUT, rax*wordsize)] );
+    xJMP( ptrNative[rbx*(wordsize/4) + rcx] );

     return (DynGenFunc*)retval;
 }

@@ -461,7 +469,7 @@ static void _DynGen_Dispatchers()

 static __ri void ClearRecLUT(BASEBLOCK* base, int memsize)
 {
-    for (int i = 0; i < memsize/4; i++)
+    for (int i = 0; i < memsize/(int)sizeof(uptr); i++)
         base[i].SetFnptr((uptr)JITCompile);
 }

@@ -521,7 +529,7 @@ static void recAlloc()
     for (int i = 0; i < 0x10000; i++)
         recLUT_SetPage(recLUT, 0, 0, 0, i, 0);

-    for ( int i = 0x0000; i < Ps2MemSize::MainRam / 0x10000; i++ )
+    for ( int i = 0x0000; i < (int)(Ps2MemSize::MainRam / 0x10000); i++ )
     {
         recLUT_SetPage(recLUT, hwLUT, recRAM, 0x0000, i, i);
         recLUT_SetPage(recLUT, hwLUT, recRAM, 0x2000, i, i);

@@ -864,21 +872,21 @@ void SetBranchReg( u32 reg )
     //          xMOV(ptr[&cpuRegs.pc], eax);
     //      }
     //  }
-    _allocX86reg(esi, X86TYPE_PCWRITEBACK, 0, MODE_WRITE);
-    _eeMoveGPRtoR(esi, reg);
+    _allocX86reg(calleeSavedReg2d, X86TYPE_PCWRITEBACK, 0, MODE_WRITE);
+    _eeMoveGPRtoR(calleeSavedReg2d, reg);

     if (EmuConfig.Gamefixes.GoemonTlbHack) {
-        xMOV(ecx, esi);
+        xMOV(ecx, calleeSavedReg2d);
         vtlb_DynV2P();
-        xMOV(esi, eax);
+        xMOV(calleeSavedReg2d, eax);
     }

     recompileNextInstruction(1);

-    if( x86regs[esi.GetId()].inuse ) {
-        pxAssert( x86regs[esi.GetId()].type == X86TYPE_PCWRITEBACK );
-        xMOV(ptr[&cpuRegs.pc], esi);
-        x86regs[esi.GetId()].inuse = 0;
+    if( x86regs[calleeSavedReg2d.GetId()].inuse ) {
+        pxAssert( x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK );
+        xMOV(ptr[&cpuRegs.pc], calleeSavedReg2d);
+        x86regs[calleeSavedReg2d.GetId()].inuse = 0;
     }
     else {
         xMOV(eax, ptr[&g_recWriteback]);

@@ -1525,8 +1533,8 @@ static void memory_protect_recompiled_code(u32 startpc, u32 size)
             break;

         case ProtMode_Manual:
-            xMOV( ecx, inpage_ptr );
-            xMOV( edx, inpage_sz / 4 );
+            xMOV( arg1regd, inpage_ptr );
+            xMOV( arg2regd, inpage_sz / 4 );
             //xMOV( eax, startpc ); // uncomment this to access startpc (as eax) in dyna_block_discard

             u32 lpc = inpage_ptr;

@@ -1737,7 +1745,7 @@ static void __fastcall recRecompile( const u32 startpc )
             // Game will unmap some virtual addresses. If a constant address were hardcoded in the block, we would be in a bad situation.
             eeRecNeedsReset = true;
             // 0x3563b8 is the start address of the function that invalidate entry in TLB cache
-            xFastCall((void*)GoemonUnloadTlb, ptr[&cpuRegs.GPR.n.a0.UL[0]]);
+            xFastCall((void*)GoemonUnloadTlb, ptr32[&cpuRegs.GPR.n.a0.UL[0]]);
         }
     }

@@ -148,7 +148,7 @@ void recSLTIU_(int info)
     x86SetJ8(j8Ptr[0]);
     x86SetJ8(j8Ptr[1]);

-    xMOV(ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], eax);
+    xMOV(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], eax);
     xMOV(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ]], 0 );
 }

@@ -178,7 +178,7 @@ void recSLTI_(int info)
     x86SetJ8(j8Ptr[0]);
     x86SetJ8(j8Ptr[1]);

-    xMOV(ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], eax);
+    xMOV(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], eax);
     xMOV(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ]], 0 );
 }

@@ -103,13 +103,13 @@ void recJALR()
     EE::Profiler.EmitOp(eeOpcode::JALR);

     int newpc = pc + 4;
-    _allocX86reg(esi, X86TYPE_PCWRITEBACK, 0, MODE_WRITE);
-    _eeMoveGPRtoR(esi, _Rs_);
+    _allocX86reg(calleeSavedReg2d, X86TYPE_PCWRITEBACK, 0, MODE_WRITE);
+    _eeMoveGPRtoR(calleeSavedReg2d, _Rs_);

     if (EmuConfig.Gamefixes.GoemonTlbHack) {
-        xMOV(ecx, esi);
+        xMOV(ecx, calleeSavedReg2d);
         vtlb_DynV2P();
-        xMOV(esi, eax);
+        xMOV(calleeSavedReg2d, eax);
     }
     // uncomment when there are NO instructions that need to call interpreter
     //  int mmreg;

@@ -147,10 +147,10 @@ void recJALR()
     _clearNeededXMMregs();
     recompileNextInstruction(1);

-    if( x86regs[esi.GetId()].inuse ) {
-        pxAssert( x86regs[esi.GetId()].type == X86TYPE_PCWRITEBACK );
-        xMOV(ptr[&cpuRegs.pc], esi);
-        x86regs[esi.GetId()].inuse = 0;
+    if( x86regs[calleeSavedReg2d.GetId()].inuse ) {
+        pxAssert( x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK );
+        xMOV(ptr[&cpuRegs.pc], calleeSavedReg2d);
+        x86regs[calleeSavedReg2d.GetId()].inuse = 0;
     }
     else {
         xMOV(eax, ptr[&g_recWriteback]);

@@ -99,13 +99,13 @@ void recLoad64( u32 bits, bool sign )
 {
     pxAssume( bits == 64 || bits == 128 );

-    // Load EDX with the destination.
+    // Load arg2 with the destination.
     // 64/128 bit modes load the result directly into the cpuRegs.GPR struct.

     if (_Rt_)
-        xMOV(edx, (uptr)&cpuRegs.GPR.r[_Rt_].UL[0]);
+        xLEA(arg2reg, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
     else
-        xMOV(edx, (uptr)&dummyValue[0]);
+        xLEA(arg2reg, ptr[&dummyValue[0]]);

     if (GPR_IS_CONST1(_Rs_))
     {

@@ -121,11 +121,11 @@ void recLoad64( u32 bits, bool sign )
     else
     {
         // Load ECX with the source memory address that we're reading from.
-        _eeMoveGPRtoR(ecx, _Rs_);
+        _eeMoveGPRtoR(arg1regd, _Rs_);
         if (_Imm_ != 0)
-            xADD(ecx, _Imm_);
+            xADD(arg1regd, _Imm_);
         if (bits == 128) // force 16 byte alignment on 128 bit reads
-            xAND(ecx, ~0x0F);
+            xAND(arg1regd, ~0x0F);

         _eeOnLoadWrite(_Rt_);
         _deleteEEreg(_Rt_, 0);

@@ -154,10 +154,10 @@ void recLoad32( u32 bits, bool sign )
     }
     else
     {
-        // Load ECX with the source memory address that we're reading from.
-        _eeMoveGPRtoR(ecx, _Rs_);
+        // Load arg1 with the source memory address that we're reading from.
+        _eeMoveGPRtoR(arg1regd, _Rs_);
         if (_Imm_ != 0)
-            xADD(ecx, _Imm_ );
+            xADD(arg1regd, _Imm_ );

         _eeOnLoadWrite(_Rt_);
         _deleteEEreg(_Rt_, 0);

@@ -194,12 +194,12 @@ void recStore(u32 bits)

     if (bits < 64)
     {
-        _eeMoveGPRtoR(edx, _Rt_);
+        _eeMoveGPRtoR(arg2regd, _Rt_);
     }
     else if (bits == 128 || bits == 64)
     {
         _flushEEreg(_Rt_); // flush register to mem
-        xMOV(edx, (uptr)&cpuRegs.GPR.r[_Rt_].UL[0]);
+        xLEA(arg2reg, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
     }

     // Load ECX with the destination address, or issue a direct optimized write

@@ -215,11 +215,11 @@ void recStore(u32 bits)
     }
     else
     {
-        _eeMoveGPRtoR(ecx, _Rs_);
+        _eeMoveGPRtoR(arg1regd, _Rs_);
         if (_Imm_ != 0)
-            xADD(ecx, _Imm_);
+            xADD(arg1regd, _Imm_);
         if (bits == 128)
-            xAND(ecx, ~0x0F);
+            xAND(arg1regd, ~0x0F);

         iFlushCall(FLUSH_FULLVTLB);

@@ -253,30 +253,30 @@ void recLWL()
     iFlushCall(FLUSH_FULLVTLB);
     _deleteEEreg(_Rt_, 1);

-    _eeMoveGPRtoR(ecx, _Rs_);
+    _eeMoveGPRtoR(arg1regd, _Rs_);
     if (_Imm_ != 0)
-        xADD(ecx, _Imm_);
+        xADD(arg1regd, _Imm_);

-    // edi = bit offset in word
-    xMOV(edi, ecx);
-    xAND(edi, 3);
-    xSHL(edi, 3);
+    // calleeSavedReg1 = bit offset in word
+    xMOV(calleeSavedReg1d, arg1regd);
+    xAND(calleeSavedReg1d, 3);
+    xSHL(calleeSavedReg1d, 3);

-    xAND(ecx, ~3);
+    xAND(arg1regd, ~3);
     vtlb_DynGenRead32(32, false);

     if (!_Rt_)
         return;

     // mask off bytes loaded
-    xMOV(ecx, edi);
+    xMOV(ecx, calleeSavedReg1d);
     xMOV(edx, 0xffffff);
     xSHR(edx, cl);
     xAND(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], edx);

     // OR in bytes loaded
-    xMOV(ecx, 24);
-    xSUB(ecx, edi);
+    xNEG(ecx);
+    xADD(ecx, 24);
     xSHL(eax, cl);
     xOR(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);

@@ -301,16 +301,16 @@ void recLWR()
     iFlushCall(FLUSH_FULLVTLB);
     _deleteEEreg(_Rt_, 1);

-    _eeMoveGPRtoR(ecx, _Rs_);
+    _eeMoveGPRtoR(arg1regd, _Rs_);
     if (_Imm_ != 0)
-        xADD(ecx, _Imm_);
+        xADD(arg1regd, _Imm_);

-    // edi = bit offset in word
-    xMOV(edi, ecx);
-    xAND(edi, 3);
-    xSHL(edi, 3);
+    xMOV(calleeSavedReg1d, arg1regd);
+    xAND(calleeSavedReg1d, 3);
+    xSHL(calleeSavedReg1d, 3);

-    xAND(ecx, ~3);
+    xAND(arg1regd, ~3);
     vtlb_DynGenRead32(32, false);

     if (!_Rt_)

@@ -318,17 +318,17 @@ void recLWR()

     // mask off bytes loaded
     xMOV(ecx, 24);
-    xSUB(ecx, edi);
+    xSUB(ecx, calleeSavedReg1d);
     xMOV(edx, 0xffffff00);
     xSHL(edx, cl);
     xAND(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], edx);

     // OR in bytes loaded
-    xMOV(ecx, edi);
+    xMOV(ecx, calleeSavedReg1d);
     xSHR(eax, cl);
     xOR(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);

-    xCMP(edi, 0);
+    xCMP(ecx, 0);
     xForwardJump8 nosignextend(Jcc_NotEqual);
     // if ((addr & 3) == 0)
     xCDQ();

@@ -351,38 +351,38 @@ void recSWL()
 #ifdef REC_STORES
     iFlushCall(FLUSH_FULLVTLB);

-    _eeMoveGPRtoR(ecx, _Rs_);
+    _eeMoveGPRtoR(arg1regd, _Rs_);
     if (_Imm_ != 0)
-        xADD(ecx, _Imm_);
+        xADD(arg1regd, _Imm_);

-    // edi = bit offset in word
-    xMOV(edi, ecx);
-    xAND(edi, 3);
-    xSHL(edi, 3);
+    xMOV(calleeSavedReg1d, arg1regd);
+    xAND(calleeSavedReg1d, 3);
+    xSHL(calleeSavedReg1d, 3);

-    xAND(ecx, ~3);
+    xAND(arg1regd, ~3);
     vtlb_DynGenRead32(32, false);

-    // mask read -> edx
-    xMOV(ecx, edi);
-    xMOV(edx, 0xffffff00);
-    xSHL(edx, cl);
-    xAND(edx, eax);
+    // mask read -> arg2
+    xMOV(ecx, calleeSavedReg1d);
+    xMOV(arg2regd, 0xffffff00);
+    xSHL(arg2regd, cl);
+    xAND(arg2regd, eax);

     if (_Rt_)
     {
         // mask write and OR -> edx
-        xMOV(ecx, 24);
-        xSUB(ecx, edi);
+        xNEG(ecx);
+        xADD(ecx, 24);
         _eeMoveGPRtoR(eax, _Rt_);
         xSHR(eax, cl);
-        xOR(edx, eax);
+        xOR(arg2regd, eax);
     }

-    _eeMoveGPRtoR(ecx, _Rs_);
+    _eeMoveGPRtoR(arg1regd, _Rs_);
     if (_Imm_ != 0)
-        xADD(ecx, _Imm_);
-    xAND(ecx, ~3);
+        xADD(arg1regd, _Imm_);
+    xAND(arg1regd, ~3);

     vtlb_DynGenWrite(32);
 #else

@@ -401,38 +401,38 @@ void recSWR()
 #ifdef REC_STORES
     iFlushCall(FLUSH_FULLVTLB);

-    _eeMoveGPRtoR(ecx, _Rs_);
+    _eeMoveGPRtoR(arg1regd, _Rs_);
     if (_Imm_ != 0)
-        xADD(ecx, _Imm_);
+        xADD(arg1regd, _Imm_);

-    // edi = bit offset in word
-    xMOV(edi, ecx);
-    xAND(edi, 3);
-    xSHL(edi, 3);
+    xMOV(calleeSavedReg1d, arg1regd);
+    xAND(calleeSavedReg1d, 3);
+    xSHL(calleeSavedReg1d, 3);

-    xAND(ecx, ~3);
+    xAND(arg1regd, ~3);
     vtlb_DynGenRead32(32, false);

     // mask read -> edx
     xMOV(ecx, 24);
-    xSUB(ecx, edi);
-    xMOV(edx, 0xffffff);
-    xSHR(edx, cl);
-    xAND(edx, eax);
+    xSUB(ecx, calleeSavedReg1d);
+    xMOV(arg2regd, 0xffffff);
+    xSHR(arg2regd, cl);
+    xAND(arg2regd, eax);

     if (_Rt_)
     {
         // mask write and OR -> edx
-        xMOV(ecx, edi);
+        xMOV(ecx, calleeSavedReg1d);
         _eeMoveGPRtoR(eax, _Rt_);
         xSHL(eax, cl);
-        xOR(edx, eax);
+        xOR(arg2regd, eax);
     }

-    _eeMoveGPRtoR(ecx, _Rs_);
+    _eeMoveGPRtoR(arg1regd, _Rs_);
     if (_Imm_ != 0)
-        xADD(ecx, _Imm_);
-    xAND(ecx, ~3);
+        xADD(arg1regd, _Imm_);
+    xAND(arg1regd, ~3);

     vtlb_DynGenWrite(32);
 #else

@@ -512,9 +512,9 @@ void recLWC1()
     }
     else
     {
-        _eeMoveGPRtoR(ecx, _Rs_);
+        _eeMoveGPRtoR(arg1regd, _Rs_);
         if (_Imm_ != 0)
-            xADD(ecx, _Imm_);
+            xADD(arg1regd, _Imm_);

         iFlushCall(FLUSH_FULLVTLB);

@@ -536,7 +536,7 @@ void recSWC1()
 #else
     _deleteFPtoXMMreg(_Rt_, 1);

-    xMOV(edx, ptr32[&fpuRegs.fpr[_Rt_].UL] );
+    xMOV(arg2regd, ptr32[&fpuRegs.fpr[_Rt_].UL] );

     if( GPR_IS_CONST1( _Rs_ ) )
     {

@@ -545,9 +545,9 @@ void recSWC1()
     }
     else
     {
-        _eeMoveGPRtoR(ecx, _Rs_);
+        _eeMoveGPRtoR(arg1regd, _Rs_);
         if (_Imm_ != 0)
-            xADD(ecx, _Imm_);
+            xADD(arg1regd, _Imm_);

         iFlushCall(FLUSH_FULLVTLB);

@@ -574,9 +574,9 @@ void recSWC1()
 void recLQC2()
 {
     if (_Rt_)
-        xMOV(edx, (uptr)&VU0.VF[_Ft_].UD[0]);
+        xLEA(arg2reg, ptr[&VU0.VF[_Ft_].UD[0]]);
     else
-        xMOV(edx, (uptr)&dummyValue[0]);
+        xLEA(arg2reg, ptr[&dummyValue[0]]);

     if (GPR_IS_CONST1(_Rs_))
     {

@@ -586,9 +586,9 @@ void recLQC2()
     }
     else
     {
-        _eeMoveGPRtoR(ecx, _Rs_);
+        _eeMoveGPRtoR(arg1regd, _Rs_);
         if (_Imm_ != 0)
-            xADD(ecx, _Imm_);
+            xADD(arg1regd, _Imm_);

         iFlushCall(FLUSH_FULLVTLB);

@@ -602,7 +602,7 @@ void recLQC2()

 void recSQC2()
 {
-    xMOV(edx, (uptr)&VU0.VF[_Ft_].UD[0]);
+    xLEA(arg2reg, ptr[&VU0.VF[_Ft_].UD[0]]);

     if (GPR_IS_CONST1(_Rs_))
     {

@@ -611,9 +611,9 @@ void recSQC2()
     }
     else
     {
-        _eeMoveGPRtoR(ecx, _Rs_);
+        _eeMoveGPRtoR(arg1regd, _Rs_);
         if (_Imm_ != 0)
-            xADD(ecx, _Imm_);
+            xADD(arg1regd, _Imm_);

         iFlushCall(FLUSH_FULLVTLB);

@@ -73,6 +73,12 @@ static void iMOV128_SSE( const xIndirectVoid& destRm, const xIndirectVoid& srcRm )
//
static void iMOV64_Smart( const xIndirectVoid& destRm, const xIndirectVoid& srcRm )
{
if (wordsize == 8) {
xMOV(rax, srcRm);
xMOV(destRm, rax);
return;
}

if( _hasFreeXMMreg() )
{
// Move things using MOVLPS:
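iMOV64_Smart picks the cheapest way to copy 8 bytes between two memory operands, since x86 has no memory-to-memory MOV: a single GPR round-trip through rax on x86-64, or a spare XMM register via MOVLPS on x86-32. A minimal host-side sketch of the same idea, in plain C++ and independent of the emitter:

#include <cstdint>
#include <cstring>

// Copy 8 bytes through a temporary, mirroring what the generated code does.
static void move64(void* dst, const void* src)
{
    uint64_t tmp;                          // plays the role of rax / an XMM low half
    std::memcpy(&tmp, src, sizeof(tmp));
    std::memcpy(dst, &tmp, sizeof(tmp));
}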
@@ -92,8 +98,8 @@ static void iMOV64_Smart( const xIndirectVoid& destRm, const xIndirectVoid& srcRm )
/*
// Pseudo-Code For the following Dynarec Implementations -->

u32 vmv=vmap[addr>>VTLB_PAGE_BITS];
s32 ppf=addr+vmv;
u32 vmv = vmap[addr>>VTLB_PAGE_BITS].raw();
sptr ppf=addr+vmv;
if (!(ppf<0))
{
data[0]=*reinterpret_cast<DataType*>(ppf);

@@ -105,7 +111,7 @@ static void iMOV64_Smart( const xIndirectVoid& destRm, const xIndirectVoid& srcRm )
{
//has to: translate, find function, call function
u32 hand=(u8)vmv;
u32 paddr=ppf-hand+0x80000000;
u32 paddr=(ppf-hand) << 1;
//Console.WriteLn("Translated 0x%08X to 0x%08X",params addr,paddr);
return reinterpret_cast<TemplateHelper<DataSize,false>::HandlerType*>(RWFT[TemplateHelper<DataSize,false>::sidx][0][hand])(paddr,data);
}

@@ -114,26 +120,28 @@ static void iMOV64_Smart( const xIndirectVoid& destRm, const xIndirectVoid& srcRm )

mov eax,ecx;
shr eax,VTLB_PAGE_BITS;
mov eax,[eax*4+vmap];
add ecx,eax;
mov rax,[rax*wordsize+vmap];
add rcx,rax;
js _fullread;

//these are in the wrong order, just an example...
mov [eax],ecx;
mov ecx,[edx];
mov [eax+4],ecx;
mov ecx,[edx+4];
mov [eax+4+4],ecx;
mov ecx,[edx+4+4];
mov [eax+4+4+4+4],ecx;
mov ecx,[edx+4+4+4+4];
mov [rax],ecx;
mov ecx,[rdx];
mov [rax+4],ecx;
mov ecx,[rdx+4];
mov [rax+4+4],ecx;
mov ecx,[rdx+4+4];
mov [rax+4+4+4+4],ecx;
mov ecx,[rdx+4+4+4+4];
///....

jmp cont;
_fullread:
movzx eax,al;
sub ecx,eax;
#ifndef __M_X86_64 // The x86-64 marker will be cleared by using 32-bit ops
sub ecx,0x80000000;
#endif
call [eax+stuff];
cont:
........
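For reference, the pseudo-code above reduces to the following plain C++. This is a sketch only, not PCSX2's actual implementation: VTLB_PAGE_BITS is assumed to be 12 here, and the handler table is simplified to a flat function-pointer array.

#include <cstdint>

using sptr = intptr_t;

extern sptr vmap[];                          // one biased entry per page (assumed layout)
extern uint32_t (*handlers[256])(uint32_t);  // stand-in for the RWFT handler table

uint32_t readWord(uint32_t addr)
{
    sptr ppf = addr + vmap[addr >> 12];      // VTLB_PAGE_BITS assumed to be 12
    if (ppf >= 0)                            // sign bit clear: ppf is a host pointer
        return *reinterpret_cast<uint32_t*>(ppf);
    uint32_t hand  = (uint8_t)ppf;           // low byte selects the handler
    uint32_t paddr = (uint32_t)((uintptr_t)(ppf - hand) << 1); // recover physical address
    return handlers[hand](paddr);            // indirect: call the handler
}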
@@ -146,17 +154,16 @@ namespace vtlb_private
// Prepares eax, ecx, and ebx for Direct or Indirect operations.
// Returns the writeback pointer for ebx (return address from indirect handling)
//
static uptr* DynGen_PrepRegs()
static u32* DynGen_PrepRegs()
{
// Warning: dirties ebx (in case someone gets the very bad idea to move this code)
EE::Profiler.EmitMem();

xMOV( eax, ecx );
xMOV( eax, arg1regd );
xSHR( eax, VTLB_PAGE_BITS );
xMOV( eax, ptr[(eax*4) + vtlbdata.vmap] );
xMOV( ebx, 0xcdcdcdcd );
uptr* writeback = ((uptr*)xGetPtr()) - 1;
xADD( ecx, eax );
xMOV( rax, ptrNative[xComplexAddress(rbx, vtlbdata.vmap, rax*wordsize)] );
u32* writeback = xLEA_Writeback( rbx );
xADD( arg1reg, rax );

return writeback;
}
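xLEA_Writeback replaces the old mov-ebx-with-0xcdcdcdcd trick. Judging from the assertion in vtlb_SetWriteback further down (which expects an LEA opcode two bytes before the writeback slot), its x86-64 form can be pictured roughly like this — a sketch of assumed internals, not the real emitter code:

#include <cstdint>

using u8 = uint8_t;   // matching the codebase's typedefs
using u32 = uint32_t;

// Emit "lea rbx, [rip+disp32]" with a placeholder displacement and hand the
// caller a pointer to the disp32 field so it can be patched later.
static u32* leaWritebackSketch(u8*& x86Ptr)
{
    *x86Ptr++ = 0x48;               // REX.W
    *x86Ptr++ = 0x8d;               // LEA (the byte vtlb_SetWriteback asserts on)
    *x86Ptr++ = 0x1d;               // ModRM: rbx <- [rip+disp32]
    u32* writeback = (u32*)x86Ptr;  // placeholder displacement, patched later
    x86Ptr += sizeof(u32);
    return writeback;
}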
@@ -168,28 +175,28 @@ namespace vtlb_private
{
case 8:
if( sign )
xMOVSX( eax, ptr8[ecx] );
xMOVSX( eax, ptr8[arg1reg] );
else
xMOVZX( eax, ptr8[ecx] );
xMOVZX( eax, ptr8[arg1reg] );
break;

case 16:
if( sign )
xMOVSX( eax, ptr16[ecx] );
xMOVSX( eax, ptr16[arg1reg] );
else
xMOVZX( eax, ptr16[ecx] );
xMOVZX( eax, ptr16[arg1reg] );
break;

case 32:
xMOV( eax, ptr[ecx] );
xMOV( eax, ptr[arg1reg] );
break;

case 64:
iMOV64_Smart( ptr[edx], ptr[ecx] );
iMOV64_Smart( ptr[arg2reg], ptr[arg1reg] );
break;

case 128:
iMOV128_SSE( ptr[edx], ptr[ecx] );
iMOV128_SSE( ptr[arg2reg], ptr[arg1reg] );
break;

jNO_DEFAULT

@@ -199,27 +206,29 @@ namespace vtlb_private
// ------------------------------------------------------------------------
static void DynGen_DirectWrite( u32 bits )
{
// TODO: x86Emitter can't use dil (and xRegister8(rdi.Id) is not dil)
switch(bits)
{
//8 , 16, 32 : data on EDX
case 8:
xMOV( ptr[ecx], dl );
xMOV( edx, arg2regd );
xMOV( ptr[arg1reg], dl );
break;

case 16:
xMOV( ptr[ecx], dx );
xMOV( ptr[arg1reg], xRegister16(arg2reg.Id) );
break;

case 32:
xMOV( ptr[ecx], edx );
xMOV( ptr[arg1reg], arg2regd );
break;

case 64:
iMOV64_Smart( ptr[ecx], ptr[edx] );
iMOV64_Smart( ptr[arg1reg], ptr[arg2reg] );
break;

case 128:
iMOV128_SSE( ptr[ecx], ptr[edx] );
iMOV128_SSE( ptr[arg1reg], ptr[arg2reg] );
break;
}
}

@@ -274,15 +283,23 @@ static void DynGen_IndirectDispatch( int mode, int bits, bool sign = false )

// ------------------------------------------------------------------------
// Generates the various instances of the indirect dispatchers
// In: arg1reg: vtlb entry, arg2reg: data ptr (if mode >= 64), rbx: function return ptr
// Out: eax: result (if mode < 64)
static void DynGen_IndirectTlbDispatcher( int mode, int bits, bool sign )
{
xMOVZX( eax, al );
xSUB( ecx, 0x80000000 );
xSUB( ecx, eax );
if (wordsize != 8) xSUB( arg1regd, 0x80000000 );
xSUB( arg1regd, eax );

// jump to the indirect handler, which is a __fastcall C++ function.
// [ecx is address, edx is data]
xFastCall(ptr32[(eax*4) + vtlbdata.RWFT[bits][mode]], ecx, edx);
sptr table = (sptr)vtlbdata.RWFT[bits][mode];
if (table == (s32)table) {
xFastCall(ptrNative[(rax*wordsize) + table], arg1reg, arg2reg);
} else {
xLEA(arg3reg, ptr[(void*)table]);
xFastCall(ptrNative[(rax*wordsize) + arg3reg], arg1reg, arg2reg);
}

if (!mode)
{

@@ -302,7 +319,7 @@ static void DynGen_IndirectTlbDispatcher( int mode, int bits, bool sign )
}
}

xJMP( ebx );
xJMP( rbx );
}
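The `table == (s32)table` test above exists because an x86-64 memory operand can only carry a sign-extended 32-bit displacement; a handler table that lives outside the low/high 2GB of the address space has to be materialized in a register first. The check itself is just a sign-extension round-trip:

#include <cstdint>

// True if `p` can be encoded directly as a disp32 in an x86-64 address.
static bool fitsInDisp32(const void* p)
{
    intptr_t v = (intptr_t)p;
    return v == (int32_t)v;  // survives a sign-extended 32-bit round-trip
}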
// One-time initialization procedure. Multiple subsequent calls during the lifespan of the
@@ -338,18 +355,30 @@ void vtlb_dynarec_init()
Perf::any.map((uptr)m_IndirectDispatchers, __pagesize, "TLB Dispatcher");
}

static void vtlb_SetWriteback(u32 *writeback)
{
uptr val = (uptr)xGetPtr();
if (wordsize == 8)
{
pxAssertMsg(*((u8*)writeback - 2) == 0x8d, "Expected codegen to be an LEA");
val -= ((uptr)writeback + 4);
}
pxAssertMsg((sptr)val == (s32)val, "Writeback too far away!");
*writeback = val;
}
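A worked example of the patch math above: a RIP-relative LEA measures its displacement from the end of the instruction, which is 4 bytes past the disp32 field that `writeback` points at. As a standalone sketch:

#include <cstdint>

// Example: disp32 field at 0x1000, return target at 0x1234 -> 0x230 is stored,
// because RIP-relative addressing counts from the end of the 7-byte LEA.
static void setWritebackSketch(uint32_t* writeback, uintptr_t target, bool is64)
{
    uintptr_t val = target;
    if (is64)
        val -= (uintptr_t)writeback + 4;  // 4 = remaining bytes of the instruction
    *writeback = (uint32_t)val;           // must fit in 32 bits (asserted above)
}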
//////////////////////////////////////////////////////////////////////////////////////////
// Dynarec Load Implementations
void vtlb_DynGenRead64(u32 bits)
{
pxAssume( bits == 64 || bits == 128 );

uptr* writeback = DynGen_PrepRegs();
u32* writeback = DynGen_PrepRegs();

DynGen_IndirectDispatch( 0, bits );
DynGen_DirectRead( bits, false );

*writeback = (uptr)xGetPtr(); // return target for indirect's call/ret
vtlb_SetWriteback(writeback); // return target for indirect's call/ret
}

// ------------------------------------------------------------------------

@@ -360,12 +389,12 @@ void vtlb_DynGenRead32(u32 bits, bool sign)
{
pxAssume( bits <= 32 );

uptr* writeback = DynGen_PrepRegs();
u32* writeback = DynGen_PrepRegs();

DynGen_IndirectDispatch( 0, bits, sign && bits < 32 );
DynGen_DirectRead( bits, sign );

*writeback = (uptr)xGetPtr();
vtlb_SetWriteback(writeback);
}

// ------------------------------------------------------------------------

@@ -378,15 +407,15 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const )
auto vmv = vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS];
if( !vmv.isHandler(addr_const) )
{
auto ppf = vmv.assumeHandlerGetPAddr(addr_const);
auto ppf = vmv.assumePtr(addr_const);
switch( bits )
{
case 64:
iMOV64_Smart( ptr[edx], ptr[(void*)ppf] );
iMOV64_Smart( ptr[arg2reg], ptr[(void*)ppf] );
break;

case 128:
iMOV128_SSE( ptr[edx], ptr[(void*)ppf] );
iMOV128_SSE( ptr[arg2reg], ptr[(void*)ppf] );
break;

jNO_DEFAULT

@@ -405,7 +434,7 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const )
}

iFlushCall(FLUSH_FULLVTLB);
xFastCall( vmv.assumeHandlerGetRaw(szidx, 0), paddr );
xFastCall( vmv.assumeHandlerGetRaw(szidx, 0), paddr, arg2reg );
}
}

@@ -442,7 +471,7 @@ void vtlb_DynGenRead32_Const( u32 bits, bool sign, u32 addr_const )
break;

case 32:
xMOV( eax, ptr[(void*)ppf] );
xMOV( eax, ptr32[(u32*)ppf] );
break;
}
}

@@ -494,12 +523,12 @@ void vtlb_DynGenRead32_Const( u32 bits, bool sign, u32 addr_const )

void vtlb_DynGenWrite(u32 sz)
{
uptr* writeback = DynGen_PrepRegs();
u32* writeback = DynGen_PrepRegs();

DynGen_IndirectDispatch( 1, sz );
DynGen_DirectWrite( sz );

*writeback = (uptr)xGetPtr();
vtlb_SetWriteback(writeback);
}

@@ -514,28 +543,30 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const )
auto vmv = vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS];
if( !vmv.isHandler(addr_const) )
{
// TODO: x86Emitter can't use dil (and xRegister8(rdi.Id) is not dil)
auto ppf = vmv.assumePtr(addr_const);
switch(bits)
{
//8 , 16, 32 : data on EDX
//8 , 16, 32 : data on arg2
case 8:
xMOV( edx, arg2regd );
xMOV( ptr[(void*)ppf], dl );
break;

case 16:
xMOV( ptr[(void*)ppf], dx );
xMOV( ptr[(void*)ppf], xRegister16(arg2reg.Id) );
break;

case 32:
xMOV( ptr[(void*)ppf], edx );
xMOV( ptr[(void*)ppf], arg2regd );
break;

case 64:
iMOV64_Smart( ptr[(void*)ppf], ptr[edx] );
iMOV64_Smart( ptr[(void*)ppf], ptr[arg2reg] );
break;

case 128:
iMOV128_SSE( ptr[(void*)ppf], ptr[edx] );
iMOV128_SSE( ptr[(void*)ppf], ptr[arg2reg] );
break;
}

@@ -556,7 +587,7 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const )
}

iFlushCall(FLUSH_FULLVTLB);
xFastCall( vmv.assumeHandlerGetRaw(szidx, true), paddr, edx );
xFastCall( vmv.assumeHandlerGetRaw(szidx, true), paddr, arg2reg );
}
}

@@ -565,13 +596,14 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const )

// ecx - virtual address
// Returns physical address in eax.
// Clobbers edx
void vtlb_DynV2P()
{
xMOV(eax, ecx);
xAND(ecx, VTLB_PAGE_MASK); // vaddr & VTLB_PAGE_MASK

xSHR(eax, VTLB_PAGE_BITS);
xMOV(eax, ptr[(eax*4) + vtlbdata.ppmap]); //vtlbdata.ppmap[vaddr>>VTLB_PAGE_BITS];
xMOV(eax, ptr[xComplexAddress(rdx, vtlbdata.ppmap, rax*4)]); //vtlbdata.ppmap[vaddr>>VTLB_PAGE_BITS];

xOR(eax, ecx);
}
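What vtlb_DynV2P emits is, in plain C++ (assuming the usual 4KB pages, i.e. VTLB_PAGE_BITS == 12 and VTLB_PAGE_MASK == 0xfff):

#include <cstdint>

extern uint32_t ppmap[];  // stand-in for vtlbdata.ppmap

static uint32_t virtToPhys(uint32_t vaddr)
{
    return ppmap[vaddr >> 12] | (vaddr & 0xfff);  // page base | page offset
}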
@@ -178,25 +178,25 @@ void normJumpCompile(mV, microFlagCycles& mFC, bool isEvilJump) {
mVUpBlock->jumpCache = new microJumpCache[mProgSize/2];
}

if (isEvilJump) xMOV(gprT2, ptr32[&mVU.evilBranch]);
else xMOV(gprT2, ptr32[&mVU.branch]);
if (doJumpCaching) xMOV(gprT3, (uptr)mVUpBlock);
else xMOV(gprT3, (uptr)&mVUpBlock->pStateEnd);
if (isEvilJump) xMOV(arg1regd, ptr32[&mVU.evilBranch]);
else xMOV(arg1regd, ptr32[&mVU.branch]);
if (doJumpCaching) xLoadFarAddr(arg2reg, mVUpBlock);
else xLoadFarAddr(arg2reg, &mVUpBlock->pStateEnd);

if(mVUup.eBit && isEvilJump)// E-bit EvilJump
{
//Xtreme G 3 does 2 conditional jumps, the first contains an E Bit on the first instruction
//So if it is taken, you need to end the program, else you get infinite loops.
mVUendProgram(mVU, &mFC, 2);
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], gprT2);
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], arg1regd);
xJMP(mVU.exitFunct);
}

if (!mVU.index) xFastCall((void*)(void(*)())mVUcompileJIT<0>, gprT2, gprT3); //(u32 startPC, uptr pState)
else xFastCall((void*)(void(*)())mVUcompileJIT<1>, gprT2, gprT3);
if (!mVU.index) xFastCall((void*)(void(*)())mVUcompileJIT<0>, arg1reg, arg2reg); //(u32 startPC, uptr pState)
else xFastCall((void*)(void(*)())mVUcompileJIT<1>, arg1reg, arg2reg);

mVUrestoreRegs(mVU);
xJMP(gprT1); // Jump to rec-code address
xJMP(gprT1q); // Jump to rec-code address
}

void normBranch(mV, microFlagCycles& mFC) {
@@ -27,8 +27,8 @@ void mVUdispatcherAB(mV) {
xScopedStackFrame frame(false, true);

// __fastcall = The caller has already put the needed parameters in ecx/edx:
if (!isVU1) { xFastCall((void*)mVUexecuteVU0, ecx, edx); }
else { xFastCall((void*)mVUexecuteVU1, ecx, edx); }
if (!isVU1) { xFastCall((void*)mVUexecuteVU0, arg1reg, arg2reg); }
else { xFastCall((void*)mVUexecuteVU1, arg1reg, arg2reg); }

// Load VU's MXCSR state
xLDMXCSR(g_sseVUMXCSR);

@@ -52,7 +52,7 @@ void mVUdispatcherAB(mV) {
xSHUF.PS(xmmPQ, xmmT1, 0); // wzyx = PPQQ

// Jump to Recompiled Code Block
xJMP(eax);
xJMP(rax);

mVU.exitFunct = x86Ptr;

@@ -89,7 +89,7 @@ void mVUdispatcherCD(mV) {
xMOV(gprF3, ptr32[&mVU.statFlag[3]]);

// Jump to Recompiled Code Block
xJMP(ptr32[&mVU.resumePtrXG]);
xJMP(ptrNative[&mVU.resumePtrXG]);

mVU.exitFunctXG = x86Ptr;
@@ -283,7 +283,7 @@ __fi void mVUsetupFlags(mV, microFlagCycles& mFC) {
xMOV(gprF3, getFlagReg(bStatus[3]));
}
else if (sortRegs == 2) {
xMOV(gprT1, getFlagReg (bStatus[3]));
xMOV(gprF0, getFlagReg (bStatus[0]));
xMOV(gprF1, getFlagReg2(bStatus[1]));
xMOV(gprF2, getFlagReg2(bStatus[2]));

@@ -291,7 +291,7 @@ __fi void mVUsetupFlags(mV, microFlagCycles& mFC) {
}
else if (sortRegs == 3) {
int gFlag = (bStatus[0] == bStatus[1]) ? bStatus[2] : bStatus[1];
xMOV(gprT1, getFlagReg (gFlag));
xMOV(gprT2, getFlagReg (bStatus[3]));
xMOV(gprF0, getFlagReg (bStatus[0]));
xMOV(gprF1, getFlagReg3(bStatus[1]));

@@ -299,12 +299,12 @@ __fi void mVUsetupFlags(mV, microFlagCycles& mFC) {
xMOV(gprF3, gprT2);
}
else {
xMOV(gprT1, getFlagReg(bStatus[0]));
xMOV(gprT2, getFlagReg(bStatus[1]));
xMOV(gprT3, getFlagReg(bStatus[2]));
xMOV(gprF3, getFlagReg(bStatus[3]));
xMOV(gprF0, gprT1);
xMOV(gprF1, gprT2);
xMOV(gprF2, gprT3);
}
}
@@ -841,16 +841,14 @@ mVUop(mVU_ILW) {
analyzeVIreg2(mVU, _It_, mVUlow.VI_write, 4);
}
pass2 {
xAddressVoid ptr(mVU.regs().Mem + offsetSS);
void *ptr = mVU.regs().Mem + offsetSS;

mVUallocVIa(mVU, gprT2, _Is_);
if (!_Is_)
xXOR(gprT2, gprT2);
xADD(gprT2, _Imm11_);
mVUaddrFix (mVU, gprT2);
ptr += gprT2;

xMOVZX(gprT1, ptr16[ptr]);
mVUaddrFix (mVU, gprT2q);
xMOVZX(gprT1, ptr16[xComplexAddress(gprT3q, ptr, gprT2q)]);
mVUallocVIb(mVU, gprT1, _It_);
mVU.profiler.EmitOp(opILW);
}
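Pass 2 above generates code equivalent to this interpreter-style C++ — a sketch with hypothetical names (`Mem` for the VU's data memory, `vi[]` for its integer registers; the offsetSS element offset is folded into the base here):

#include <cstdint>

// ILW: load a 16-bit value from VU memory into an integer (VI) register.
static void ilwSketch(uint16_t* vi, const uint8_t* Mem, int is, int it,
                      int16_t imm11, uint32_t (*addrFix)(uint32_t))
{
    uint32_t addr = (is ? vi[is] : 0) + imm11;   // VI base register, or zero
    addr = addrFix(addr);                        // wrap/translate, as mVUaddrFix does
    vi[it] = *(const uint16_t*)(Mem + addr);     // 16-bit load into VI[it]
}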
@@ -866,13 +864,14 @@ mVUop(mVU_ILWR) {
analyzeVIreg2(mVU, _It_, mVUlow.VI_write, 4);
}
pass2 {
xAddressVoid ptr(mVU.regs().Mem + offsetSS);
void *ptr = mVU.regs().Mem + offsetSS;
if (_Is_) {
mVUallocVIa(mVU, gprT2, _Is_);
mVUaddrFix (mVU, gprT2);
ptr += gprT2;
mVUaddrFix (mVU, gprT2q);
xMOVZX(gprT1, ptr16[xComplexAddress(gprT3q, ptr, gprT2q)]);
} else {
xMOVZX(gprT1, ptr16[ptr]);
}
xMOVZX(gprT1, ptr16[ptr]);
mVUallocVIb(mVU, gprT1, _It_);
mVU.profiler.EmitOp(opILWR);
}

@@ -883,26 +882,44 @@ mVUop(mVU_ILWR) {
// ISW/ISWR
//------------------------------------------------------------------

static void writeBackISW(microVU& mVU, void *base_ptr, xAddressReg reg) {
if (!reg.IsEmpty() && (sptr)base_ptr != (s32)(sptr)base_ptr) {
// base_ptr is out of disp32 range: LEA it (plus the first component's
// offset) into gprT3q once, then address later components relative to it
int register_offset = -1;
auto writeBackAt = [&](int offset){
if (register_offset == -1) {
xLEA(gprT3q, ptr[(void*)((sptr)base_ptr + offset)]);
register_offset = offset;
}
xMOV(ptr32[gprT3q+gprT2q+(offset-register_offset)], gprT1);
};
if (_X) writeBackAt(0);
if (_Y) writeBackAt(4);
if (_Z) writeBackAt(8);
if (_W) writeBackAt(12);
} else {
if (_X) xMOV(ptr32[base_ptr+reg], gprT1);
if (_Y) xMOV(ptr32[base_ptr+reg+4], gprT1);
if (_Z) xMOV(ptr32[base_ptr+reg+8], gprT1);
if (_W) xMOV(ptr32[base_ptr+reg+12], gprT1);
}
}

mVUop(mVU_ISW) {
pass1 {
analyzeVIreg1(mVU, _Is_, mVUlow.VI_read[0]);
analyzeVIreg1(mVU, _It_, mVUlow.VI_read[1]);
}
pass2 {
xAddressVoid ptr(mVU.regs().Mem);
void *ptr = mVU.regs().Mem;

mVUallocVIa(mVU, gprT2, _Is_);
if (!_Is_)
xXOR(gprT2, gprT2);
xADD(gprT2, _Imm11_);
mVUaddrFix (mVU, gprT2);
ptr += gprT2;

mVUallocVIa(mVU, gprT1, _It_);
if (_X) xMOV(ptr32[ptr], gprT1);
if (_Y) xMOV(ptr32[ptr+4], gprT1);
if (_Z) xMOV(ptr32[ptr+8], gprT1);
if (_W) xMOV(ptr32[ptr+12], gprT1);
writeBackISW(mVU, ptr, gprT2);
mVU.profiler.EmitOp(opISW);
}
pass3 { mVUlog("ISW.%s vi%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_); }

@@ -913,17 +930,16 @@ mVUop(mVU_ISWR) {
analyzeVIreg1(mVU, _Is_, mVUlow.VI_read[0]);
analyzeVIreg1(mVU, _It_, mVUlow.VI_read[1]); }
pass2 {
xAddressVoid ptr(mVU.regs().Mem);
void *ptr = mVU.regs().Mem;
xAddressReg is = xEmptyReg;
if (_Is_) {
mVUallocVIa(mVU, gprT2, _Is_);
mVUaddrFix (mVU, gprT2);
ptr += gprT2;
mVUaddrFix (mVU, gprT2q);
is = gprT2q;
}
mVUallocVIa(mVU, gprT1, _It_);
if (_X) xMOV(ptr32[ptr], gprT1);
if (_Y) xMOV(ptr32[ptr+4], gprT1);
if (_Z) xMOV(ptr32[ptr+8], gprT1);
if (_W) xMOV(ptr32[ptr+12], gprT1);
writeBackISW(mVU, ptr, is);

mVU.profiler.EmitOp(opISWR);
}
pass3 { mVUlog("ISWR.%s vi%02d, vi%02d", _XYZW_String, _Ft_, _Fs_); }

@@ -936,16 +952,15 @@ mVUop(mVU_ISWR) {
mVUop(mVU_LQ) {
pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, false); }
pass2 {
xAddressVoid ptr(mVU.regs().Mem);
void *ptr = mVU.regs().Mem;
mVUallocVIa(mVU, gprT2, _Is_);
if (!_Is_)
xXOR(gprT2, gprT2);
xADD(gprT2, _Imm11_);
mVUaddrFix(mVU, gprT2);
ptr += gprT2;
mVUaddrFix(mVU, gprT2q);

const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
mVUloadReg(Ft, ptr, _X_Y_Z_W);
mVUloadReg(Ft, xComplexAddress(gprT3q, ptr, gprT2q), _X_Y_Z_W);
mVU.regAlloc->clearNeeded(Ft);
mVU.profiler.EmitOp(opLQ);
}

@@ -955,18 +970,25 @@ mVUop(mVU_LQ) {
mVUop(mVU_LQD) {
pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, true); }
pass2 {
xAddressVoid ptr(mVU.regs().Mem);
void * ptr = mVU.regs().Mem;
xAddressReg is = xEmptyReg;
if (_Is_ || isVU0) { // Access VU1 regs mem-map in !_Is_ case
mVUallocVIa(mVU, gprT2, _Is_);
xSUB(gprT2b, 1);
if (_Is_) mVUallocVIb(mVU, gprT2, _Is_);
mVUaddrFix (mVU, gprT2);
ptr += gprT2;
mVUaddrFix (mVU, gprT2q);
is = gprT2q;
}
else {
ptr = (void*)((sptr)ptr + (0xffff & (mVU.microMemSize-8)));
}
else ptr += (0xffff & (mVU.microMemSize-8));
if (!mVUlow.noWriteVF) {
const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
mVUloadReg(Ft, ptr, _X_Y_Z_W);
if (is.IsEmpty()) {
mVUloadReg(Ft, xAddressVoid(ptr), _X_Y_Z_W);
} else {
mVUloadReg(Ft, xComplexAddress(gprT3q, ptr, is), _X_Y_Z_W);
}
mVU.regAlloc->clearNeeded(Ft);
}
mVU.profiler.EmitOp(opLQD);

@@ -977,18 +999,23 @@ mVUop(mVU_LQD) {
mVUop(mVU_LQI) {
pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, true); }
pass2 {
xAddressVoid ptr(mVU.regs().Mem);
void *ptr = mVU.regs().Mem;
xAddressReg is = xEmptyReg;
if (_Is_) {
mVUallocVIa(mVU, gprT1, _Is_);
xMOV(gprT2, gprT1);
xADD(gprT1b, 1);
mVUallocVIb(mVU, gprT1, _Is_);
mVUaddrFix (mVU, gprT2);
ptr += gprT2;
mVUaddrFix (mVU, gprT2q);
is = gprT2q;
}
if (!mVUlow.noWriteVF) {
const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
mVUloadReg(Ft, ptr, _X_Y_Z_W);
if (is.IsEmpty()) {
mVUloadReg(Ft, xAddressVoid(ptr), _X_Y_Z_W);
} else {
mVUloadReg(Ft, xComplexAddress(gprT3q, ptr, is), _X_Y_Z_W);
}
mVU.regAlloc->clearNeeded(Ft);
}
mVU.profiler.EmitOp(opLQI);

@@ -1003,17 +1030,16 @@ mVUop(mVU_LQI) {
mVUop(mVU_SQ) {
pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, false); }
pass2 {
xAddressVoid ptr(mVU.regs().Mem);
void * ptr = mVU.regs().Mem;

mVUallocVIa(mVU, gprT2, _It_);
if (!_It_)
xXOR(gprT2, gprT2);
xADD(gprT2, _Imm11_);
mVUaddrFix(mVU, gprT2);
ptr += gprT2;
mVUaddrFix(mVU, gprT2q);

const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
mVUsaveReg(Fs, ptr, _X_Y_Z_W, 1);
mVUsaveReg(Fs, xComplexAddress(gprT3q, ptr, gprT2q), _X_Y_Z_W, 1);
mVU.regAlloc->clearNeeded(Fs);
mVU.profiler.EmitOp(opSQ);
}

@@ -1023,17 +1049,24 @@ mVUop(mVU_SQ) {
mVUop(mVU_SQD) {
pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, true); }
pass2 {
xAddressVoid ptr(mVU.regs().Mem);
void *ptr = mVU.regs().Mem;
xAddressReg it = xEmptyReg;
if (_It_ || isVU0) {// Access VU1 regs mem-map in !_It_ case
mVUallocVIa(mVU, gprT2, _It_);
xSUB(gprT2b, 1);
if (_It_) mVUallocVIb(mVU, gprT2, _It_);
mVUaddrFix (mVU, gprT2);
ptr += gprT2;
mVUaddrFix (mVU, gprT2q);
it = gprT2q;
}
else {
ptr = (void*)((sptr)ptr + (0xffff & (mVU.microMemSize-8)));
}
else ptr += (0xffff & (mVU.microMemSize-8));
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
mVUsaveReg(Fs, ptr, _X_Y_Z_W, 1);
if (it.IsEmpty()) {
mVUsaveReg(Fs, xAddressVoid(ptr), _X_Y_Z_W, 1);
} else {
mVUsaveReg(Fs, xComplexAddress(gprT3q, ptr, it), _X_Y_Z_W, 1);
}
mVU.regAlloc->clearNeeded(Fs);
mVU.profiler.EmitOp(opSQD);
}

@@ -1043,17 +1076,20 @@ mVUop(mVU_SQD) {
mVUop(mVU_SQI) {
pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, true); }
pass2 {
xAddressVoid ptr(mVU.regs().Mem);
void *ptr = mVU.regs().Mem;
if (_It_) {
mVUallocVIa(mVU, gprT1, _It_);
xMOV(gprT2, gprT1);
xADD(gprT1b, 1);
mVUallocVIb(mVU, gprT1, _It_);
mVUaddrFix (mVU, gprT2);
ptr += gprT2;
mVUaddrFix (mVU, gprT2q);
}
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
mVUsaveReg(Fs, ptr, _X_Y_Z_W, 1);
if (_It_) {
mVUsaveReg(Fs, xComplexAddress(gprT3q, ptr, gprT2q), _X_Y_Z_W, 1);
} else {
mVUsaveReg(Fs, xAddressVoid(ptr), _X_Y_Z_W, 1);
}
mVU.regAlloc->clearNeeded(Fs);
mVU.profiler.EmitOp(opSQI);
}

@@ -1409,7 +1445,7 @@ void normJumpPass2(mV) {
if (!mVUlow.evilBranch) { xMOV(ptr32[&mVU.branch], gprT1 ); }
else { xMOV(ptr32[&mVU.evilBranch], gprT1 ); }
//If the delay slot is conditional, it uses badBranch to go to its target
if (mVUlow.badBranch) { xADD(gprT1, 8); xMOV(ptr32[&mVU.badBranch], gprT1); }
}
}
@@ -373,7 +373,8 @@ static void recCTC2() {
// Executing vu0 block here fixes the intro of Ratchet and Clank
// sVU's COP2 has a comment that "Donald Duck" needs this too...
if (_Rd_) _eeMoveGPRtoM((uptr)&vu0Regs.VI[_Rd_].UL, _Rt_);
xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, (uptr)CpuVU0);
xLoadFarAddr(arg1reg, CpuVU0);
xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg);
break;
}
}
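xLoadFarAddr hides the distance problem that used to be spelled out with `(uptr)` casts. Based on the encodings the codegen tests at the end of this commit exercise (RIP-relative LEA for nearby targets, 64-bit immediate MOV for far ones), the decision it makes can be sketched as:

#include <cstdint>

// Illustrative only, not the emitter's internals: a rel32-reachable target
// can use "lea reg, [rip+disp32]"; anything else needs "mov reg, imm64".
static bool ripReachable(const void* target, const void* nextInstruction)
{
    intptr_t delta = (intptr_t)target - (intptr_t)nextInstruction;
    return delta == (int32_t)delta;
}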
@@ -18,7 +18,7 @@
using namespace x86Emitter;

typedef xRegisterSSE xmm;
typedef xRegisterLong x32;
typedef xRegister32 x32;

struct microVU;

@@ -145,14 +145,24 @@ static const char branchSTR[16][8] = {
#define gprT1 eax // eax - Temp Reg
#define gprT2 ecx // ecx - Temp Reg
#define gprT3 edx // edx - Temp Reg
#define gprT1q rax // rax - 64-bit of gprT1
#define gprT2q rcx // rcx - 64-bit of gprT2
#define gprT3q rdx // rdx - 64-bit of gprT3
#define gprT1b ax // Low 16-bit of gprT1 (eax)
#define gprT2b cx // Low 16-bit of gprT2 (ecx)
#define gprT3b dx // Low 16-bit of gprT3 (edx)

#ifdef __M_X86_64
// r12d-r14d are callee-saved, so the flag values survive emitted calls
#define gprF0 ebx // Status Flag 0
#define gprF1 r12d // Status Flag 1
#define gprF2 r13d // Status Flag 2
#define gprF3 r14d // Status Flag 3
#else
#define gprF0 ebx // Status Flag 0
#define gprF1 ebp // Status Flag 1
#define gprF2 esi // Status Flag 2
#define gprF3 edi // Status Flag 3
#endif

// Function Params
#define mP microVU& mVU, int recPass
@@ -236,6 +236,18 @@ __fi void mVUrestoreRegs(microVU& mVU, bool fromMemory = false) {
else xMOVAPS(xmmPQ, ptr128[&mVU.xmmBackup[xmmPQ.Id][0]]);
}

class mVUScopedXMMBackup {
microVU& mVU;
bool fromMemory;
public:
mVUScopedXMMBackup(microVU& mVU, bool fromMemory): mVU(mVU), fromMemory(fromMemory) {
mVUbackupRegs(mVU, fromMemory);
}
~mVUScopedXMMBackup() {
mVUrestoreRegs(mVU, fromMemory);
}
};
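The new RAII wrapper pairs mVUbackupRegs with mVUrestoreRegs automatically, so no code path can forget the restore. The same pattern in miniature, as self-contained C++:

// Constructor saves state, destructor restores it — early returns and
// exceptions cannot skip the restore.
struct ScopedBackup
{
    int& slot;
    int  saved;
    explicit ScopedBackup(int& s) : slot(s), saved(s) {}
    ~ScopedBackup() { slot = saved; }
};

void example(int& reg)
{
    ScopedBackup backup(reg);  // like mVUScopedXMMBackup(mVU, true)
    reg = 42;                  // clobber freely inside the scope
}                              // restored here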
_mVUt void __fc mVUprintRegs() {
microVU& mVU = mVUx;
for(int i = 0; i < 8; i++) {

@@ -274,42 +286,31 @@ static void __fc mVUwaitMTVU() {
}

// Transforms the address in gprReg to a valid VU0/VU1 address
__fi void mVUaddrFix(mV, const x32& gprReg)
__fi void mVUaddrFix(mV, const xAddressReg& gprReg)
{
if (isVU1) {
xAND(gprReg, 0x3ff); // wrap around
xSHL(gprReg, 4);
xAND(xRegister32(gprReg.Id), 0x3ff); // wrap around
xSHL(xRegister32(gprReg.Id), 4);
}
else {
xTEST(gprReg, 0x400);
xTEST(xRegister32(gprReg.Id), 0x400);
xForwardJNZ8 jmpA; // if addr & 0x4000, reads VU1's VF regs and VI regs
xAND(gprReg, 0xff); // if !(addr & 0x4000), wrap around
xAND(xRegister32(gprReg.Id), 0xff); // if !(addr & 0x4000), wrap around
xForwardJump32 jmpB;
jmpA.SetTarget();
if (THREAD_VU1) {
mVUbackupRegs(mVU, true);
xPUSH(gprT1);
xPUSH(gprT2);
xPUSH(gprT3);
// Align the stackframe (GCC only, since GCC assumes the stackframe is always aligned)
#ifdef __GNUC__
xSUB(esp, 4);
#endif
if (IsDevBuild && !isCOP2) { // Let's see which games do this!
xMOV(gprT2, mVU.prog.cur->idx); // Note: Kernel does it via COP2 to initialize VU1!
xMOV(gprT3, xPC); // So we don't spam the console, we'll only check micro-mode...
xCALL((void*)mVUwarningRegAccess);
{
mVUScopedXMMBackup mVUSave(mVU, true);
xScopedSavedRegisters save {gprT1q, gprT2q, gprT3q};
if (IsDevBuild && !isCOP2) { // Let's see which games do this!
xMOV(arg1regd, mVU.prog.cur->idx); // Note: Kernel does it via COP2 to initialize VU1!
xMOV(arg2regd, xPC); // So we don't spam the console, we'll only check micro-mode...
xFastCall((void*)mVUwarningRegAccess, arg1regd, arg2regd);
}
xFastCall((void*)mVUwaitMTVU);
}
xCALL((void*)mVUwaitMTVU);
#ifdef __GNUC__
xADD(esp, 4);
#endif
xPOP (gprT3);
xPOP (gprT2);
xPOP (gprT1);
mVUrestoreRegs(mVU, true);
}
xAND(gprReg, 0x3f); // ToDo: there's a potential problem if VU0 overrides VU1's VF0/VI0 regs!
xAND(xRegister32(gprReg.Id), 0x3f); // ToDo: there's a potential problem if VU0 overrides VU1's VF0/VI0 regs!
xADD(gprReg, (u128*)VU1.VF - (u128*)VU0.Mem);
jmpB.SetTarget();
xSHL(gprReg, 4); // multiply by 16 (shift left by 4)
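In plain C++, the address transform emitted above behaves like this (a sketch; constants are the ones used in the generated code, and the VU1-register window offset — `(u128*)VU1.VF - (u128*)VU0.Mem`, in quadwords — is left as a parameter):

#include <cstdint>

static uint32_t vuAddrFixSketch(uint32_t addr, bool isVU1, uint32_t vu1RegsOffsetQW)
{
    if (isVU1)
        return (addr & 0x3ff) << 4;   // wrap to VU1 data mem, scale to bytes
    if (!(addr & 0x400))
        return (addr & 0xff) << 4;    // VU0 data memory, wrapped
    // VU0 reaching VU1's VF/VI registers through its memory map
    return ((addr & 0x3f) + vu1RegsOffsetQW) << 4;
}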
@@ -568,38 +569,38 @@ void mVUcustomSearch() {
memset(mVUsearchXMM, 0xcc, __pagesize);
xSetPtr(mVUsearchXMM);

xMOVAPS (xmm0, ptr32[ecx]);
xPCMP.EQD(xmm0, ptr32[edx]);
xMOVAPS (xmm1, ptr32[ecx + 0x10]);
xPCMP.EQD(xmm1, ptr32[edx + 0x10]);
xMOVAPS (xmm0, ptr32[arg1reg]);
xPCMP.EQD(xmm0, ptr32[arg2reg]);
xMOVAPS (xmm1, ptr32[arg1reg + 0x10]);
xPCMP.EQD(xmm1, ptr32[arg2reg + 0x10]);
xPAND (xmm0, xmm1);

xMOVMSKPS(eax, xmm0);
xCMP (eax, 0xf);
xForwardJL8 exitPoint;

xMOVAPS (xmm0, ptr32[ecx + 0x20]);
xPCMP.EQD(xmm0, ptr32[edx + 0x20]);
xMOVAPS (xmm1, ptr32[ecx + 0x30]);
xPCMP.EQD(xmm1, ptr32[edx + 0x30]);
xMOVAPS (xmm0, ptr32[arg1reg + 0x20]);
xPCMP.EQD(xmm0, ptr32[arg2reg + 0x20]);
xMOVAPS (xmm1, ptr32[arg1reg + 0x30]);
xPCMP.EQD(xmm1, ptr32[arg2reg + 0x30]);
xPAND (xmm0, xmm1);

xMOVAPS (xmm2, ptr32[ecx + 0x40]);
xPCMP.EQD(xmm2, ptr32[edx + 0x40]);
xMOVAPS (xmm3, ptr32[ecx + 0x50]);
xPCMP.EQD(xmm3, ptr32[edx + 0x50]);
xMOVAPS (xmm2, ptr32[arg1reg + 0x40]);
xPCMP.EQD(xmm2, ptr32[arg2reg + 0x40]);
xMOVAPS (xmm3, ptr32[arg1reg + 0x50]);
xPCMP.EQD(xmm3, ptr32[arg2reg + 0x50]);
xPAND (xmm2, xmm3);

xMOVAPS (xmm4, ptr32[ecx + 0x60]);
xPCMP.EQD(xmm4, ptr32[edx + 0x60]);
xMOVAPS (xmm5, ptr32[ecx + 0x70]);
xPCMP.EQD(xmm5, ptr32[edx + 0x70]);
xMOVAPS (xmm4, ptr32[arg1reg + 0x60]);
xPCMP.EQD(xmm4, ptr32[arg2reg + 0x60]);
xMOVAPS (xmm5, ptr32[arg1reg + 0x70]);
xPCMP.EQD(xmm5, ptr32[arg2reg + 0x70]);
xPAND (xmm4, xmm5);

xMOVAPS (xmm6, ptr32[ecx + 0x80]);
xPCMP.EQD(xmm6, ptr32[edx + 0x80]);
xMOVAPS (xmm7, ptr32[ecx + 0x90]);
xPCMP.EQD(xmm7, ptr32[edx + 0x90]);
xMOVAPS (xmm6, ptr32[arg1reg + 0x80]);
xPCMP.EQD(xmm6, ptr32[arg2reg + 0x80]);
xMOVAPS (xmm7, ptr32[arg1reg + 0x90]);
xPCMP.EQD(xmm7, ptr32[arg2reg + 0x90]);
xPAND (xmm6, xmm7);

xPAND (xmm0, xmm2);
@@ -221,10 +221,10 @@ void VifUnpackSSE_Dynarec::CompileRoutine() {
while (vNum) {

ShiftDisplacementWindow( dstIndirect, ecx );
ShiftDisplacementWindow( dstIndirect, arg1reg );

if(UnpkNoOfIterations == 0)
ShiftDisplacementWindow( srcIndirect, edx ); //Don't need to do this otherwise, as we aren't reading the source.
ShiftDisplacementWindow( srcIndirect, arg2reg ); //Don't need to do this otherwise, as we aren't reading the source.

if (vCL < cycleSize) {
@@ -63,8 +63,8 @@ VifUnpackSSE_Base::VifUnpackSSE_Base()
, UnpkLoopIteration(0)
, UnpkNoOfIterations(0)
, IsAligned(0)
, dstIndirect(ecx) // parameter 1 of __fastcall
, srcIndirect(edx) // parameter 2 of __fastcall
, dstIndirect(arg1reg)
, srcIndirect(arg2reg)
, workReg( xmm1 )
, destReg( xmm0 )
{
@@ -24,23 +24,23 @@ TEST(CodegenTests, MOVTest)
{
CODEGEN_TEST_BOTH(xMOV(rax, 0), "31 c0");
CODEGEN_TEST_64(xMOV(rax, rcx), "48 89 c8");
CODEGEN_TEST_BOTH(xMOV(eaxd, ecxd), "89 c8");
CODEGEN_TEST_BOTH(xMOV(eax, ecx), "89 c8");
CODEGEN_TEST_64(xMOV(r8, 0), "45 31 c0");
CODEGEN_TEST_64(xMOV(rax, r8), "4c 89 c0");
CODEGEN_TEST_64(xMOV(r8, rax), "49 89 c0");
CODEGEN_TEST_64(xMOV(r8, r9), "4d 89 c8");
CODEGEN_TEST_64(xMOV(rax, ptrNative[rcx]), "48 8b 01");
CODEGEN_TEST_BOTH(xMOV(eaxd, ptrNative[rcx]), "8b 01");
CODEGEN_TEST_BOTH(xMOV(eax, ptrNative[rcx]), "8b 01");
CODEGEN_TEST_64(xMOV(ptrNative[rax], rcx), "48 89 08");
CODEGEN_TEST_BOTH(xMOV(ptr32[rax], ecxd), "89 08");
CODEGEN_TEST_BOTH(xMOV(ptr32[rax], ecx), "89 08");
CODEGEN_TEST_64(xMOV(rax, ptrNative[r8]), "49 8b 00");
CODEGEN_TEST_64(xMOV(ptrNative[r8], rax), "49 89 00");
CODEGEN_TEST_64(xMOV(r8, ptrNative[r9]), "4d 8b 01");
CODEGEN_TEST_64(xMOV(ptrNative[r8], r9), "4d 89 08");
CODEGEN_TEST_64(xMOV(rax, ptrNative[rbx*4+3+rcx]), "48 8b 44 99 03");
CODEGEN_TEST_64(xMOV(ptrNative[rbx*4+3+rax], rcx), "48 89 4c 98 03");
CODEGEN_TEST_BOTH(xMOV(eaxd, ptr32[rbx*4+3+rcx]), "8b 44 99 03");
CODEGEN_TEST_BOTH(xMOV(ptr32[rbx*4+3+rax], ecxd), "89 4c 98 03");
CODEGEN_TEST_BOTH(xMOV(eax, ptr32[rbx*4+3+rcx]), "8b 44 99 03");
CODEGEN_TEST_BOTH(xMOV(ptr32[rbx*4+3+rax], ecx), "89 4c 98 03");
CODEGEN_TEST_64(xMOV(r8, ptrNative[r10*4+3+r9]), "4f 8b 44 91 03");
CODEGEN_TEST_64(xMOV(ptrNative[r9*4+3+r8], r10), "4f 89 54 88 03");
CODEGEN_TEST_64(xMOV(ptrNative[r8], 0), "49 c7 00 00 00 00 00");

@@ -56,14 +56,18 @@ TEST(CodegenTests, MOVTest)
TEST(CodegenTests, LEATest)
{
CODEGEN_TEST_64(xLEA(rax, ptr[rcx]), "48 89 c8"); // Converted to mov rax, rcx
CODEGEN_TEST_BOTH(xLEA(eaxd, ptr[rcx]), "89 c8"); // Converted to mov eax, ecx
CODEGEN_TEST_BOTH(xLEA(eax, ptr[rcx]), "89 c8"); // Converted to mov eax, ecx
CODEGEN_TEST_64(xLEA(rax, ptr[r8]), "4c 89 c0"); // Converted to mov rax, r8
CODEGEN_TEST_64(xLEA(r8, ptr[r9]), "4d 89 c8"); // Converted to mov r8, r9
CODEGEN_TEST_64(xLEA(rax, ptr[rbx*4+3+rcx]), "48 8d 44 99 03");
CODEGEN_TEST_BOTH(xLEA(eaxd, ptr32[rbx*4+3+rcx]), "8d 44 99 03");
CODEGEN_TEST_BOTH(xLEA(eax, ptr32[rbx*4+3+rcx]), "8d 44 99 03");
CODEGEN_TEST_64(xLEA(r8, ptr[r10*4+3+r9]), "4f 8d 44 91 03");
CODEGEN_TEST_64(xLEA(r8, ptr[base]), "4c 8d 05 f9 ff ff ff");
CODEGEN_TEST_64(xLoadFarAddr(r8, base), "4c 8d 05 f9 ff ff ff");
CODEGEN_TEST_64(xLoadFarAddr(r8, (void*)0x1234567890), "49 b8 90 78 56 34 12 00 00 00");
CODEGEN_TEST_BOTH(xLEA(rax, ptr[(void*)0x1234]), "b8 34 12 00 00"); // Converted to mov rax, 0x1234
CODEGEN_TEST_BOTH(xLoadFarAddr(rax, (void*)0x1234), "b8 34 12 00 00");
CODEGEN_TEST(xLEA_Writeback(rbx), "bb cd cd cd cd", "48 8d 1d cd cd cd 0d");
}

TEST(CodegenTests, PUSHTest)

@@ -100,7 +104,7 @@ TEST(CodegenTests, POPTest)

TEST(CodegenTests, MathTest)
{
CODEGEN_TEST(xINC(eaxd), "40", "ff c0");
CODEGEN_TEST(xINC(eax), "40", "ff c0"); // 0x40 became a REX prefix on x86-64, so inc needs the ff /0 form
CODEGEN_TEST(xDEC(rax), "48", "48 ff c8");
CODEGEN_TEST_64(xINC(r8), "49 ff c0");
CODEGEN_TEST_64(xADD(r8, r9), "4d 01 c8");

@@ -108,33 +112,33 @@ TEST(CodegenTests, MathTest)
CODEGEN_TEST_64(xADD(rax, 0x1234), "48 05 34 12 00 00");
CODEGEN_TEST_64(xADD(ptr32[base], -0x60), "83 05 f9 ff ff ff a0");
CODEGEN_TEST_64(xADD(ptr32[base], 0x1234), "81 05 f6 ff ff ff 34 12 00 00");
CODEGEN_TEST_BOTH(xADD(eaxd, ebxd), "01 d8");
CODEGEN_TEST_BOTH(xADD(eaxd, 0x1234), "05 34 12 00 00");
CODEGEN_TEST_BOTH(xADD(eax, ebx), "01 d8");
CODEGEN_TEST_BOTH(xADD(eax, 0x1234), "05 34 12 00 00");
CODEGEN_TEST_64(xADD(r8, ptrNative[r10*4+3+r9]), "4f 03 44 91 03");
CODEGEN_TEST_64(xADD(ptrNative[r9*4+3+r8], r10), "4f 01 54 88 03");
CODEGEN_TEST_BOTH(xADD(eaxd, ptr32[rbx*4+3+rcx]), "03 44 99 03");
CODEGEN_TEST_BOTH(xADD(ptr32[rax*4+3+rbx], ecxd), "01 4c 83 03");
CODEGEN_TEST_BOTH(xADD(eax, ptr32[rbx*4+3+rcx]), "03 44 99 03");
CODEGEN_TEST_BOTH(xADD(ptr32[rax*4+3+rbx], ecx), "01 4c 83 03");
CODEGEN_TEST_64(xSUB(r8, 0x12), "49 83 e8 12");
CODEGEN_TEST_64(xSUB(rax, 0x1234), "48 2d 34 12 00 00");
CODEGEN_TEST_BOTH(xSUB(eaxd, ptr32[rcx*4+rax]), "2b 04 88");
CODEGEN_TEST_BOTH(xSUB(eax, ptr32[rcx*4+rax]), "2b 04 88");
CODEGEN_TEST_64(xMUL(ptr32[base]), "f7 2d fa ff ff ff");
CODEGEN_TEST(xMUL(ptr32[(void*)0x1234]), "f7 2d 34 12 00 00", "f7 2c 25 34 12 00 00");
CODEGEN_TEST_BOTH(xDIV(ecxd), "f7 f9");
CODEGEN_TEST_BOTH(xDIV(ecx), "f7 f9");
}

TEST(CodegenTests, BitwiseTest)
{
CODEGEN_TEST_64(xSHR(r8, cl), "49 d3 e8");
CODEGEN_TEST_64(xSHR(rax, cl), "48 d3 e8");
CODEGEN_TEST_BOTH(xSHR(ecxd, cl), "d3 e9");
CODEGEN_TEST_BOTH(xSHR(ecx, cl), "d3 e9");
CODEGEN_TEST_64(xSAR(r8, 1), "49 d1 f8");
CODEGEN_TEST_64(xSAR(rax, 60), "48 c1 f8 3c");
CODEGEN_TEST_BOTH(xSAR(eaxd, 30), "c1 f8 1e");
CODEGEN_TEST_BOTH(xSHL(ebxd, 30), "c1 e3 1e");
CODEGEN_TEST_BOTH(xSAR(eax, 30), "c1 f8 1e");
CODEGEN_TEST_BOTH(xSHL(ebx, 30), "c1 e3 1e");
CODEGEN_TEST_64(xSHL(ptr32[base], 4), "c1 25 f9 ff ff ff 04");
CODEGEN_TEST_64(xAND(r8, r9), "4d 21 c8");
CODEGEN_TEST_64(xXOR(rax, ptrNative[r10]), "49 33 02");
CODEGEN_TEST_BOTH(xOR(esid, ptr32[rax+rbx]), "0b 34 18");
CODEGEN_TEST_BOTH(xOR(esi, ptr32[rax+rbx]), "0b 34 18");
CODEGEN_TEST_64(xNOT(r8), "49 f7 d0");
CODEGEN_TEST_64(xNOT(ptrNative[rax]), "48 f7 10");
CODEGEN_TEST_BOTH(xNOT(ptr32[rbx]), "f7 13");