EE/IOP/VU: x86-64 recompiler support

This commit is contained in:
Tellow Krinkle 2020-04-15 16:11:53 -05:00 committed by tellowkrinkle
parent eeca29b6d3
commit dc57270fb8
31 changed files with 680 additions and 493 deletions

View File

@ -66,6 +66,7 @@ struct xImpl_FastCall
void operator()(void *f, u32 a1, const xRegister32 &a2) const; void operator()(void *f, u32 a1, const xRegister32 &a2) const;
void operator()(void *f, const xIndirect32 &a1) const; void operator()(void *f, const xIndirect32 &a1) const;
void operator()(void *f, u32 a1, u32 a2) const; void operator()(void *f, u32 a1, u32 a2) const;
void operator()(void *f, void *a1) const;
#ifdef __M_X86_64 #ifdef __M_X86_64
void operator()(void *f, const xRegisterLong &a1, const xRegisterLong &a2 = xEmptyReg) const; void operator()(void *f, const xRegisterLong &a1, const xRegisterLong &a2 = xEmptyReg) const;

View File

@ -145,6 +145,8 @@ extern void xBSWAP(const xRegister32or64 &to);
extern void xLEA(xRegister64 to, const xIndirectVoid &src, bool preserve_flags = false); extern void xLEA(xRegister64 to, const xIndirectVoid &src, bool preserve_flags = false);
extern void xLEA(xRegister32 to, const xIndirectVoid &src, bool preserve_flags = false); extern void xLEA(xRegister32 to, const xIndirectVoid &src, bool preserve_flags = false);
extern void xLEA(xRegister16 to, const xIndirectVoid &src, bool preserve_flags = false); extern void xLEA(xRegister16 to, const xIndirectVoid &src, bool preserve_flags = false);
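/// LEA with a target that will be decided later. Guarantees that no optimizations are performed that could change what needs to be written in. Returns a pointer to the last 32 bits emitted, which the caller overwrites once the target is known.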
extern u32* xLEA_Writeback(xAddressReg to);
// ----- Push / Pop Instructions ----- // ----- Push / Pop Instructions -----
// Note: pushad/popad implementations are intentionally left out. The instructions are // Note: pushad/popad implementations are intentionally left out. The instructions are
@ -198,6 +200,27 @@ public:
~xScopedStackFrame(); ~xScopedStackFrame();
}; };
//////////////////////////////////////////////////////////////////////////////////////////
/// Helper object to save some temporary registers before the call.
/// The constructor calls xPUSH for each listed register in list order and then
/// calls stackAlign for the bytes pushed; the destructor calls stackAlign in the
/// opposite direction and then xPOP for each register in reverse order.
class xScopedSavedRegisters
{
// Registers are held by reference (std::reference_wrapper), not copied, so the
// referenced register objects must stay alive until the destructor has run.
std::vector<std::reference_wrapper<const xAddressReg>> regs;
public:
// Emits the pushes and the stack alignment for `regs`.
xScopedSavedRegisters(std::initializer_list<std::reference_wrapper<const xAddressReg>> regs);
// Emits the matching stack re-adjustment and the pops.
~xScopedSavedRegisters();
};
//////////////////////////////////////////////////////////////////////////////////////////
/// Helper function to calculate base+offset taking into account the limitations of x86-64's RIP-relative addressing
/// (Will either return `base+offset` or LEA `base` into `tmpRegister` and return `tmpRegister+offset`)
xAddressVoid xComplexAddress(const xAddressReg& tmpRegister, void *base, const xAddressVoid& offset);
//////////////////////////////////////////////////////////////////////////////////////////
/// Helper function to load addresses that may be far from the current instruction pointer
/// On i386, resolves to `mov dst, (sptr)addr`
/// On x86-64, resolves to either `mov dst, (sptr)addr` or `lea dst, [addr]` depending on the distance from RIP
void xLoadFarAddr(const xAddressReg& dst, void *addr);
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
// JMP / Jcc Instructions! // JMP / Jcc Instructions!
@ -445,8 +468,8 @@ extern void xMOVNTDQA(const xIndirectVoid &to, const xRegisterSSE &from);
extern void xMOVNTPD(const xIndirectVoid &to, const xRegisterSSE &from); extern void xMOVNTPD(const xIndirectVoid &to, const xRegisterSSE &from);
extern void xMOVNTPS(const xIndirectVoid &to, const xRegisterSSE &from); extern void xMOVNTPS(const xIndirectVoid &to, const xRegisterSSE &from);
extern void xMOVMSKPS(const xRegister32or64 &to, const xRegisterSSE &from); extern void xMOVMSKPS(const xRegister32 &to, const xRegisterSSE &from);
extern void xMOVMSKPD(const xRegister32or64 &to, const xRegisterSSE &from); extern void xMOVMSKPD(const xRegister32 &to, const xRegisterSSE &from);
extern void xMASKMOV(const xRegisterSSE &to, const xRegisterSSE &from); extern void xMASKMOV(const xRegisterSSE &to, const xRegisterSSE &from);
extern void xPMOVMSKB(const xRegister32or64 &to, const xRegisterSSE &from); extern void xPMOVMSKB(const xRegister32or64 &to, const xRegisterSSE &from);

View File

@ -489,6 +489,7 @@ public:
#else #else
#define xRegisterLong xRegister32 #define xRegisterLong xRegister32
#endif #endif
static const int wordsize = sizeof(sptr);
class xAddressReg : public xRegisterLong class xAddressReg : public xRegisterLong
{ {
@ -648,14 +649,9 @@ extern const xAddressReg
r8, r9, r10, r11, r8, r9, r10, r11,
r12, r13, r14, r15; r12, r13, r14, r15;
extern const xAddressReg
eax, ebx, ecx, edx,
esi, edi, ebp, esp;
// Temporary registers to aid the move to x86-64
extern const xRegister32 extern const xRegister32
eaxd, ebxd, ecxd, edxd, eax, ebx, ecx, edx,
esid, edid, ebpd, espd, esi, edi, ebp, esp,
r8d, r9d, r10d, r11d, r8d, r9d, r10d, r11d,
r12d, r13d, r14d, r15d; r12d, r13d, r14d, r15d;

View File

@ -104,6 +104,11 @@ void xImpl_FastCall::operator()(void *f, u32 a1, const xRegisterLong &a2) const
} }
#endif #endif
// Emits a call to `f` passing the pointer value `a1` as the first argument.
void xImpl_FastCall::operator()(void *f, void *a1) const {
    // Load the address itself (not the memory it points at) into the first argument register.
    xLEA(arg1reg, ptr[a1]);

    // Hand off to the register-taking overload. arg2reg is passed as the second
    // argument; NOTE(review): presumably so whatever is already in arg2reg is
    // left as-is -- confirm against the register overload.
    this->operator()(f, arg1reg, arg2reg);
}
void xImpl_FastCall::operator()(void *f, u32 a1, const xRegister32 &a2) const { void xImpl_FastCall::operator()(void *f, u32 a1, const xRegister32 &a2) const {
if (!a2.IsEmpty()) { xMOV(arg2regd, a2); } if (!a2.IsEmpty()) { xMOV(arg2regd, a2); }
xMOV(arg1regd, a1); xMOV(arg1regd, a1);

View File

@ -711,8 +711,8 @@ __fi void xMOVNTPS(const xIndirectVoid &to, const xRegisterSSE &from) { xOpWrite
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
__fi void xMOVMSKPS(const xRegister32or64 &to, const xRegisterSSE &from) { xOpWrite0F(0x50, to, from); } __fi void xMOVMSKPS(const xRegister32 &to, const xRegisterSSE &from) { xOpWrite0F(0x50, to, from); }
__fi void xMOVMSKPD(const xRegister32or64 &to, const xRegisterSSE &from) { xOpWrite0F(0x66, 0x50, to, from, true); } __fi void xMOVMSKPD(const xRegister32 &to, const xRegisterSSE &from) { xOpWrite0F(0x66, 0x50, to, from, true); }
// xMASKMOV: // xMASKMOV:
// Selectively write bytes from mm1/xmm1 to memory location using the byte mask in mm2/xmm2. // Selectively write bytes from mm1/xmm1 to memory location using the byte mask in mm2/xmm2.

View File

@ -130,17 +130,11 @@ const xAddressReg
r12(12), r13(13), r12(12), r13(13),
r14(14), r15(15); r14(14), r15(15);
const xAddressReg const xRegister32
eax(0), ebx(3), eax(0), ebx(3),
ecx(1), edx(2), ecx(1), edx(2),
esp(4), ebp(5), esp(4), ebp(5),
esi(6), edi(7); esi(6), edi(7),
const xRegister32
eaxd(0), ebxd(3),
ecxd(1), edxd(2),
espd(4), ebpd(5),
esid(6), edid(7),
r8d(8), r9d(9), r8d(8), r9d(9),
r10d(10), r11d(11), r10d(10), r11d(11),
r12d(12), r13d(13), r12d(12), r13d(13),
@ -173,10 +167,10 @@ const xAddressReg
calleeSavedReg2 = rsi; calleeSavedReg2 = rsi;
const xRegister32 const xRegister32
arg1regd = ecxd, arg1regd = ecx,
arg2regd = edxd, arg2regd = edx,
calleeSavedReg1d = edid, calleeSavedReg1d = edi,
calleeSavedReg2d = esid; calleeSavedReg2d = esi;
#else #else
const xAddressReg const xAddressReg
arg1reg = rdi, arg1reg = rdi,
@ -187,8 +181,8 @@ const xAddressReg
calleeSavedReg2 = r13; calleeSavedReg2 = r13;
const xRegister32 const xRegister32
arg1regd = edid, arg1regd = edi,
arg2regd = esid, arg2regd = esi,
calleeSavedReg1d = r12d, calleeSavedReg1d = r12d,
calleeSavedReg2d = r13d; calleeSavedReg2d = r13d;
#endif #endif
@ -367,7 +361,7 @@ void EmitSibMagic(uint regfield, const xIndirectVoid &info, int extraRIPOffset)
EmitSibMagic(regfield, (void *)info.Displacement, extraRIPOffset); EmitSibMagic(regfield, (void *)info.Displacement, extraRIPOffset);
return; return;
} else { } else {
if (info.Index == ebp && displacement_size == 0) if (info.Index == rbp && displacement_size == 0)
displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]!
ModRM(displacement_size, regfield, info.Index.Id & 7); ModRM(displacement_size, regfield, info.Index.Id & 7);
@ -385,7 +379,7 @@ void EmitSibMagic(uint regfield, const xIndirectVoid &info, int extraRIPOffset)
xWrite<s32>(info.Displacement); xWrite<s32>(info.Displacement);
return; return;
} else { } else {
if (info.Base == ebp && displacement_size == 0) if (info.Base == rbp && displacement_size == 0)
displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]!
ModRM(displacement_size, regfield, ModRm_UseSib); ModRM(displacement_size, regfield, ModRm_UseSib);
@ -896,7 +890,7 @@ static void EmitLeaMagic(const xRegisterInt &to, const xIndirectVoid &src, bool
} else { } else {
if (src.Scale == 0) { if (src.Scale == 0) {
if (!preserve_flags) { if (!preserve_flags) {
if (src.Index == esp) { if (src.Index == rsp) {
// ESP is not encodable as an index (ix86 ignores it), thus: // ESP is not encodable as an index (ix86 ignores it), thus:
_xMovRtoR(to, sizeMatchedBase); // will do the trick! _xMovRtoR(to, sizeMatchedBase); // will do the trick!
if (src.Displacement) if (src.Displacement)
@ -907,7 +901,7 @@ static void EmitLeaMagic(const xRegisterInt &to, const xIndirectVoid &src, bool
_g1_EmitOp(G1Type_ADD, to, sizeMatchedIndex); _g1_EmitOp(G1Type_ADD, to, sizeMatchedIndex);
return; return;
} }
} else if ((src.Index == esp) && (src.Displacement == 0)) { } else if ((src.Index == rsp) && (src.Displacement == 0)) {
// special case handling of ESP as Index, which is replaceable with // special case handling of ESP as Index, which is replaceable with
// a single MOV even when preserve_flags is set! :D // a single MOV even when preserve_flags is set! :D
@ -937,6 +931,17 @@ __emitinline void xLEA(xRegister16 to, const xIndirectVoid &src, bool preserve_f
EmitLeaMagic(to, src, preserve_flags); EmitLeaMagic(to, src, preserve_flags);
} }
// Emits an instruction that loads a not-yet-known address into `to`, using a
// placeholder operand, and returns a pointer to the final 4 bytes emitted so the
// caller can overwrite them later.
__emitinline u32* xLEA_Writeback(xAddressReg to)
{
#ifdef __M_X86_64
// 0x8d is the LEA opcode. The placeholder address is built relative to the current
// emit pointer; NOTE(review): the +7 looks like the encoded length of this LEA, which
// would make the RIP-relative displacement come out as 0xdcdcdcd -- confirm.
xOpWrite(0, 0x8d, to, ptr[(void*)(0xdcdcdcd + (uptr)xGetPtr() + 7)]);
#else
// 0xb8 | reg is the `mov r32, imm32` opcode; the immediate is written out separately
// below as the placeholder value.
xOpAccWrite(0, 0xb8 | to.Id, 0, to);
xWrite32(0xcdcdcdcd);
#endif
// The emit pointer now sits just past the instruction; step back one u32 to address
// its last 4 bytes (on the #else path, exactly the xWrite32 placeholder above).
return (u32*)xGetPtr() - 1;
}
// ===================================================================================================== // =====================================================================================================
// TEST / INC / DEC // TEST / INC / DEC
// ===================================================================================================== // =====================================================================================================
@ -1145,6 +1150,14 @@ __emitinline void xRestoreReg(const xRegisterSSE &dest)
#endif #endif
// Passes to ALIGN_STACK the number of bytes that rounds `offset` up to the next
// multiple of 16 (zero when it already is one). The amount is negated when
// `moveDown` is set.
static void stackAlign(int offset, bool moveDown) {
    const int remainder = offset % 16;
    int adjustment = (16 - remainder) % 16; // outer % maps a full 16 back to 0
    if (moveDown)
        adjustment = -adjustment;
    ALIGN_STACK(adjustment);
}
xScopedStackFrame::xScopedStackFrame(bool base_frame, bool save_base_pointer, int offset) xScopedStackFrame::xScopedStackFrame(bool base_frame, bool save_base_pointer, int offset)
{ {
m_base_frame = base_frame; m_base_frame = base_frame;
@ -1188,12 +1201,12 @@ xScopedStackFrame::xScopedStackFrame(bool base_frame, bool save_base_pointer, in
#endif #endif
ALIGN_STACK(-(16 - m_offset % 16)); stackAlign(m_offset, true);
} }
xScopedStackFrame::~xScopedStackFrame() xScopedStackFrame::~xScopedStackFrame()
{ {
ALIGN_STACK(16 - m_offset % 16); stackAlign(m_offset, false);
#ifdef __M_X86_64 #ifdef __M_X86_64
@ -1226,4 +1239,47 @@ xScopedStackFrame::~xScopedStackFrame()
} }
} }
// Remembers the given registers, pushes each of them in list order, and then
// aligns the stack for the bytes that were pushed.
xScopedSavedRegisters::xScopedSavedRegisters(std::initializer_list<std::reference_wrapper<const xAddressReg>> regs)
    : regs(regs)
{
    // `regs` alone names the constructor parameter here; go through the stored copy.
    for (const std::reference_wrapper<const xAddressReg>& saved : this->regs)
        xPUSH(saved.get());

    // wordsize bytes were pushed per register.
    stackAlign(this->regs.size() * wordsize, true);
}
// Reverses the constructor: first re-adjusts the stack by the alignment amount
// in the opposite direction, then pops the saved registers in reverse push order.
xScopedSavedRegisters::~xScopedSavedRegisters() {
    stackAlign(regs.size() * wordsize, false);

    for (auto saved = regs.rbegin(); saved != regs.rend(); ++saved)
        xPOP(saved->get());
}
// Builds the address `base + offset`.
// If `base` fits in a sign-extended 32-bit value it is folded directly into the
// returned address. Otherwise an LEA of `base` into `tmpRegister` is emitted and
// the returned address is `offset + tmpRegister`.
xAddressVoid xComplexAddress(const xAddressReg& tmpRegister, void *base, const xAddressVoid& offset) {
    const sptr baseValue = (sptr)base;
    const bool fitsInS32 = (baseValue == (s32)baseValue);

    if (!fitsInS32) {
        // Too wide to fold in as a constant: materialize it in the scratch register.
        xLEA(tmpRegister, ptr[base]);
        return offset + tmpRegister;
    }

    return offset + base;
}
// Loads the address `addr` into `dst`.
// On x86-64 it emits an LEA when the distance from the end of that LEA to `addr`
// fits in a signed 32-bit displacement, and a 64-bit immediate move otherwise.
// On other builds it emits a plain immediate move.
void xLoadFarAddr(const xAddressReg& dst, void *addr) {
#ifdef __M_X86_64
    sptr target = (sptr)addr;
    const sptr nextRip = (sptr)xGetPtr() + 7; // the LEA will occupy 7 bytes
    const sptr distance = target - nextRip;
    const bool reachable = (distance == (s32)distance);

    if (reachable)
        xLEA(dst, ptr[addr]);
    else
        xMOV64(dst, target);
#else
    xMOV(dst, (sptr)addr);
#endif
}
} // End namespace x86Emitter } // End namespace x86Emitter

View File

@ -782,7 +782,7 @@ void vtlb_Term()
//nothing to do for now //nothing to do for now
} }
constexpr size_t VMAP_SIZE = sizeof(sptr) * VTLB_VMAP_ITEMS; constexpr size_t VMAP_SIZE = sizeof(VTLBVirtual) * VTLB_VMAP_ITEMS;
// Reserves the vtlb core allocation used by various emulation components! // Reserves the vtlb core allocation used by various emulation components!
// [TODO] basemem - request allocating memory at the specified virtual location, which can allow // [TODO] basemem - request allocating memory at the specified virtual location, which can allow

View File

@ -230,14 +230,14 @@ public:
#define PC_GETBLOCK_(x, reclut) ((BASEBLOCK*)(reclut[((u32)(x)) >> 16] + (x)*(sizeof(BASEBLOCK)/4))) #define PC_GETBLOCK_(x, reclut) ((BASEBLOCK*)(reclut[((u32)(x)) >> 16] + (x)*(sizeof(BASEBLOCK)/4)))
static void recLUT_SetPage(uptr reclut[0x10000], uptr hwlut[0x10000], static void recLUT_SetPage(uptr reclut[0x10000], u32 hwlut[0x10000],
BASEBLOCK *mapbase, uint pagebase, uint pageidx, uint mappage) BASEBLOCK *mapbase, uint pagebase, uint pageidx, uint mappage)
{ {
// this value is in 64k pages! // this value is in 64k pages!
uint page = pagebase + pageidx; uint page = pagebase + pageidx;
pxAssert( page < 0x10000 ); pxAssert( page < 0x10000 );
reclut[page] = (uptr)&mapbase[(mappage - page) << 14]; reclut[page] = (uptr)&mapbase[((s32)mappage - (s32)page) << 14];
if (hwlut) if (hwlut)
hwlut[page] = 0u - (pagebase << 16); hwlut[page] = 0u - (pagebase << 16);
} }

View File

@ -118,12 +118,15 @@ extern _x86regs x86regs[iREGCNT_GPR], s_saveX86regs[iREGCNT_GPR];
uptr _x86GetAddr(int type, int reg); uptr _x86GetAddr(int type, int reg);
void _initX86regs(); void _initX86regs();
int _getFreeX86reg(int mode); int _getFreeX86reg(int mode);
int _allocX86reg(x86Emitter::xRegisterLong x86reg, int type, int reg, int mode); [[deprecated]] int _allocX86reg(x86Emitter::xRegister64 x86reg, int type, int reg, int mode);
int _allocX86reg(x86Emitter::xRegister32 x86reg, int type, int reg, int mode);
// To resolve ambiguity between 32 and 64, delete once everything's on 32
int _allocX86reg(x86Emitter::xRegisterEmpty x86reg, int type, int reg, int mode);
void _deleteX86reg(int type, int reg, int flush); void _deleteX86reg(int type, int reg, int flush);
int _checkX86reg(int type, int reg, int mode); int _checkX86reg(int type, int reg, int mode);
void _addNeededX86reg(int type, int reg); void _addNeededX86reg(int type, int reg);
void _clearNeededX86regs(); void _clearNeededX86regs();
void _freeX86reg(const x86Emitter::xRegisterLong& x86reg); void _freeX86reg(const x86Emitter::xRegister32& x86reg);
void _freeX86reg(int x86reg); void _freeX86reg(int x86reg);
void _freeX86regs(); void _freeX86regs();
void _flushCachedRegs(); void _flushCachedRegs();

View File

@ -397,9 +397,9 @@ void FPU_MUL(int info, int regd, int sreg, int treg, bool acc)
if (CHECK_FPUMULHACK) if (CHECK_FPUMULHACK)
{ {
xMOVD(ecx, xRegisterSSE(sreg)); xMOVD(arg1regd, xRegisterSSE(sreg));
xMOVD(edx, xRegisterSSE(treg)); xMOVD(arg2regd, xRegisterSSE(treg));
xFastCall((void*)(uptr)&FPU_MUL_HACK, ecx, edx); //returns the hacked result or 0 xFastCall((void*)(uptr)&FPU_MUL_HACK, arg1regd, arg2regd); //returns the hacked result or 0
xTEST(eax, eax); xTEST(eax, eax);
noHack = JZ8(0); noHack = JZ8(0);
xMOVDZX(xRegisterSSE(regd), eax); xMOVDZX(xRegisterSSE(regd), eax);

View File

@ -1500,16 +1500,18 @@ void recQFSRV()
int info = eeRecompileCodeXMM(XMMINFO_WRITED); int info = eeRecompileCodeXMM(XMMINFO_WRITED);
xMOV(eax, ptr32[&cpuRegs.sa]); xMOV(eax, ptr32[&cpuRegs.sa]);
xMOVDQU(xRegisterSSE(EEREC_D), ptr32[eax + &cpuRegs.GPR.r[_Rt_]]); xLEA(rcx, ptr[&cpuRegs.GPR.r[_Rt_]]);
xMOVDQU(xRegisterSSE(EEREC_D), ptr32[rax + rcx]);
return; return;
} }
int info = eeRecompileCodeXMM( XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED ); int info = eeRecompileCodeXMM( XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED );
xMOV(eax, ptr32[&cpuRegs.sa]); xMOV(eax, ptr32[&cpuRegs.sa]);
xMOVDQA(ptr32[&tempqw[0]], xRegisterSSE(EEREC_T)); xLEA(rcx, ptr[tempqw]);
xMOVDQA(ptr32[&tempqw[4]], xRegisterSSE(EEREC_S)); xMOVDQA(ptr32[rcx], xRegisterSSE(EEREC_T));
xMOVDQU(xRegisterSSE(EEREC_D), ptr32[eax + &tempqw]); xMOVDQA(ptr32[rcx+16], xRegisterSSE(EEREC_S));
xMOVDQU(xRegisterSSE(EEREC_D), ptr32[rax + rcx]);
_clearNeededXMMregs(); _clearNeededXMMregs();
} }

View File

@ -46,7 +46,7 @@ u32 g_psxMaxRecMem = 0;
u32 s_psxrecblocks[] = {0}; u32 s_psxrecblocks[] = {0};
uptr psxRecLUT[0x10000]; uptr psxRecLUT[0x10000];
uptr psxhwLUT[0x10000]; u32 psxhwLUT[0x10000];
static __fi u32 HWADDR(u32 mem) { return psxhwLUT[mem >> 16] + mem; } static __fi u32 HWADDR(u32 mem) { return psxhwLUT[mem >> 16] + mem; }
@ -126,13 +126,13 @@ static DynGenFunc* _DynGen_JITCompile()
u8* retval = xGetPtr(); u8* retval = xGetPtr();
xFastCall((void*)iopRecRecompile, ptr[&psxRegs.pc] ); xFastCall((void*)iopRecRecompile, ptr32[&psxRegs.pc] );
xMOV( eax, ptr[&psxRegs.pc] ); xMOV( eax, ptr[&psxRegs.pc] );
xMOV( ebx, eax ); xMOV( ebx, eax );
xSHR( eax, 16 ); xSHR( eax, 16 );
xMOV( ecx, ptr[psxRecLUT + (eax*4)] ); xMOV( rcx, ptrNative[xComplexAddress(rcx, psxRecLUT, rax*wordsize)] );
xJMP( ptr32[ecx+ebx] ); xJMP( ptrNative[rbx*(wordsize/4) + rcx] );
return (DynGenFunc*)retval; return (DynGenFunc*)retval;
} }
@ -152,8 +152,8 @@ static DynGenFunc* _DynGen_DispatcherReg()
xMOV( eax, ptr[&psxRegs.pc] ); xMOV( eax, ptr[&psxRegs.pc] );
xMOV( ebx, eax ); xMOV( ebx, eax );
xSHR( eax, 16 ); xSHR( eax, 16 );
xMOV( ecx, ptr[psxRecLUT + (eax*4)] ); xMOV( rcx, ptrNative[xComplexAddress(rcx, psxRecLUT, rax*wordsize)] );
xJMP( ptr32[ecx+ebx] ); xJMP( ptrNative[rbx*(wordsize/4) + rcx] );
return (DynGenFunc*)retval; return (DynGenFunc*)retval;
} }
@ -391,7 +391,7 @@ void _psxDeleteReg(int reg, int flush)
_deleteX86reg(X86TYPE_PSX, reg, flush ? 0 : 2); _deleteX86reg(X86TYPE_PSX, reg, flush ? 0 : 2);
} }
void _psxMoveGPRtoR(const xRegisterLong& to, int fromgpr) void _psxMoveGPRtoR(const xRegister32& to, int fromgpr)
{ {
if( PSX_IS_CONST1(fromgpr) ) if( PSX_IS_CONST1(fromgpr) )
xMOV(to, g_psxConstRegs[fromgpr] ); xMOV(to, g_psxConstRegs[fromgpr] );
@ -863,22 +863,22 @@ void psxSetBranchReg(u32 reg)
psxbranch = 1; psxbranch = 1;
if( reg != 0xffffffff ) { if( reg != 0xffffffff ) {
_allocX86reg(esi, X86TYPE_PCWRITEBACK, 0, MODE_WRITE); _allocX86reg(calleeSavedReg2d, X86TYPE_PCWRITEBACK, 0, MODE_WRITE);
_psxMoveGPRtoR(esi, reg); _psxMoveGPRtoR(calleeSavedReg2d, reg);
psxRecompileNextInstruction(1); psxRecompileNextInstruction(1);
if( x86regs[esi.GetId()].inuse ) { if( x86regs[calleeSavedReg2d.GetId()].inuse ) {
pxAssert( x86regs[esi.GetId()].type == X86TYPE_PCWRITEBACK ); pxAssert( x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK );
xMOV(ptr[&psxRegs.pc], esi); xMOV(ptr32[&psxRegs.pc], calleeSavedReg2d);
x86regs[esi.GetId()].inuse = 0; x86regs[calleeSavedReg2d.GetId()].inuse = 0;
#ifdef PCSX2_DEBUG #ifdef PCSX2_DEBUG
xOR( esi, esi ); xOR( calleeSavedReg2d, calleeSavedReg2d );
#endif #endif
} }
else { else {
xMOV(eax, ptr[&g_recWriteback]); xMOV(eax, ptr32[&g_recWriteback]);
xMOV(ptr[&psxRegs.pc], eax); xMOV(ptr32[&psxRegs.pc], eax);
#ifdef PCSX2_DEBUG #ifdef PCSX2_DEBUG
xOR( eax, eax ); xOR( eax, eax );

View File

@ -48,7 +48,7 @@ void _psxFlushCall(int flushtype);
void _psxOnWriteReg(int reg); void _psxOnWriteReg(int reg);
void _psxMoveGPRtoR(const x86Emitter::xRegisterLong& to, int fromgpr); void _psxMoveGPRtoR(const x86Emitter::xRegister32& to, int fromgpr);
#if 0 #if 0
void _psxMoveGPRtoM(uptr to, int fromgpr); void _psxMoveGPRtoM(uptr to, int fromgpr);
void _psxMoveGPRtoRm(x86IntRegType to, int fromgpr); void _psxMoveGPRtoRm(x86IntRegType to, int fromgpr);

View File

@ -65,9 +65,9 @@ void rpsxADDconst(int dreg, int sreg, u32 off, int info)
if (sreg == dreg) { if (sreg == dreg) {
xADD(ptr32[&psxRegs.GPR.r[dreg]], off); xADD(ptr32[&psxRegs.GPR.r[dreg]], off);
} else { } else {
xMOV(eax, ptr[&psxRegs.GPR.r[sreg]]); xMOV(eax, ptr32[&psxRegs.GPR.r[sreg]]);
if (off) xADD(eax, off); if (off) xADD(eax, off);
xMOV(ptr[&psxRegs.GPR.r[dreg]], eax); xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax);
} }
} }
else { else {
@ -97,7 +97,7 @@ void rpsxSLTconst(int info, int dreg, int sreg, int imm)
xXOR(eax, eax); xXOR(eax, eax);
xCMP(ptr32[&psxRegs.GPR.r[sreg]], imm); xCMP(ptr32[&psxRegs.GPR.r[sreg]], imm);
xSETL(al); xSETL(al);
xMOV(ptr[&psxRegs.GPR.r[dreg]], eax); xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax);
} }
void rpsxSLTI_(int info) { rpsxSLTconst(info, _Rt_, _Rs_, _Imm_); } void rpsxSLTI_(int info) { rpsxSLTconst(info, _Rt_, _Rs_, _Imm_); }
@ -115,7 +115,7 @@ void rpsxSLTUconst(int info, int dreg, int sreg, int imm)
xXOR(eax, eax); xXOR(eax, eax);
xCMP(ptr32[&psxRegs.GPR.r[sreg]], imm); xCMP(ptr32[&psxRegs.GPR.r[sreg]], imm);
xSETB(al); xSETB(al);
xMOV(ptr[&psxRegs.GPR.r[dreg]], eax); xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax);
} }
void rpsxSLTIU_(int info) { rpsxSLTUconst(info, _Rt_, _Rs_, (s32)_Imm_); } void rpsxSLTIU_(int info) { rpsxSLTUconst(info, _Rt_, _Rs_, (s32)_Imm_); }
@ -134,9 +134,9 @@ void rpsxANDconst(int info, int dreg, int sreg, u32 imm)
if (sreg == dreg) { if (sreg == dreg) {
xAND(ptr32[&psxRegs.GPR.r[dreg]], imm); xAND(ptr32[&psxRegs.GPR.r[dreg]], imm);
} else { } else {
xMOV(eax, ptr[&psxRegs.GPR.r[sreg]]); xMOV(eax, ptr32[&psxRegs.GPR.r[sreg]]);
xAND(eax, imm); xAND(eax, imm);
xMOV(ptr[&psxRegs.GPR.r[dreg]], eax); xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax);
} }
} else { } else {
xMOV(ptr32[&psxRegs.GPR.r[dreg]], 0); xMOV(ptr32[&psxRegs.GPR.r[dreg]], 0);
@ -160,15 +160,15 @@ void rpsxORconst(int info, int dreg, int sreg, u32 imm)
xOR(ptr32[&psxRegs.GPR.r[dreg]], imm); xOR(ptr32[&psxRegs.GPR.r[dreg]], imm);
} }
else { else {
xMOV(eax, ptr[&psxRegs.GPR.r[sreg]]); xMOV(eax, ptr32[&psxRegs.GPR.r[sreg]]);
xOR(eax, imm); xOR(eax, imm);
xMOV(ptr[&psxRegs.GPR.r[dreg]], eax); xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax);
} }
} }
else { else {
if( dreg != sreg ) { if( dreg != sreg ) {
xMOV(ecx, ptr[&psxRegs.GPR.r[sreg]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]);
xMOV(ptr[&psxRegs.GPR.r[dreg]], ecx); xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx);
} }
} }
} }
@ -189,9 +189,9 @@ void rpsxXORconst(int info, int dreg, int sreg, u32 imm)
xNOT(ptr32[&psxRegs.GPR.r[dreg]]); xNOT(ptr32[&psxRegs.GPR.r[dreg]]);
} }
else { else {
xMOV(ecx, ptr[&psxRegs.GPR.r[sreg]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]);
xNOT(ecx); xNOT(ecx);
xMOV(ptr[&psxRegs.GPR.r[dreg]], ecx); xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx);
} }
} }
else if (imm) { else if (imm) {
@ -200,15 +200,15 @@ void rpsxXORconst(int info, int dreg, int sreg, u32 imm)
xXOR(ptr32[&psxRegs.GPR.r[dreg]], imm); xXOR(ptr32[&psxRegs.GPR.r[dreg]], imm);
} }
else { else {
xMOV(eax, ptr[&psxRegs.GPR.r[sreg]]); xMOV(eax, ptr32[&psxRegs.GPR.r[sreg]]);
xXOR(eax, imm); xXOR(eax, imm);
xMOV(ptr[&psxRegs.GPR.r[dreg]], eax); xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax);
} }
} }
else { else {
if( dreg != sreg ) { if( dreg != sreg ) {
xMOV(ecx, ptr[&psxRegs.GPR.r[sreg]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]);
xMOV(ptr[&psxRegs.GPR.r[dreg]], ecx); xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx);
} }
} }
} }
@ -241,16 +241,16 @@ void rpsxADDU_constt(int info)
void rpsxADDU_(int info) void rpsxADDU_(int info)
{ {
if (_Rs_ && _Rt_) { if (_Rs_ && _Rt_) {
xMOV(eax, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
xADD(eax, ptr[&psxRegs.GPR.r[_Rt_]]); xADD(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
} else if (_Rs_) { } else if (_Rs_) {
xMOV(eax, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
} else if (_Rt_) { } else if (_Rt_) {
xMOV(eax, ptr[&psxRegs.GPR.r[_Rt_]]); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
} else { } else {
xXOR(eax, eax); xXOR(eax, eax);
} }
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax); xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
} }
PSXRECOMPILE_CONSTCODE0(ADDU); PSXRECOMPILE_CONSTCODE0(ADDU);
@ -266,8 +266,8 @@ void rpsxSUBU_const()
void rpsxSUBU_consts(int info) void rpsxSUBU_consts(int info)
{ {
xMOV(eax, g_psxConstRegs[_Rs_]); xMOV(eax, g_psxConstRegs[_Rs_]);
xSUB(eax, ptr[&psxRegs.GPR.r[_Rt_]]); xSUB(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax); xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
} }
void rpsxSUBU_constt(int info) { rpsxADDconst(_Rd_, _Rs_, -(int)g_psxConstRegs[_Rt_], info); } void rpsxSUBU_constt(int info) { rpsxADDconst(_Rd_, _Rs_, -(int)g_psxConstRegs[_Rt_], info); }
@ -278,13 +278,13 @@ void rpsxSUBU_(int info)
if (!_Rd_) return; if (!_Rd_) return;
if( _Rd_ == _Rs_ ) { if( _Rd_ == _Rs_ ) {
xMOV(eax, ptr[&psxRegs.GPR.r[_Rt_]]); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
xSUB(ptr[&psxRegs.GPR.r[_Rd_]], eax); xSUB(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
} }
else { else {
xMOV(eax, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
xSUB(eax, ptr[&psxRegs.GPR.r[_Rt_]]); xSUB(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax); xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
} }
} }
@ -296,13 +296,13 @@ void rpsxLogicalOp(int info, int op)
{ {
if( _Rd_ == _Rs_ || _Rd_ == _Rt_ ) { if( _Rd_ == _Rs_ || _Rd_ == _Rt_ ) {
int vreg = _Rd_ == _Rs_ ? _Rt_ : _Rs_; int vreg = _Rd_ == _Rs_ ? _Rt_ : _Rs_;
xMOV(ecx, ptr[&psxRegs.GPR.r[vreg]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[vreg]]);
switch(op) { switch(op) {
case 0: xAND(ptr[&psxRegs.GPR.r[_Rd_]], ecx); break; case 0: xAND(ptr32[&psxRegs.GPR.r[_Rd_]], ecx); break;
case 1: xOR(ptr[&psxRegs.GPR.r[_Rd_]], ecx); break; case 1: xOR(ptr32[&psxRegs.GPR.r[_Rd_]], ecx); break;
case 2: xXOR(ptr[&psxRegs.GPR.r[_Rd_]], ecx); break; case 2: xXOR(ptr32[&psxRegs.GPR.r[_Rd_]], ecx); break;
case 3: xOR(ptr[&psxRegs.GPR.r[_Rd_]], ecx); break; case 3: xOR(ptr32[&psxRegs.GPR.r[_Rd_]], ecx); break;
default: pxAssert(0); default: pxAssert(0);
} }
@ -310,19 +310,19 @@ void rpsxLogicalOp(int info, int op)
xNOT(ptr32[&psxRegs.GPR.r[_Rd_]]); xNOT(ptr32[&psxRegs.GPR.r[_Rd_]]);
} }
else { else {
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
switch(op) { switch(op) {
case 0: xAND(ecx, ptr[&psxRegs.GPR.r[_Rt_]]); break; case 0: xAND(ecx, ptr32[&psxRegs.GPR.r[_Rt_]]); break;
case 1: xOR(ecx, ptr[&psxRegs.GPR.r[_Rt_]]); break; case 1: xOR(ecx, ptr32[&psxRegs.GPR.r[_Rt_]]); break;
case 2: xXOR(ecx, ptr[&psxRegs.GPR.r[_Rt_]]); break; case 2: xXOR(ecx, ptr32[&psxRegs.GPR.r[_Rt_]]); break;
case 3: xOR(ecx, ptr[&psxRegs.GPR.r[_Rt_]]); break; case 3: xOR(ecx, ptr32[&psxRegs.GPR.r[_Rt_]]); break;
default: pxAssert(0); default: pxAssert(0);
} }
if( op == 3 ) if( op == 3 )
xNOT(ecx); xNOT(ecx);
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], ecx); xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], ecx);
} }
} }
@ -374,10 +374,10 @@ void rpsxNORconst(int info, int dreg, int sreg, u32 imm)
xNOT(ptr32[&psxRegs.GPR.r[dreg]]); xNOT(ptr32[&psxRegs.GPR.r[dreg]]);
} }
else { else {
xMOV(ecx, ptr[&psxRegs.GPR.r[sreg]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]);
xOR(ecx, imm); xOR(ecx, imm);
xNOT(ecx); xNOT(ecx);
xMOV(ptr[&psxRegs.GPR.r[dreg]], ecx); xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx);
} }
} }
else { else {
@ -385,9 +385,9 @@ void rpsxNORconst(int info, int dreg, int sreg, u32 imm)
xNOT(ptr32[&psxRegs.GPR.r[dreg]]); xNOT(ptr32[&psxRegs.GPR.r[dreg]]);
} }
else { else {
xMOV(ecx, ptr[&psxRegs.GPR.r[sreg]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]);
xNOT(ecx); xNOT(ecx);
xMOV(ptr[&psxRegs.GPR.r[dreg]], ecx); xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx);
} }
} }
} }
@ -407,19 +407,19 @@ void rpsxSLT_const()
void rpsxSLT_consts(int info) void rpsxSLT_consts(int info)
{ {
xXOR(eax, eax); xXOR(eax, eax);
xCMP(ptr32[&psxRegs.GPR.r[_Rt_]], g_psxConstRegs[_Rs_]); xCMP(ptr32[&psxRegs.GPR.r[_Rt_]], g_psxConstRegs[_Rs_]);
xSETG(al); xSETG(al);
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax); xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
} }
void rpsxSLT_constt(int info) { rpsxSLTconst(info, _Rd_, _Rs_, g_psxConstRegs[_Rt_]); } void rpsxSLT_constt(int info) { rpsxSLTconst(info, _Rd_, _Rs_, g_psxConstRegs[_Rt_]); }
void rpsxSLT_(int info) void rpsxSLT_(int info)
{ {
xMOV(eax, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
xCMP(eax, ptr[&psxRegs.GPR.r[_Rt_]]); xCMP(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
xSETL(al); xSETL(al);
xAND(eax, 0xff); xAND(eax, 0xff);
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax); xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
} }
PSXRECOMPILE_CONSTCODE0(SLT); PSXRECOMPILE_CONSTCODE0(SLT);
@ -433,9 +433,9 @@ void rpsxSLTU_const()
void rpsxSLTU_consts(int info) void rpsxSLTU_consts(int info)
{ {
xXOR(eax, eax); xXOR(eax, eax);
xCMP(ptr32[&psxRegs.GPR.r[_Rt_]], g_psxConstRegs[_Rs_]); xCMP(ptr32[&psxRegs.GPR.r[_Rt_]], g_psxConstRegs[_Rs_]);
xSETA(al); xSETA(al);
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax); xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
} }
void rpsxSLTU_constt(int info) { rpsxSLTUconst(info, _Rd_, _Rs_, g_psxConstRegs[_Rt_]); } void rpsxSLTU_constt(int info) { rpsxSLTUconst(info, _Rd_, _Rs_, g_psxConstRegs[_Rt_]); }
@ -444,11 +444,11 @@ void rpsxSLTU_(int info)
// Rd = Rs < Rt (unsigned) // Rd = Rs < Rt (unsigned)
if (!_Rd_) return; if (!_Rd_) return;
xMOV(eax, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
xCMP(eax, ptr[&psxRegs.GPR.r[_Rt_]]); xCMP(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
xSBB(eax, eax); xSBB(eax, eax);
xNEG(eax); xNEG(eax);
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax); xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
} }
PSXRECOMPILE_CONSTCODE0(SLTU); PSXRECOMPILE_CONSTCODE0(SLTU);
@ -468,18 +468,18 @@ void rpsxMULTsuperconst(int info, int sreg, int imm, int sign)
xMOV(eax, imm); xMOV(eax, imm);
if( sign ) xMUL(ptr32[&psxRegs.GPR.r[sreg]]); if( sign ) xMUL(ptr32[&psxRegs.GPR.r[sreg]]);
else xUMUL(ptr32[&psxRegs.GPR.r[sreg]]); else xUMUL(ptr32[&psxRegs.GPR.r[sreg]]);
xMOV(ptr[&psxRegs.GPR.n.lo], eax); xMOV(ptr32[&psxRegs.GPR.n.lo], eax);
xMOV(ptr[&psxRegs.GPR.n.hi], edx); xMOV(ptr32[&psxRegs.GPR.n.hi], edx);
} }
void rpsxMULTsuper(int info, int sign) void rpsxMULTsuper(int info, int sign)
{ {
// Lo/Hi = Rs * Rt (signed) // Lo/Hi = Rs * Rt (signed)
xMOV(eax, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
if( sign ) xMUL(ptr32[&psxRegs.GPR.r[_Rt_]]); if( sign ) xMUL(ptr32[&psxRegs.GPR.r[_Rt_]]);
else xUMUL(ptr32[&psxRegs.GPR.r[_Rt_]]); else xUMUL(ptr32[&psxRegs.GPR.r[_Rt_]]);
xMOV(ptr[&psxRegs.GPR.n.lo], eax); xMOV(ptr32[&psxRegs.GPR.n.lo], eax);
xMOV(ptr[&psxRegs.GPR.n.hi], edx); xMOV(ptr32[&psxRegs.GPR.n.hi], edx);
} }
void rpsxMULT_consts(int info) { rpsxMULTsuperconst(info, _Rt_, g_psxConstRegs[_Rs_], 1); } void rpsxMULT_consts(int info) { rpsxMULTsuperconst(info, _Rt_, g_psxConstRegs[_Rs_], 1); }
@ -542,12 +542,12 @@ void rpsxDIVsuper(int info, int sign, int process = 0)
if( process & PROCESS_CONSTT ) if( process & PROCESS_CONSTT )
xMOV(ecx, g_psxConstRegs[_Rt_]); xMOV(ecx, g_psxConstRegs[_Rt_]);
else else
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rt_]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rt_]]);
if( process & PROCESS_CONSTS ) if( process & PROCESS_CONSTS )
xMOV(eax, g_psxConstRegs[_Rs_]); xMOV(eax, g_psxConstRegs[_Rs_]);
else else
xMOV(eax, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
u8 *end1; u8 *end1;
if (sign) //test for overflow (x86 will just throw an exception) if (sign) //test for overflow (x86 will just throw an exception)
@ -593,8 +593,8 @@ void rpsxDIVsuper(int info, int sign, int process = 0)
if (sign) x86SetJ8( end1 ); if (sign) x86SetJ8( end1 );
x86SetJ8( end2 ); x86SetJ8( end2 );
xMOV(ptr[&psxRegs.GPR.n.lo], eax); xMOV(ptr32[&psxRegs.GPR.n.lo], eax);
xMOV(ptr[&psxRegs.GPR.n.hi], edx); xMOV(ptr32[&psxRegs.GPR.n.hi], edx);
} }
void rpsxDIV_consts(int info) { rpsxDIVsuper(info, 1, PROCESS_CONSTS); } void rpsxDIV_consts(int info) { rpsxDIVsuper(info, 1, PROCESS_CONSTS); }
@ -639,12 +639,12 @@ static void rpsxLB()
_psxOnWriteReg(_Rt_); _psxOnWriteReg(_Rt_);
_psxDeleteReg(_Rt_, 0); _psxDeleteReg(_Rt_, 0);
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_); if (_Imm_) xADD(ecx, _Imm_);
xFastCall((void*)iopMemRead8, ecx ); // returns value in EAX xFastCall((void*)iopMemRead8, ecx ); // returns value in EAX
if (_Rt_) { if (_Rt_) {
xMOVSX(eax, al); xMOVSX(eax, al);
xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax); xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax);
} }
PSX_DEL_CONST(_Rt_); PSX_DEL_CONST(_Rt_);
} }
@ -655,12 +655,12 @@ static void rpsxLBU()
_psxOnWriteReg(_Rt_); _psxOnWriteReg(_Rt_);
_psxDeleteReg(_Rt_, 0); _psxDeleteReg(_Rt_, 0);
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_); if (_Imm_) xADD(ecx, _Imm_);
xFastCall((void*)iopMemRead8, ecx ); // returns value in EAX xFastCall((void*)iopMemRead8, ecx ); // returns value in EAX
if (_Rt_) { if (_Rt_) {
xMOVZX(eax, al); xMOVZX(eax, al);
xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax); xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax);
} }
PSX_DEL_CONST(_Rt_); PSX_DEL_CONST(_Rt_);
} }
@ -671,12 +671,12 @@ static void rpsxLH()
_psxOnWriteReg(_Rt_); _psxOnWriteReg(_Rt_);
_psxDeleteReg(_Rt_, 0); _psxDeleteReg(_Rt_, 0);
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_); if (_Imm_) xADD(ecx, _Imm_);
xFastCall((void*)iopMemRead16, ecx ); // returns value in EAX xFastCall((void*)iopMemRead16, ecx ); // returns value in EAX
if (_Rt_) { if (_Rt_) {
xMOVSX(eax, ax); xMOVSX(eax, ax);
xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax); xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax);
} }
PSX_DEL_CONST(_Rt_); PSX_DEL_CONST(_Rt_);
} }
@ -687,12 +687,12 @@ static void rpsxLHU()
_psxOnWriteReg(_Rt_); _psxOnWriteReg(_Rt_);
_psxDeleteReg(_Rt_, 0); _psxDeleteReg(_Rt_, 0);
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_); if (_Imm_) xADD(ecx, _Imm_);
xFastCall((void*)iopMemRead16, ecx ); // returns value in EAX xFastCall((void*)iopMemRead16, ecx ); // returns value in EAX
if (_Rt_) { if (_Rt_) {
xMOVZX(eax, ax); xMOVZX(eax, ax);
xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax); xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax);
} }
PSX_DEL_CONST(_Rt_); PSX_DEL_CONST(_Rt_);
} }
@ -704,7 +704,7 @@ static void rpsxLW()
_psxDeleteReg(_Rt_, 0); _psxDeleteReg(_Rt_, 0);
_psxFlushCall(FLUSH_EVERYTHING); _psxFlushCall(FLUSH_EVERYTHING);
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_); if (_Imm_) xADD(ecx, _Imm_);
xTEST(ecx, 0x10000000); xTEST(ecx, 0x10000000);
@ -712,18 +712,17 @@ static void rpsxLW()
xFastCall((void*)iopMemRead32, ecx ); // returns value in EAX xFastCall((void*)iopMemRead32, ecx ); // returns value in EAX
if (_Rt_) { if (_Rt_) {
xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax); xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax);
} }
j8Ptr[1] = JMP8(0); j8Ptr[1] = JMP8(0);
x86SetJ8(j8Ptr[0]); x86SetJ8(j8Ptr[0]);
// read from psM directly // read from psM directly
xAND(ecx, 0x1fffff); xAND(ecx, 0x1fffff);
xADD(ecx, (uptr)iopMem->Main);
xMOV(ecx, ptr[ecx]); xMOV(ecx, ptr32[xComplexAddress(rax, iopMem->Main, rcx)]);
if (_Rt_) { if (_Rt_) {
xMOV(ptr[&psxRegs.GPR.r[_Rt_]], ecx); xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], ecx);
} }
x86SetJ8(j8Ptr[1]); x86SetJ8(j8Ptr[1]);
@ -735,10 +734,10 @@ static void rpsxSB()
_psxDeleteReg(_Rs_, 1); _psxDeleteReg(_Rs_, 1);
_psxDeleteReg(_Rt_, 1); _psxDeleteReg(_Rt_, 1);
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(arg1regd, ptr32[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_); if (_Imm_) xADD(arg1regd, _Imm_);
xMOV( edx, ptr[&psxRegs.GPR.r[_Rt_]] ); xMOV( arg2regd, ptr32[&psxRegs.GPR.r[_Rt_]] );
xFastCall((void*)iopMemWrite8, ecx, edx ); xFastCall((void*)iopMemWrite8, arg1regd, arg2regd );
} }
static void rpsxSH() static void rpsxSH()
@ -746,10 +745,10 @@ static void rpsxSH()
_psxDeleteReg(_Rs_, 1); _psxDeleteReg(_Rs_, 1);
_psxDeleteReg(_Rt_, 1); _psxDeleteReg(_Rt_, 1);
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(arg1regd, ptr32[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_); if (_Imm_) xADD(arg1regd, _Imm_);
xMOV( edx, ptr[&psxRegs.GPR.r[_Rt_]] ); xMOV( arg2regd, ptr32[&psxRegs.GPR.r[_Rt_]] );
xFastCall((void*)iopMemWrite16, ecx, edx ); xFastCall((void*)iopMemWrite16, arg1regd, arg2regd );
} }
static void rpsxSW() static void rpsxSW()
@ -757,10 +756,10 @@ static void rpsxSW()
_psxDeleteReg(_Rs_, 1); _psxDeleteReg(_Rs_, 1);
_psxDeleteReg(_Rt_, 1); _psxDeleteReg(_Rt_, 1);
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(arg1regd, ptr32[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_); if (_Imm_) xADD(arg1regd, _Imm_);
xMOV( edx, ptr[&psxRegs.GPR.r[_Rt_]] ); xMOV( arg2regd, ptr32[&psxRegs.GPR.r[_Rt_]] );
xFastCall((void*)iopMemWrite32, ecx, edx ); xFastCall((void*)iopMemWrite32, arg1regd, arg2regd );
} }
//// SLL //// SLL
@ -782,19 +781,19 @@ void rpsxShiftConst(int info, int rdreg, int rtreg, int imm, int shifttype)
} }
} }
else { else {
xMOV(eax, ptr[&psxRegs.GPR.r[rtreg]]); xMOV(eax, ptr32[&psxRegs.GPR.r[rtreg]]);
switch(shifttype) { switch(shifttype) {
case 0: xSHL(eax, imm); break; case 0: xSHL(eax, imm); break;
case 1: xSHR(eax, imm); break; case 1: xSHR(eax, imm); break;
case 2: xSAR(eax, imm); break; case 2: xSAR(eax, imm); break;
} }
xMOV(ptr[&psxRegs.GPR.r[rdreg]], eax); xMOV(ptr32[&psxRegs.GPR.r[rdreg]], eax);
} }
} }
else { else {
if( rdreg != rtreg ) { if( rdreg != rtreg ) {
xMOV(eax, ptr[&psxRegs.GPR.r[rtreg]]); xMOV(eax, ptr32[&psxRegs.GPR.r[rtreg]]);
xMOV(ptr[&psxRegs.GPR.r[rdreg]], eax); xMOV(ptr32[&psxRegs.GPR.r[rdreg]], eax);
} }
} }
} }
@ -834,23 +833,23 @@ void rpsxShiftVconsts(int info, int shifttype)
void rpsxShiftVconstt(int info, int shifttype) void rpsxShiftVconstt(int info, int shifttype)
{ {
xMOV(eax, g_psxConstRegs[_Rt_]); xMOV(eax, g_psxConstRegs[_Rt_]);
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
switch(shifttype) { switch(shifttype) {
case 0: xSHL(eax, cl); break; case 0: xSHL(eax, cl); break;
case 1: xSHR(eax, cl); break; case 1: xSHR(eax, cl); break;
case 2: xSAR(eax, cl); break; case 2: xSAR(eax, cl); break;
} }
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax); xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
} }
void rpsxSLLV_consts(int info) { rpsxShiftVconsts(info, 0); } void rpsxSLLV_consts(int info) { rpsxShiftVconsts(info, 0); }
void rpsxSLLV_constt(int info) { rpsxShiftVconstt(info, 0); } void rpsxSLLV_constt(int info) { rpsxShiftVconstt(info, 0); }
void rpsxSLLV_(int info) void rpsxSLLV_(int info)
{ {
xMOV(eax, ptr[&psxRegs.GPR.r[_Rt_]]); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
xSHL(eax, cl); xSHL(eax, cl);
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax); xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
} }
PSXRECOMPILE_CONSTCODE0(SLLV); PSXRECOMPILE_CONSTCODE0(SLLV);
@ -865,10 +864,10 @@ void rpsxSRLV_consts(int info) { rpsxShiftVconsts(info, 1); }
void rpsxSRLV_constt(int info) { rpsxShiftVconstt(info, 1); } void rpsxSRLV_constt(int info) { rpsxShiftVconstt(info, 1); }
void rpsxSRLV_(int info) void rpsxSRLV_(int info)
{ {
xMOV(eax, ptr[&psxRegs.GPR.r[_Rt_]]); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
xSHR(eax, cl); xSHR(eax, cl);
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax); xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
} }
PSXRECOMPILE_CONSTCODE0(SRLV); PSXRECOMPILE_CONSTCODE0(SRLV);
@ -883,10 +882,10 @@ void rpsxSRAV_consts(int info) { rpsxShiftVconsts(info, 2); }
void rpsxSRAV_constt(int info) { rpsxShiftVconstt(info, 2); } void rpsxSRAV_constt(int info) { rpsxShiftVconstt(info, 2); }
void rpsxSRAV_(int info) void rpsxSRAV_(int info)
{ {
xMOV(eax, ptr[&psxRegs.GPR.r[_Rt_]]); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
xSAR(eax, cl); xSAR(eax, cl);
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax); xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
} }
PSXRECOMPILE_CONSTCODE0(SRAV); PSXRECOMPILE_CONSTCODE0(SRAV);
@ -900,8 +899,8 @@ void rpsxMFHI()
_psxOnWriteReg(_Rd_); _psxOnWriteReg(_Rd_);
_psxDeleteReg(_Rd_, 0); _psxDeleteReg(_Rd_, 0);
xMOV(eax, ptr[&psxRegs.GPR.n.hi]); xMOV(eax, ptr32[&psxRegs.GPR.n.hi]);
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax); xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
} }
void rpsxMTHI() void rpsxMTHI()
@ -911,8 +910,8 @@ void rpsxMTHI()
} }
else { else {
_psxDeleteReg(_Rs_, 1); _psxDeleteReg(_Rs_, 1);
xMOV(eax, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
xMOV(ptr[&psxRegs.GPR.n.hi], eax); xMOV(ptr32[&psxRegs.GPR.n.hi], eax);
} }
} }
@ -922,8 +921,8 @@ void rpsxMFLO()
_psxOnWriteReg(_Rd_); _psxOnWriteReg(_Rd_);
_psxDeleteReg(_Rd_, 0); _psxDeleteReg(_Rd_, 0);
xMOV(eax, ptr[&psxRegs.GPR.n.lo]); xMOV(eax, ptr32[&psxRegs.GPR.n.lo]);
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax); xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
} }
void rpsxMTLO() void rpsxMTLO()
@ -933,8 +932,8 @@ void rpsxMTLO()
} }
else { else {
_psxDeleteReg(_Rs_, 1); _psxDeleteReg(_Rs_, 1);
xMOV(eax, ptr[&psxRegs.GPR.r[_Rs_]]); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
xMOV(ptr[&psxRegs.GPR.n.lo], eax); xMOV(ptr32[&psxRegs.GPR.n.lo], eax);
} }
} }
@ -965,8 +964,8 @@ void rpsxJR()
void rpsxJALR() void rpsxJALR()
{ {
// jalr Rs // jalr Rs
_allocX86reg(esi, X86TYPE_PCWRITEBACK, 0, MODE_WRITE); _allocX86reg(calleeSavedReg2d, X86TYPE_PCWRITEBACK, 0, MODE_WRITE);
_psxMoveGPRtoR(esi, _Rs_); _psxMoveGPRtoR(calleeSavedReg2d, _Rs_);
if ( _Rd_ ) if ( _Rd_ )
{ {
@ -977,18 +976,18 @@ void rpsxJALR()
psxRecompileNextInstruction(1); psxRecompileNextInstruction(1);
if( x86regs[esi.GetId()].inuse ) { if( x86regs[calleeSavedReg2d.GetId()].inuse ) {
pxAssert( x86regs[esi.GetId()].type == X86TYPE_PCWRITEBACK ); pxAssert( x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK );
xMOV(ptr[&psxRegs.pc], esi); xMOV(ptr32[&psxRegs.pc], calleeSavedReg2d);
x86regs[esi.GetId()].inuse = 0; x86regs[calleeSavedReg2d.GetId()].inuse = 0;
#ifdef PCSX2_DEBUG #ifdef PCSX2_DEBUG
xOR( esi, esi ); xOR( calleeSavedReg2d, calleeSavedReg2d );
#endif #endif
} }
else { else {
xMOV(eax, ptr[&g_recWriteback]); xMOV(eax, ptr32[&g_recWriteback]);
xMOV(ptr[&psxRegs.pc], eax); xMOV(ptr32[&psxRegs.pc], eax);
#ifdef PCSX2_DEBUG #ifdef PCSX2_DEBUG
xOR( eax, eax ); xOR( eax, eax );
#endif #endif
@ -1016,8 +1015,8 @@ void rpsxSetBranchEQ(int info, int process)
s_pbranchjmp = JNE32( 0 ); s_pbranchjmp = JNE32( 0 );
} }
else { else {
xMOV(eax, ptr[&psxRegs.GPR.r[ _Rs_ ] ]); xMOV(eax, ptr32[&psxRegs.GPR.r[ _Rs_ ] ]);
xCMP(eax, ptr[&psxRegs.GPR.r[ _Rt_ ] ]); xCMP(eax, ptr32[&psxRegs.GPR.r[ _Rt_ ] ]);
s_pbranchjmp = JNE32( 0 ); s_pbranchjmp = JNE32( 0 );
} }
} }
@ -1342,8 +1341,8 @@ void rpsxMFC0()
if (!_Rt_) return; if (!_Rt_) return;
_psxOnWriteReg(_Rt_); _psxOnWriteReg(_Rt_);
xMOV(eax, ptr[&psxRegs.CP0.r[_Rd_]]); xMOV(eax, ptr32[&psxRegs.CP0.r[_Rd_]]);
xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax); xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax);
} }
void rpsxCFC0() void rpsxCFC0()
@ -1352,8 +1351,8 @@ void rpsxCFC0()
if (!_Rt_) return; if (!_Rt_) return;
_psxOnWriteReg(_Rt_); _psxOnWriteReg(_Rt_);
xMOV(eax, ptr[&psxRegs.CP0.r[_Rd_]]); xMOV(eax, ptr32[&psxRegs.CP0.r[_Rd_]]);
xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax); xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax);
} }
void rpsxMTC0() void rpsxMTC0()
@ -1364,8 +1363,8 @@ void rpsxMTC0()
} }
else { else {
_psxDeleteReg(_Rt_, 1); _psxDeleteReg(_Rt_, 1);
xMOV(eax, ptr[&psxRegs.GPR.r[_Rt_]]); xMOV(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
xMOV(ptr[&psxRegs.CP0.r[_Rd_]], eax); xMOV(ptr32[&psxRegs.CP0.r[_Rd_]], eax);
} }
} }
@ -1377,13 +1376,13 @@ void rpsxCTC0()
void rpsxRFE() void rpsxRFE()
{ {
xMOV(eax, ptr[&psxRegs.CP0.n.Status]); xMOV(eax, ptr32[&psxRegs.CP0.n.Status]);
xMOV(ecx, eax); xMOV(ecx, eax);
xAND(eax, 0xfffffff0); xAND(eax, 0xfffffff0);
xAND(ecx, 0x3c); xAND(ecx, 0x3c);
xSHR(ecx, 2); xSHR(ecx, 2);
xOR(eax, ecx); xOR(eax, ecx);
xMOV(ptr[&psxRegs.CP0.n.Status], eax); xMOV(ptr32[&psxRegs.CP0.n.Status], eax);
// Test the IOP's INTC status, so that any pending ints get raised. // Test the IOP's INTC status, so that any pending ints get raised.

View File

@ -104,7 +104,7 @@ extern u32 g_cpuHasConstReg, g_cpuFlushedConstReg;
u32* _eeGetConstReg(int reg); u32* _eeGetConstReg(int reg);
// finds where the GPR is stored and moves lower 32 bits to EAX // finds where the GPR is stored and moves lower 32 bits to EAX
void _eeMoveGPRtoR(const x86Emitter::xRegisterLong& to, int fromgpr); void _eeMoveGPRtoR(const x86Emitter::xRegister32& to, int fromgpr);
void _eeMoveGPRtoM(uptr to, int fromgpr); void _eeMoveGPRtoM(uptr to, int fromgpr);
void _eeMoveGPRtoRm(x86IntRegType to, int fromgpr); void _eeMoveGPRtoRm(x86IntRegType to, int fromgpr);
void eeSignExtendTo(int gpr, bool onlyupper=false); void eeSignExtendTo(int gpr, bool onlyupper=false);

View File

@ -239,7 +239,17 @@ void _flushConstRegs()
} }
} }
int _allocX86reg(xRegisterLong x86reg, int type, int reg, int mode) int _allocX86reg(xRegisterEmpty x86reg, int type, int reg, int mode)
{
return _allocX86reg(xRegister32(x86reg), type, reg, mode);
}
int _allocX86reg(xRegister64 x86reg, int type, int reg, int mode)
{
return _allocX86reg(xRegister32(x86reg.Id), type, reg, mode);
}
int _allocX86reg(xRegister32 x86reg, int type, int reg, int mode)
{ {
uint i; uint i;
pxAssertDev( reg >= 0 && reg < 32, "Register index out of bounds." ); pxAssertDev( reg >= 0 && reg < 32, "Register index out of bounds." );
@ -313,7 +323,7 @@ int _allocX86reg(xRegisterLong x86reg, int type, int reg, int mode)
} }
if (x86reg.IsEmpty()) if (x86reg.IsEmpty())
x86reg = xRegisterLong(_getFreeX86reg(oldmode)); x86reg = xRegister32(_getFreeX86reg(oldmode));
else else
_freeX86reg(x86reg); _freeX86reg(x86reg);
@ -440,7 +450,7 @@ void _deleteX86reg(int type, int reg, int flush)
} }
// Temporary solution to support eax/ebx... type // Temporary solution to support eax/ebx... type
void _freeX86reg(const x86Emitter::xRegisterLong& x86reg) void _freeX86reg(const x86Emitter::xRegister32& x86reg)
{ {
_freeX86reg(x86reg.GetId()); _freeX86reg(x86reg.GetId());
} }

View File

@ -51,7 +51,7 @@ using namespace R5900;
u32 maxrecmem = 0; u32 maxrecmem = 0;
static __aligned16 uptr recLUT[_64kb]; static __aligned16 uptr recLUT[_64kb];
static __aligned16 uptr hwLUT[_64kb]; static __aligned16 u32 hwLUT[_64kb];
static __fi u32 HWADDR(u32 mem) { return hwLUT[mem >> 16] + mem; } static __fi u32 HWADDR(u32 mem) { return hwLUT[mem >> 16] + mem; }
@ -75,7 +75,7 @@ static const int RECCONSTBUF_SIZE = 16384 * 2; // 64 bit consts in 32 bit units
static RecompiledCodeReserve* recMem = NULL; static RecompiledCodeReserve* recMem = NULL;
static u8* recRAMCopy = NULL; static u8* recRAMCopy = NULL;
static u8* recLutReserve_RAM = NULL; static u8* recLutReserve_RAM = NULL;
static const size_t recLutSize = Ps2MemSize::MainRam + Ps2MemSize::Rom + Ps2MemSize::Rom1 + Ps2MemSize::Rom2; static const size_t recLutSize = (Ps2MemSize::MainRam + Ps2MemSize::Rom + Ps2MemSize::Rom1 + Ps2MemSize::Rom2) * wordsize / 4;
static uptr m_ConfiguredCacheReserve = 64; static uptr m_ConfiguredCacheReserve = 64;
@ -153,7 +153,7 @@ u32* _eeGetConstReg(int reg)
return &cpuRegs.GPR.r[ reg ].UL[0]; return &cpuRegs.GPR.r[ reg ].UL[0];
} }
void _eeMoveGPRtoR(const xRegisterLong& to, int fromgpr) void _eeMoveGPRtoR(const xRegister32& to, int fromgpr)
{ {
if( fromgpr == 0 ) if( fromgpr == 0 )
xXOR(to, to); // zero register should use xor, thanks --air xXOR(to, to); // zero register should use xor, thanks --air
@ -346,13 +346,17 @@ static DynGenFunc* _DynGen_JITCompile()
u8* retval = xGetAlignedCallTarget(); u8* retval = xGetAlignedCallTarget();
xFastCall((void*)recRecompile, ptr[&cpuRegs.pc] ); xFastCall((void*)recRecompile, ptr32[&cpuRegs.pc] );
// C equivalent:
// u32 addr = cpuRegs.pc;
// void(**base)() = (void(**)())recLUT[addr >> 16];
// base[addr >> 2]();
xMOV( eax, ptr[&cpuRegs.pc] ); xMOV( eax, ptr[&cpuRegs.pc] );
xMOV( ebx, eax ); xMOV( ebx, eax );
xSHR( eax, 16 ); xSHR( eax, 16 );
xMOV( ecx, ptr[recLUT + (eax*4)] ); xMOV( rcx, ptrNative[xComplexAddress(rcx, recLUT, rax*wordsize)] );
xJMP( ptr32[ecx+ebx] ); xJMP( ptrNative[rbx*(wordsize/4) + rcx] );
return (DynGenFunc*)retval; return (DynGenFunc*)retval;
} }
@ -369,11 +373,15 @@ static DynGenFunc* _DynGen_DispatcherReg()
{ {
u8* retval = xGetPtr(); // fallthrough target, can't align it! u8* retval = xGetPtr(); // fallthrough target, can't align it!
// C equivalent:
// u32 addr = cpuRegs.pc;
// void(**base)() = (void(**)())recLUT[addr >> 16];
// base[addr >> 2]();
xMOV( eax, ptr[&cpuRegs.pc] ); xMOV( eax, ptr[&cpuRegs.pc] );
xMOV( ebx, eax ); xMOV( ebx, eax );
xSHR( eax, 16 ); xSHR( eax, 16 );
xMOV( ecx, ptr[recLUT + (eax*4)] ); xMOV( rcx, ptrNative[xComplexAddress(rcx, recLUT, rax*wordsize)] );
xJMP( ptr32[ecx+ebx] ); xJMP( ptrNative[rbx*(wordsize/4) + rcx] );
return (DynGenFunc*)retval; return (DynGenFunc*)retval;
} }
@ -461,7 +469,7 @@ static void _DynGen_Dispatchers()
static __ri void ClearRecLUT(BASEBLOCK* base, int memsize) static __ri void ClearRecLUT(BASEBLOCK* base, int memsize)
{ {
for (int i = 0; i < memsize/4; i++) for (int i = 0; i < memsize/(int)sizeof(uptr); i++)
base[i].SetFnptr((uptr)JITCompile); base[i].SetFnptr((uptr)JITCompile);
} }
@ -521,7 +529,7 @@ static void recAlloc()
for (int i = 0; i < 0x10000; i++) for (int i = 0; i < 0x10000; i++)
recLUT_SetPage(recLUT, 0, 0, 0, i, 0); recLUT_SetPage(recLUT, 0, 0, 0, i, 0);
for ( int i = 0x0000; i < Ps2MemSize::MainRam / 0x10000; i++ ) for ( int i = 0x0000; i < (int)(Ps2MemSize::MainRam / 0x10000); i++ )
{ {
recLUT_SetPage(recLUT, hwLUT, recRAM, 0x0000, i, i); recLUT_SetPage(recLUT, hwLUT, recRAM, 0x0000, i, i);
recLUT_SetPage(recLUT, hwLUT, recRAM, 0x2000, i, i); recLUT_SetPage(recLUT, hwLUT, recRAM, 0x2000, i, i);
@ -864,21 +872,21 @@ void SetBranchReg( u32 reg )
// xMOV(ptr[&cpuRegs.pc], eax); // xMOV(ptr[&cpuRegs.pc], eax);
// } // }
// } // }
_allocX86reg(esi, X86TYPE_PCWRITEBACK, 0, MODE_WRITE); _allocX86reg(calleeSavedReg2d, X86TYPE_PCWRITEBACK, 0, MODE_WRITE);
_eeMoveGPRtoR(esi, reg); _eeMoveGPRtoR(calleeSavedReg2d, reg);
if (EmuConfig.Gamefixes.GoemonTlbHack) { if (EmuConfig.Gamefixes.GoemonTlbHack) {
xMOV(ecx, esi); xMOV(ecx, calleeSavedReg2d);
vtlb_DynV2P(); vtlb_DynV2P();
xMOV(esi, eax); xMOV(calleeSavedReg2d, eax);
} }
recompileNextInstruction(1); recompileNextInstruction(1);
if( x86regs[esi.GetId()].inuse ) { if( x86regs[calleeSavedReg2d.GetId()].inuse ) {
pxAssert( x86regs[esi.GetId()].type == X86TYPE_PCWRITEBACK ); pxAssert( x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK );
xMOV(ptr[&cpuRegs.pc], esi); xMOV(ptr[&cpuRegs.pc], calleeSavedReg2d);
x86regs[esi.GetId()].inuse = 0; x86regs[calleeSavedReg2d.GetId()].inuse = 0;
} }
else { else {
xMOV(eax, ptr[&g_recWriteback]); xMOV(eax, ptr[&g_recWriteback]);
@ -1525,8 +1533,8 @@ static void memory_protect_recompiled_code(u32 startpc, u32 size)
break; break;
case ProtMode_Manual: case ProtMode_Manual:
xMOV( ecx, inpage_ptr ); xMOV( arg1regd, inpage_ptr );
xMOV( edx, inpage_sz / 4 ); xMOV( arg2regd, inpage_sz / 4 );
//xMOV( eax, startpc ); // uncomment this to access startpc (as eax) in dyna_block_discard //xMOV( eax, startpc ); // uncomment this to access startpc (as eax) in dyna_block_discard
u32 lpc = inpage_ptr; u32 lpc = inpage_ptr;
@ -1737,7 +1745,7 @@ static void __fastcall recRecompile( const u32 startpc )
// Game will unmap some virtual addresses. If a constant address were hardcoded in the block, we would be in a bad situation. // Game will unmap some virtual addresses. If a constant address were hardcoded in the block, we would be in a bad situation.
eeRecNeedsReset = true; eeRecNeedsReset = true;
// 0x3563b8 is the start address of the function that invalidate entry in TLB cache // 0x3563b8 is the start address of the function that invalidate entry in TLB cache
xFastCall((void*)GoemonUnloadTlb, ptr[&cpuRegs.GPR.n.a0.UL[0]]); xFastCall((void*)GoemonUnloadTlb, ptr32[&cpuRegs.GPR.n.a0.UL[0]]);
} }
} }

View File

@ -148,7 +148,7 @@ void recSLTIU_(int info)
x86SetJ8(j8Ptr[0]); x86SetJ8(j8Ptr[0]);
x86SetJ8(j8Ptr[1]); x86SetJ8(j8Ptr[1]);
xMOV(ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], eax); xMOV(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ]], 0 ); xMOV(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ]], 0 );
} }
@ -178,7 +178,7 @@ void recSLTI_(int info)
x86SetJ8(j8Ptr[0]); x86SetJ8(j8Ptr[0]);
x86SetJ8(j8Ptr[1]); x86SetJ8(j8Ptr[1]);
xMOV(ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], eax); xMOV(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ]], 0 ); xMOV(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ]], 0 );
} }

View File

@ -103,13 +103,13 @@ void recJALR()
EE::Profiler.EmitOp(eeOpcode::JALR); EE::Profiler.EmitOp(eeOpcode::JALR);
int newpc = pc + 4; int newpc = pc + 4;
_allocX86reg(esi, X86TYPE_PCWRITEBACK, 0, MODE_WRITE); _allocX86reg(calleeSavedReg2d, X86TYPE_PCWRITEBACK, 0, MODE_WRITE);
_eeMoveGPRtoR(esi, _Rs_); _eeMoveGPRtoR(calleeSavedReg2d, _Rs_);
if (EmuConfig.Gamefixes.GoemonTlbHack) { if (EmuConfig.Gamefixes.GoemonTlbHack) {
xMOV(ecx, esi); xMOV(ecx, calleeSavedReg2d);
vtlb_DynV2P(); vtlb_DynV2P();
xMOV(esi, eax); xMOV(calleeSavedReg2d, eax);
} }
// uncomment when there are NO instructions that need to call interpreter // uncomment when there are NO instructions that need to call interpreter
// int mmreg; // int mmreg;
@ -147,10 +147,10 @@ void recJALR()
_clearNeededXMMregs(); _clearNeededXMMregs();
recompileNextInstruction(1); recompileNextInstruction(1);
if( x86regs[esi.GetId()].inuse ) { if( x86regs[calleeSavedReg2d.GetId()].inuse ) {
pxAssert( x86regs[esi.GetId()].type == X86TYPE_PCWRITEBACK ); pxAssert( x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK );
xMOV(ptr[&cpuRegs.pc], esi); xMOV(ptr[&cpuRegs.pc], calleeSavedReg2d);
x86regs[esi.GetId()].inuse = 0; x86regs[calleeSavedReg2d.GetId()].inuse = 0;
} }
else { else {
xMOV(eax, ptr[&g_recWriteback]); xMOV(eax, ptr[&g_recWriteback]);

View File

@ -99,13 +99,13 @@ void recLoad64( u32 bits, bool sign )
{ {
pxAssume( bits == 64 || bits == 128 ); pxAssume( bits == 64 || bits == 128 );
// Load EDX with the destination. // Load arg2 with the destination.
// 64/128 bit modes load the result directly into the cpuRegs.GPR struct. // 64/128 bit modes load the result directly into the cpuRegs.GPR struct.
if (_Rt_) if (_Rt_)
xMOV(edx, (uptr)&cpuRegs.GPR.r[_Rt_].UL[0]); xLEA(arg2reg, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
else else
xMOV(edx, (uptr)&dummyValue[0]); xLEA(arg2reg, ptr[&dummyValue[0]]);
if (GPR_IS_CONST1(_Rs_)) if (GPR_IS_CONST1(_Rs_))
{ {
@ -121,11 +121,11 @@ void recLoad64( u32 bits, bool sign )
else else
{ {
// Load ECX with the source memory address that we're reading from. // Load ECX with the source memory address that we're reading from.
_eeMoveGPRtoR(ecx, _Rs_); _eeMoveGPRtoR(arg1regd, _Rs_);
if (_Imm_ != 0) if (_Imm_ != 0)
xADD(ecx, _Imm_); xADD(arg1regd, _Imm_);
if (bits == 128) // force 16 byte alignment on 128 bit reads if (bits == 128) // force 16 byte alignment on 128 bit reads
xAND(ecx, ~0x0F); xAND(arg1regd, ~0x0F);
_eeOnLoadWrite(_Rt_); _eeOnLoadWrite(_Rt_);
_deleteEEreg(_Rt_, 0); _deleteEEreg(_Rt_, 0);
@ -154,10 +154,10 @@ void recLoad32( u32 bits, bool sign )
} }
else else
{ {
// Load ECX with the source memory address that we're reading from. // Load arg1 with the source memory address that we're reading from.
_eeMoveGPRtoR(ecx, _Rs_); _eeMoveGPRtoR(arg1regd, _Rs_);
if (_Imm_ != 0) if (_Imm_ != 0)
xADD(ecx, _Imm_ ); xADD(arg1regd, _Imm_ );
_eeOnLoadWrite(_Rt_); _eeOnLoadWrite(_Rt_);
_deleteEEreg(_Rt_, 0); _deleteEEreg(_Rt_, 0);
@ -194,12 +194,12 @@ void recStore(u32 bits)
if (bits < 64) if (bits < 64)
{ {
_eeMoveGPRtoR(edx, _Rt_); _eeMoveGPRtoR(arg2regd, _Rt_);
} }
else if (bits == 128 || bits == 64) else if (bits == 128 || bits == 64)
{ {
_flushEEreg(_Rt_); // flush register to mem _flushEEreg(_Rt_); // flush register to mem
xMOV(edx, (uptr)&cpuRegs.GPR.r[_Rt_].UL[0]); xLEA(arg2reg, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
} }
// Load ECX with the destination address, or issue a direct optimized write // Load ECX with the destination address, or issue a direct optimized write
@ -215,11 +215,11 @@ void recStore(u32 bits)
} }
else else
{ {
_eeMoveGPRtoR(ecx, _Rs_); _eeMoveGPRtoR(arg1regd, _Rs_);
if (_Imm_ != 0) if (_Imm_ != 0)
xADD(ecx, _Imm_); xADD(arg1regd, _Imm_);
if (bits == 128) if (bits == 128)
xAND(ecx, ~0x0F); xAND(arg1regd, ~0x0F);
iFlushCall(FLUSH_FULLVTLB); iFlushCall(FLUSH_FULLVTLB);
@ -253,30 +253,30 @@ void recLWL()
iFlushCall(FLUSH_FULLVTLB); iFlushCall(FLUSH_FULLVTLB);
_deleteEEreg(_Rt_, 1); _deleteEEreg(_Rt_, 1);
_eeMoveGPRtoR(ecx, _Rs_); _eeMoveGPRtoR(arg1regd, _Rs_);
if (_Imm_ != 0) if (_Imm_ != 0)
xADD(ecx, _Imm_); xADD(arg1regd, _Imm_);
// edi = bit offset in word // calleeSavedReg1 = bit offset in word
xMOV(edi, ecx); xMOV(calleeSavedReg1d, arg1regd);
xAND(edi, 3); xAND(calleeSavedReg1d, 3);
xSHL(edi, 3); xSHL(calleeSavedReg1d, 3);
xAND(ecx, ~3); xAND(arg1regd, ~3);
vtlb_DynGenRead32(32, false); vtlb_DynGenRead32(32, false);
if (!_Rt_) if (!_Rt_)
return; return;
// mask off bytes loaded // mask off bytes loaded
xMOV(ecx, edi); xMOV(ecx, calleeSavedReg1d);
xMOV(edx, 0xffffff); xMOV(edx, 0xffffff);
xSHR(edx, cl); xSHR(edx, cl);
xAND(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], edx); xAND(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], edx);
// OR in bytes loaded // OR in bytes loaded
xMOV(ecx, 24); xNEG(ecx);
xSUB(ecx, edi); xADD(ecx, 24);
xSHL(eax, cl); xSHL(eax, cl);
xOR(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], eax); xOR(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
@ -301,16 +301,16 @@ void recLWR()
iFlushCall(FLUSH_FULLVTLB); iFlushCall(FLUSH_FULLVTLB);
_deleteEEreg(_Rt_, 1); _deleteEEreg(_Rt_, 1);
_eeMoveGPRtoR(ecx, _Rs_); _eeMoveGPRtoR(arg1regd, _Rs_);
if (_Imm_ != 0) if (_Imm_ != 0)
xADD(ecx, _Imm_); xADD(arg1regd, _Imm_);
// edi = bit offset in word // edi = bit offset in word
xMOV(edi, ecx); xMOV(calleeSavedReg1d, arg1regd);
xAND(edi, 3); xAND(calleeSavedReg1d, 3);
xSHL(edi, 3); xSHL(calleeSavedReg1d, 3);
xAND(ecx, ~3); xAND(arg1regd, ~3);
vtlb_DynGenRead32(32, false); vtlb_DynGenRead32(32, false);
if (!_Rt_) if (!_Rt_)
@ -318,17 +318,17 @@ void recLWR()
// mask off bytes loaded // mask off bytes loaded
xMOV(ecx, 24); xMOV(ecx, 24);
xSUB(ecx, edi); xSUB(ecx, calleeSavedReg1d);
xMOV(edx, 0xffffff00); xMOV(edx, 0xffffff00);
xSHL(edx, cl); xSHL(edx, cl);
xAND(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], edx); xAND(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], edx);
// OR in bytes loaded // OR in bytes loaded
xMOV(ecx, edi); xMOV(ecx, calleeSavedReg1d);
xSHR(eax, cl); xSHR(eax, cl);
xOR(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], eax); xOR(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
xCMP(edi, 0); xCMP(ecx, 0);
xForwardJump8 nosignextend(Jcc_NotEqual); xForwardJump8 nosignextend(Jcc_NotEqual);
// if ((addr & 3) == 0) // if ((addr & 3) == 0)
xCDQ(); xCDQ();
@ -351,38 +351,38 @@ void recSWL()
#ifdef REC_STORES #ifdef REC_STORES
iFlushCall(FLUSH_FULLVTLB); iFlushCall(FLUSH_FULLVTLB);
_eeMoveGPRtoR(ecx, _Rs_); _eeMoveGPRtoR(arg1regd, _Rs_);
if (_Imm_ != 0) if (_Imm_ != 0)
xADD(ecx, _Imm_); xADD(arg1regd, _Imm_);
// edi = bit offset in word // edi = bit offset in word
xMOV(edi, ecx); xMOV(calleeSavedReg1d, arg1regd);
xAND(edi, 3); xAND(calleeSavedReg1d, 3);
xSHL(edi, 3); xSHL(calleeSavedReg1d, 3);
xAND(ecx, ~3); xAND(arg1regd, ~3);
vtlb_DynGenRead32(32, false); vtlb_DynGenRead32(32, false);
// mask read -> edx // mask read -> arg2
xMOV(ecx, edi); xMOV(ecx, calleeSavedReg1d);
xMOV(edx, 0xffffff00); xMOV(arg2regd, 0xffffff00);
xSHL(edx, cl); xSHL(arg2regd, cl);
xAND(edx, eax); xAND(arg2regd, eax);
if (_Rt_) if (_Rt_)
{ {
// mask write and OR -> edx // mask write and OR -> edx
xMOV(ecx, 24); xNEG(ecx);
xSUB(ecx, edi); xADD(ecx, 24);
_eeMoveGPRtoR(eax, _Rt_); _eeMoveGPRtoR(eax, _Rt_);
xSHR(eax, cl); xSHR(eax, cl);
xOR(edx, eax); xOR(arg2regd, eax);
} }
_eeMoveGPRtoR(ecx, _Rs_); _eeMoveGPRtoR(arg1regd, _Rs_);
if (_Imm_ != 0) if (_Imm_ != 0)
xADD(ecx, _Imm_); xADD(arg1regd, _Imm_);
xAND(ecx, ~3); xAND(arg1regd, ~3);
vtlb_DynGenWrite(32); vtlb_DynGenWrite(32);
#else #else
@ -401,38 +401,38 @@ void recSWR()
#ifdef REC_STORES #ifdef REC_STORES
iFlushCall(FLUSH_FULLVTLB); iFlushCall(FLUSH_FULLVTLB);
_eeMoveGPRtoR(ecx, _Rs_); _eeMoveGPRtoR(arg1regd, _Rs_);
if (_Imm_ != 0) if (_Imm_ != 0)
xADD(ecx, _Imm_); xADD(arg1regd, _Imm_);
// edi = bit offset in word // edi = bit offset in word
xMOV(edi, ecx); xMOV(calleeSavedReg1d, arg1regd);
xAND(edi, 3); xAND(calleeSavedReg1d, 3);
xSHL(edi, 3); xSHL(calleeSavedReg1d, 3);
xAND(ecx, ~3); xAND(arg1regd, ~3);
vtlb_DynGenRead32(32, false); vtlb_DynGenRead32(32, false);
// mask read -> edx // mask read -> edx
xMOV(ecx, 24); xMOV(ecx, 24);
xSUB(ecx, edi); xSUB(ecx, calleeSavedReg1d);
xMOV(edx, 0xffffff); xMOV(arg2regd, 0xffffff);
xSHR(edx, cl); xSHR(arg2regd, cl);
xAND(edx, eax); xAND(arg2regd, eax);
if (_Rt_) if (_Rt_)
{ {
// mask write and OR -> edx // mask write and OR -> edx
xMOV(ecx, edi); xMOV(ecx, calleeSavedReg1d);
_eeMoveGPRtoR(eax, _Rt_); _eeMoveGPRtoR(eax, _Rt_);
xSHL(eax, cl); xSHL(eax, cl);
xOR(edx, eax); xOR(arg2regd, eax);
} }
_eeMoveGPRtoR(ecx, _Rs_); _eeMoveGPRtoR(arg1regd, _Rs_);
if (_Imm_ != 0) if (_Imm_ != 0)
xADD(ecx, _Imm_); xADD(arg1regd, _Imm_);
xAND(ecx, ~3); xAND(arg1regd, ~3);
vtlb_DynGenWrite(32); vtlb_DynGenWrite(32);
#else #else
@ -512,9 +512,9 @@ void recLWC1()
} }
else else
{ {
_eeMoveGPRtoR(ecx, _Rs_); _eeMoveGPRtoR(arg1regd, _Rs_);
if (_Imm_ != 0) if (_Imm_ != 0)
xADD(ecx, _Imm_); xADD(arg1regd, _Imm_);
iFlushCall(FLUSH_FULLVTLB); iFlushCall(FLUSH_FULLVTLB);
@ -536,7 +536,7 @@ void recSWC1()
#else #else
_deleteFPtoXMMreg(_Rt_, 1); _deleteFPtoXMMreg(_Rt_, 1);
xMOV(edx, ptr32[&fpuRegs.fpr[_Rt_].UL] ); xMOV(arg2regd, ptr32[&fpuRegs.fpr[_Rt_].UL] );
if( GPR_IS_CONST1( _Rs_ ) ) if( GPR_IS_CONST1( _Rs_ ) )
{ {
@ -545,9 +545,9 @@ void recSWC1()
} }
else else
{ {
_eeMoveGPRtoR(ecx, _Rs_); _eeMoveGPRtoR(arg1regd, _Rs_);
if (_Imm_ != 0) if (_Imm_ != 0)
xADD(ecx, _Imm_); xADD(arg1regd, _Imm_);
iFlushCall(FLUSH_FULLVTLB); iFlushCall(FLUSH_FULLVTLB);
@ -574,9 +574,9 @@ void recSWC1()
void recLQC2() void recLQC2()
{ {
if (_Rt_) if (_Rt_)
xMOV(edx, (uptr)&VU0.VF[_Ft_].UD[0]); xLEA(arg2reg, ptr[&VU0.VF[_Ft_].UD[0]]);
else else
xMOV(edx, (uptr)&dummyValue[0]); xLEA(arg2reg, ptr[&dummyValue[0]]);
if (GPR_IS_CONST1(_Rs_)) if (GPR_IS_CONST1(_Rs_))
{ {
@ -586,9 +586,9 @@ void recLQC2()
} }
else else
{ {
_eeMoveGPRtoR(ecx, _Rs_); _eeMoveGPRtoR(arg1regd, _Rs_);
if (_Imm_ != 0) if (_Imm_ != 0)
xADD(ecx, _Imm_); xADD(arg1regd, _Imm_);
iFlushCall(FLUSH_FULLVTLB); iFlushCall(FLUSH_FULLVTLB);
@ -602,7 +602,7 @@ void recLQC2()
void recSQC2() void recSQC2()
{ {
xMOV(edx, (uptr)&VU0.VF[_Ft_].UD[0]); xLEA(arg2reg, ptr[&VU0.VF[_Ft_].UD[0]]);
if (GPR_IS_CONST1(_Rs_)) if (GPR_IS_CONST1(_Rs_))
{ {
@ -611,9 +611,9 @@ void recSQC2()
} }
else else
{ {
_eeMoveGPRtoR(ecx, _Rs_); _eeMoveGPRtoR(arg1regd, _Rs_);
if (_Imm_ != 0) if (_Imm_ != 0)
xADD(ecx, _Imm_); xADD(arg1regd, _Imm_);
iFlushCall(FLUSH_FULLVTLB); iFlushCall(FLUSH_FULLVTLB);

View File

@ -73,6 +73,12 @@ static void iMOV128_SSE( const xIndirectVoid& destRm, const xIndirectVoid& srcRm
// //
static void iMOV64_Smart( const xIndirectVoid& destRm, const xIndirectVoid& srcRm ) static void iMOV64_Smart( const xIndirectVoid& destRm, const xIndirectVoid& srcRm )
{ {
if (wordsize == 8) {
xMOV(rax, srcRm);
xMOV(destRm, rax);
return;
}
if( _hasFreeXMMreg() ) if( _hasFreeXMMreg() )
{ {
// Move things using MOVLPS: // Move things using MOVLPS:
@ -92,8 +98,8 @@ static void iMOV64_Smart( const xIndirectVoid& destRm, const xIndirectVoid& srcR
/* /*
// Pseudo-Code For the following Dynarec Implementations --> // Pseudo-Code For the following Dynarec Implementations -->
u32 vmv=vmap[addr>>VTLB_PAGE_BITS]; u32 vmv = vmap[addr>>VTLB_PAGE_BITS].raw();
s32 ppf=addr+vmv; sptr ppf=addr+vmv;
if (!(ppf<0)) if (!(ppf<0))
{ {
data[0]=*reinterpret_cast<DataType*>(ppf); data[0]=*reinterpret_cast<DataType*>(ppf);
@ -105,7 +111,7 @@ static void iMOV64_Smart( const xIndirectVoid& destRm, const xIndirectVoid& srcR
{ {
//has to: translate, find function, call function //has to: translate, find function, call function
u32 hand=(u8)vmv; u32 hand=(u8)vmv;
u32 paddr=ppf-hand+0x80000000; u32 paddr=(ppf-hand) << 1;
//Console.WriteLn("Translated 0x%08X to 0x%08X",params addr,paddr); //Console.WriteLn("Translated 0x%08X to 0x%08X",params addr,paddr);
return reinterpret_cast<TemplateHelper<DataSize,false>::HandlerType*>(RWFT[TemplateHelper<DataSize,false>::sidx][0][hand])(paddr,data); return reinterpret_cast<TemplateHelper<DataSize,false>::HandlerType*>(RWFT[TemplateHelper<DataSize,false>::sidx][0][hand])(paddr,data);
} }
@ -114,26 +120,28 @@ static void iMOV64_Smart( const xIndirectVoid& destRm, const xIndirectVoid& srcR
mov eax,ecx; mov eax,ecx;
shr eax,VTLB_PAGE_BITS; shr eax,VTLB_PAGE_BITS;
mov eax,[eax*4+vmap]; mov rax,[rax*wordsize+vmap];
add ecx,eax; add rcx,rax;
js _fullread; js _fullread;
//these are wrong order, just an example ... //these are wrong order, just an example ...
mov [eax],ecx; mov [rax],ecx;
mov ecx,[edx]; mov ecx,[rdx];
mov [eax+4],ecx; mov [rax+4],ecx;
mov ecx,[edx+4]; mov ecx,[rdx+4];
mov [eax+4+4],ecx; mov [rax+4+4],ecx;
mov ecx,[edx+4+4]; mov ecx,[rdx+4+4];
mov [eax+4+4+4+4],ecx; mov [rax+4+4+4+4],ecx;
mov ecx,[edx+4+4+4+4]; mov ecx,[rdx+4+4+4+4];
///.... ///....
jmp cont; jmp cont;
_fullread: _fullread:
movzx eax,al; movzx eax,al;
sub ecx,eax; sub ecx,eax;
#ifndef __M_X86_64 // The x86-64 marker will be cleared by using 32-bit ops
sub ecx,0x80000000; sub ecx,0x80000000;
#endif
call [eax+stuff]; call [eax+stuff];
cont: cont:
........ ........
@ -146,17 +154,16 @@ namespace vtlb_private
// Prepares eax, ecx, and, ebx for Direct or Indirect operations. // Prepares eax, ecx, and, ebx for Direct or Indirect operations.
// Returns the writeback pointer for ebx (return address from indirect handling) // Returns the writeback pointer for ebx (return address from indirect handling)
// //
// Emits the common prologue for a dynamic vtlb access and returns a pointer to the
// 32-bit field that must later be patched with the address to resume at.
// (Side-by-side diff: old 32-bit-only code on the left, x86-64 aware code on the right.)
static uptr* DynGen_PrepRegs() static u32* DynGen_PrepRegs()
{ {
// Warning dirty ebx (in case someone got the very bad idea to move this code) // Warning dirty ebx (in case someone got the very bad idea to move this code)
EE::Profiler.EmitMem(); EE::Profiler.EmitMem();
// eax = page index of the address held in the first argument register
xMOV( eax, ecx ); xMOV( eax, arg1regd );
xSHR( eax, VTLB_PAGE_BITS ); xSHR( eax, VTLB_PAGE_BITS );
// Load the vmap entry for that page; the new code reads a wordsize-wide entry and
// passes rbx to xComplexAddress alongside the table address.
xMOV( eax, ptr[(eax*4) + vtlbdata.vmap] ); xMOV( rax, ptrNative[xComplexAddress(rbx, vtlbdata.vmap, rax*wordsize)] );
// Old: mov ebx, <placeholder imm32>. New: an LEA into rbx whose target is decided later.
xMOV( ebx, 0xcdcdcdcd ); u32* writeback = xLEA_Writeback( rbx );
// Add the vmap entry to the address register (the sign of the result is tested in the
// pseudo-code above via `js _fullread`).
uptr* writeback = ((uptr*)xGetPtr()) - 1; xADD( arg1reg, rax );
xADD( ecx, eax );
return writeback; return writeback;
} }
@ -168,28 +175,28 @@ namespace vtlb_private
{ {
case 8: case 8:
if( sign ) if( sign )
xMOVSX( eax, ptr8[ecx] ); xMOVSX( eax, ptr8[arg1reg] );
else else
xMOVZX( eax, ptr8[ecx] ); xMOVZX( eax, ptr8[arg1reg] );
break; break;
case 16: case 16:
if( sign ) if( sign )
xMOVSX( eax, ptr16[ecx] ); xMOVSX( eax, ptr16[arg1reg] );
else else
xMOVZX( eax, ptr16[ecx] ); xMOVZX( eax, ptr16[arg1reg] );
break; break;
case 32: case 32:
xMOV( eax, ptr[ecx] ); xMOV( eax, ptr[arg1reg] );
break; break;
case 64: case 64:
iMOV64_Smart( ptr[edx], ptr[ecx] ); iMOV64_Smart( ptr[arg2reg], ptr[arg1reg] );
break; break;
case 128: case 128:
iMOV128_SSE( ptr[edx], ptr[ecx] ); iMOV128_SSE( ptr[arg2reg], ptr[arg1reg] );
break; break;
jNO_DEFAULT jNO_DEFAULT
@ -199,27 +206,29 @@ namespace vtlb_private
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
// Emits a direct store of `bits` bits to the address held in the first argument register.
// For 8/16/32 bits the value is taken from the second argument register; for 64/128 bits
// the second argument register holds a pointer to the data, which is copied memory-to-memory.
// (Side-by-side diff: old code on the left, new code on the right.)
static void DynGen_DirectWrite( u32 bits ) static void DynGen_DirectWrite( u32 bits )
{ {
// TODO: x86Emitter can't use dil (and xRegister8(rdi.Id) is not dil)
switch(bits) switch(bits)
{ {
//8 , 16, 32 : data on EDX //8 , 16, 32 : data on EDX
case 8: case 8:
// New code first copies arg2regd into edx so that dl can be used as the byte source
// (see the TODO above about dil).
xMOV( ptr[ecx], dl ); xMOV( edx, arg2regd );
xMOV( ptr[arg1reg], dl );
break; break;
case 16: case 16:
xMOV( ptr[ecx], dx ); xMOV( ptr[arg1reg], xRegister16(arg2reg.Id) );
break; break;
case 32: case 32:
xMOV( ptr[ecx], edx ); xMOV( ptr[arg1reg], arg2regd );
break; break;
case 64: case 64:
// destination = [arg1reg], source = [arg2reg]
iMOV64_Smart( ptr[ecx], ptr[edx] ); iMOV64_Smart( ptr[arg1reg], ptr[arg2reg] );
break; break;
case 128: case 128:
iMOV128_SSE( ptr[ecx], ptr[edx] ); iMOV128_SSE( ptr[arg1reg], ptr[arg2reg] );
break; break;
} }
} }
@ -274,15 +283,23 @@ static void DynGen_IndirectDispatch( int mode, int bits, bool sign = false )
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
// Generates the various instances of the indirect dispatchers // Generates the various instances of the indirect dispatchers
// In: arg1reg: vtlb entry, arg2reg: data ptr (if mode >= 64), rbx: function return ptr
// Out: eax: result (if mode < 64)
static void DynGen_IndirectTlbDispatcher( int mode, int bits, bool sign ) static void DynGen_IndirectTlbDispatcher( int mode, int bits, bool sign )
{ {
xMOVZX( eax, al ); xMOVZX( eax, al );
xSUB( ecx, 0x80000000 ); if (wordsize != 8) xSUB( arg1regd, 0x80000000 );
xSUB( ecx, eax ); xSUB( arg1regd, eax );
// jump to the indirect handler, which is a __fastcall C++ function. // jump to the indirect handler, which is a __fastcall C++ function.
// [ecx is address, edx is data] // [ecx is address, edx is data]
xFastCall(ptr32[(eax*4) + vtlbdata.RWFT[bits][mode]], ecx, edx); sptr table = (sptr)vtlbdata.RWFT[bits][mode];
if (table == (s32)table) {
xFastCall(ptrNative[(rax*wordsize) + table], arg1reg, arg2reg);
} else {
xLEA(arg3reg, ptr[(void*)table]);
xFastCall(ptrNative[(rax*wordsize) + arg3reg], arg1reg, arg2reg);
}
if (!mode) if (!mode)
{ {
@ -302,7 +319,7 @@ static void DynGen_IndirectTlbDispatcher( int mode, int bits, bool sign )
} }
} }
xJMP( ebx ); xJMP( rbx );
} }
// One-time initialization procedure. Multiple subsequent calls during the lifespan of the // One-time initialization procedure. Multiple subsequent calls during the lifespan of the
@ -338,18 +355,30 @@ void vtlb_dynarec_init()
Perf::any.map((uptr)m_IndirectDispatchers, __pagesize, "TLB Dispatcher"); Perf::any.map((uptr)m_IndirectDispatchers, __pagesize, "TLB Dispatcher");
} }
// Patches the 32-bit field at `writeback` (as returned by DynGen_PrepRegs) so that it
// refers to the current emitter position.
//  - wordsize != 8: the field receives the emitter position itself.
//  - wordsize == 8: the field is expected to belong to an LEA (opcode byte 0x8d located two
//    bytes before it) and receives the distance from the end of the field to the emitter
//    position.
// Asserts that the stored value survives truncation to a signed 32-bit integer.
static void vtlb_SetWriteback(u32 *writeback)
{
	const uptr target = (uptr)xGetPtr();
	const bool lea_relative = (wordsize == 8);

	if (lea_relative)
	{
		pxAssertMsg(*((u8*)writeback - 2) == 0x8d, "Expected codegen to be an LEA");
	}

	// The displacement is measured from the first byte after the 4-byte field.
	const uptr val = lea_relative ? (target - ((uptr)writeback + 4)) : target;

	pxAssertMsg((sptr)val == (s32)val, "Writeback too far away!");
	*writeback = val;
}
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
// Dynarec Load Implementations // Dynarec Load Implementations
// Emits a 64- or 128-bit vtlb read: register prologue, indirect dispatch (mode 0),
// then the direct read, and finally patches the writeback field to point just past
// the emitted code. (Side-by-side diff: old code on the left, new code on the right.)
void vtlb_DynGenRead64(u32 bits) void vtlb_DynGenRead64(u32 bits)
{ {
pxAssume( bits == 64 || bits == 128 ); pxAssume( bits == 64 || bits == 128 );
uptr* writeback = DynGen_PrepRegs(); u32* writeback = DynGen_PrepRegs();
DynGen_IndirectDispatch( 0, bits ); DynGen_IndirectDispatch( 0, bits );
DynGen_DirectRead( bits, false ); DynGen_DirectRead( bits, false );
*writeback = (uptr)xGetPtr(); // return target for indirect's call/ret vtlb_SetWriteback(writeback); // return target for indirect's call/ret
} }
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
@ -360,12 +389,12 @@ void vtlb_DynGenRead32(u32 bits, bool sign)
{ {
pxAssume( bits <= 32 ); pxAssume( bits <= 32 );
uptr* writeback = DynGen_PrepRegs(); u32* writeback = DynGen_PrepRegs();
DynGen_IndirectDispatch( 0, bits, sign && bits < 32 ); DynGen_IndirectDispatch( 0, bits, sign && bits < 32 );
DynGen_DirectRead( bits, sign ); DynGen_DirectRead( bits, sign );
*writeback = (uptr)xGetPtr(); vtlb_SetWriteback(writeback);
} }
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
@ -378,15 +407,15 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const )
auto vmv = vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS]; auto vmv = vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS];
if( !vmv.isHandler(addr_const) ) if( !vmv.isHandler(addr_const) )
{ {
auto ppf = vmv.assumeHandlerGetPAddr(addr_const); auto ppf = vmv.assumePtr(addr_const);
switch( bits ) switch( bits )
{ {
case 64: case 64:
iMOV64_Smart( ptr[edx], ptr[(void*)ppf] ); iMOV64_Smart( ptr[arg2reg], ptr[(void*)ppf] );
break; break;
case 128: case 128:
iMOV128_SSE( ptr[edx], ptr[(void*)ppf] ); iMOV128_SSE( ptr[arg2reg], ptr[(void*)ppf] );
break; break;
jNO_DEFAULT jNO_DEFAULT
@ -405,7 +434,7 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const )
} }
iFlushCall(FLUSH_FULLVTLB); iFlushCall(FLUSH_FULLVTLB);
xFastCall( vmv.assumeHandlerGetRaw(szidx, 0), paddr ); xFastCall( vmv.assumeHandlerGetRaw(szidx, 0), paddr, arg2reg );
} }
} }
@ -442,7 +471,7 @@ void vtlb_DynGenRead32_Const( u32 bits, bool sign, u32 addr_const )
break; break;
case 32: case 32:
xMOV( eax, ptr[(void*)ppf] ); xMOV( eax, ptr32[(u32*)ppf] );
break; break;
} }
} }
@ -494,12 +523,12 @@ void vtlb_DynGenRead32_Const( u32 bits, bool sign, u32 addr_const )
// Emits a vtlb write of `sz` bits: register prologue, indirect dispatch (mode 1),
// then the direct write, and finally patches the writeback field to point just past
// the emitted code. (Side-by-side diff: old code on the left, new code on the right.)
void vtlb_DynGenWrite(u32 sz) void vtlb_DynGenWrite(u32 sz)
{ {
uptr* writeback = DynGen_PrepRegs(); u32* writeback = DynGen_PrepRegs();
DynGen_IndirectDispatch( 1, sz ); DynGen_IndirectDispatch( 1, sz );
DynGen_DirectWrite( sz ); DynGen_DirectWrite( sz );
*writeback = (uptr)xGetPtr(); vtlb_SetWriteback(writeback);
} }
@ -514,28 +543,30 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const )
auto vmv = vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS]; auto vmv = vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS];
if( !vmv.isHandler(addr_const) ) if( !vmv.isHandler(addr_const) )
{ {
// TODO: x86Emitter can't use dil (and xRegister8(rdi.Id) is not dil)
auto ppf = vmv.assumePtr(addr_const); auto ppf = vmv.assumePtr(addr_const);
switch(bits) switch(bits)
{ {
//8 , 16, 32 : data on EDX //8 , 16, 32 : data on arg2
case 8: case 8:
xMOV( edx, arg2regd );
xMOV( ptr[(void*)ppf], dl ); xMOV( ptr[(void*)ppf], dl );
break; break;
case 16: case 16:
xMOV( ptr[(void*)ppf], dx ); xMOV( ptr[(void*)ppf], xRegister16(arg2reg.Id) );
break; break;
case 32: case 32:
xMOV( ptr[(void*)ppf], edx ); xMOV( ptr[(void*)ppf], arg2regd );
break; break;
case 64: case 64:
iMOV64_Smart( ptr[(void*)ppf], ptr[edx] ); iMOV64_Smart( ptr[(void*)ppf], ptr[arg2reg] );
break; break;
case 128: case 128:
iMOV128_SSE( ptr[(void*)ppf], ptr[edx] ); iMOV128_SSE( ptr[(void*)ppf], ptr[arg2reg] );
break; break;
} }
@ -556,7 +587,7 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const )
} }
iFlushCall(FLUSH_FULLVTLB); iFlushCall(FLUSH_FULLVTLB);
xFastCall( vmv.assumeHandlerGetRaw(szidx, true), paddr, edx ); xFastCall( vmv.assumeHandlerGetRaw(szidx, true), paddr, arg2reg );
} }
} }
@ -565,13 +596,14 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const )
// ecx - virtual address // ecx - virtual address
// Returns physical address in eax. // Returns physical address in eax.
// Clobbers edx
// Emits code computing eax = vtlbdata.ppmap[ecx >> VTLB_PAGE_BITS] | (ecx & VTLB_PAGE_MASK).
// ecx is modified (masked) in the process; the new code also hands rdx to xComplexAddress.
// (Side-by-side diff: old code on the left, new code on the right.)
void vtlb_DynV2P() void vtlb_DynV2P()
{ {
xMOV(eax, ecx); xMOV(eax, ecx);
xAND(ecx, VTLB_PAGE_MASK); // vaddr & VTLB_PAGE_MASK xAND(ecx, VTLB_PAGE_MASK); // vaddr & VTLB_PAGE_MASK
xSHR(eax, VTLB_PAGE_BITS); xSHR(eax, VTLB_PAGE_BITS);
xMOV(eax, ptr[(eax*4) + vtlbdata.ppmap]); //vtlbdata.ppmap[vaddr>>VTLB_PAGE_BITS]; xMOV(eax, ptr[xComplexAddress(rdx, vtlbdata.ppmap, rax*4)]); //vtlbdata.ppmap[vaddr>>VTLB_PAGE_BITS];
// combine the looked-up value with the masked low bits of the virtual address
xOR(eax, ecx); xOR(eax, ecx);
} }

View File

@ -178,25 +178,25 @@ void normJumpCompile(mV, microFlagCycles& mFC, bool isEvilJump) {
mVUpBlock->jumpCache = new microJumpCache[mProgSize/2]; mVUpBlock->jumpCache = new microJumpCache[mProgSize/2];
} }
if (isEvilJump) xMOV(gprT2, ptr32[&mVU.evilBranch]); if (isEvilJump) xMOV(arg1regd, ptr32[&mVU.evilBranch]);
else xMOV(gprT2, ptr32[&mVU.branch]); else xMOV(arg1regd, ptr32[&mVU.branch]);
if (doJumpCaching) xMOV(gprT3, (uptr)mVUpBlock); if (doJumpCaching) xLoadFarAddr(arg2reg, mVUpBlock);
else xMOV(gprT3, (uptr)&mVUpBlock->pStateEnd); else xLoadFarAddr(arg2reg, &mVUpBlock->pStateEnd);
if(mVUup.eBit && isEvilJump)// E-bit EvilJump if(mVUup.eBit && isEvilJump)// E-bit EvilJump
{ {
//Xtreme G 3 does 2 conditional jumps, the first contains an E Bit on the first instruction //Xtreme G 3 does 2 conditional jumps, the first contains an E Bit on the first instruction
//So if it is taken, you need to end the program, else you get infinite loops. //So if it is taken, you need to end the program, else you get infinite loops.
mVUendProgram(mVU, &mFC, 2); mVUendProgram(mVU, &mFC, 2);
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], gprT2); xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], arg1regd);
xJMP(mVU.exitFunct); xJMP(mVU.exitFunct);
} }
if (!mVU.index) xFastCall((void*)(void(*)())mVUcompileJIT<0>, gprT2, gprT3); //(u32 startPC, uptr pState) if (!mVU.index) xFastCall((void*)(void(*)())mVUcompileJIT<0>, arg1reg, arg2reg); //(u32 startPC, uptr pState)
else xFastCall((void*)(void(*)())mVUcompileJIT<1>, gprT2, gprT3); else xFastCall((void*)(void(*)())mVUcompileJIT<1>, arg1reg, arg2reg);
mVUrestoreRegs(mVU); mVUrestoreRegs(mVU);
xJMP(gprT1); // Jump to rec-code address xJMP(gprT1q); // Jump to rec-code address
} }
void normBranch(mV, microFlagCycles& mFC) { void normBranch(mV, microFlagCycles& mFC) {

View File

@ -27,8 +27,8 @@ void mVUdispatcherAB(mV) {
xScopedStackFrame frame(false, true); xScopedStackFrame frame(false, true);
// __fastcall = The caller has already put the needed parameters in ecx/edx: // __fastcall = The caller has already put the needed parameters in ecx/edx:
if (!isVU1) { xFastCall((void*)mVUexecuteVU0, ecx, edx); } if (!isVU1) { xFastCall((void*)mVUexecuteVU0, arg1reg, arg2reg); }
else { xFastCall((void*)mVUexecuteVU1, ecx, edx); } else { xFastCall((void*)mVUexecuteVU1, arg1reg, arg2reg); }
// Load VU's MXCSR state // Load VU's MXCSR state
xLDMXCSR(g_sseVUMXCSR); xLDMXCSR(g_sseVUMXCSR);
@ -52,7 +52,7 @@ void mVUdispatcherAB(mV) {
xSHUF.PS(xmmPQ, xmmT1, 0); // wzyx = PPQQ xSHUF.PS(xmmPQ, xmmT1, 0); // wzyx = PPQQ
// Jump to Recompiled Code Block // Jump to Recompiled Code Block
xJMP(eax); xJMP(rax);
mVU.exitFunct = x86Ptr; mVU.exitFunct = x86Ptr;
@ -89,7 +89,7 @@ void mVUdispatcherCD(mV) {
xMOV(gprF3, ptr32[&mVU.statFlag[3]]); xMOV(gprF3, ptr32[&mVU.statFlag[3]]);
// Jump to Recompiled Code Block // Jump to Recompiled Code Block
xJMP(ptr32[&mVU.resumePtrXG]); xJMP(ptrNative[&mVU.resumePtrXG]);
mVU.exitFunctXG = x86Ptr; mVU.exitFunctXG = x86Ptr;

View File

@ -283,7 +283,7 @@ __fi void mVUsetupFlags(mV, microFlagCycles& mFC) {
xMOV(gprF3, getFlagReg(bStatus[3])); xMOV(gprF3, getFlagReg(bStatus[3]));
} }
else if (sortRegs == 2) { else if (sortRegs == 2) {
xMOV(gprT1, getFlagReg (bStatus[3])); xMOV(gprT1, getFlagReg (bStatus[3]));
xMOV(gprF0, getFlagReg (bStatus[0])); xMOV(gprF0, getFlagReg (bStatus[0]));
xMOV(gprF1, getFlagReg2(bStatus[1])); xMOV(gprF1, getFlagReg2(bStatus[1]));
xMOV(gprF2, getFlagReg2(bStatus[2])); xMOV(gprF2, getFlagReg2(bStatus[2]));
@ -291,7 +291,7 @@ __fi void mVUsetupFlags(mV, microFlagCycles& mFC) {
} }
else if (sortRegs == 3) { else if (sortRegs == 3) {
int gFlag = (bStatus[0] == bStatus[1]) ? bStatus[2] : bStatus[1]; int gFlag = (bStatus[0] == bStatus[1]) ? bStatus[2] : bStatus[1];
xMOV(gprT1, getFlagReg (gFlag)); xMOV(gprT1, getFlagReg (gFlag));
xMOV(gprT2, getFlagReg (bStatus[3])); xMOV(gprT2, getFlagReg (bStatus[3]));
xMOV(gprF0, getFlagReg (bStatus[0])); xMOV(gprF0, getFlagReg (bStatus[0]));
xMOV(gprF1, getFlagReg3(bStatus[1])); xMOV(gprF1, getFlagReg3(bStatus[1]));
@ -299,12 +299,12 @@ __fi void mVUsetupFlags(mV, microFlagCycles& mFC) {
xMOV(gprF3, gprT2); xMOV(gprF3, gprT2);
} }
else { else {
xMOV(gprT1, getFlagReg(bStatus[0])); xMOV(gprT1, getFlagReg(bStatus[0]));
xMOV(gprT2, getFlagReg(bStatus[1])); xMOV(gprT2, getFlagReg(bStatus[1]));
xMOV(gprT3, getFlagReg(bStatus[2])); xMOV(gprT3, getFlagReg(bStatus[2]));
xMOV(gprF3, getFlagReg(bStatus[3])); xMOV(gprF3, getFlagReg(bStatus[3]));
xMOV(gprF0, gprT1); xMOV(gprF0, gprT1);
xMOV(gprF1, gprT2); xMOV(gprF1, gprT2);
xMOV(gprF2, gprT3); xMOV(gprF2, gprT3);
} }
} }

View File

@ -841,16 +841,14 @@ mVUop(mVU_ILW) {
analyzeVIreg2(mVU, _It_, mVUlow.VI_write, 4); analyzeVIreg2(mVU, _It_, mVUlow.VI_write, 4);
} }
pass2 { pass2 {
xAddressVoid ptr(mVU.regs().Mem + offsetSS); void *ptr = mVU.regs().Mem + offsetSS;
mVUallocVIa(mVU, gprT2, _Is_); mVUallocVIa(mVU, gprT2, _Is_);
if (!_Is_) if (!_Is_)
xXOR(gprT2, gprT2); xXOR(gprT2, gprT2);
xADD(gprT2, _Imm11_); xADD(gprT2, _Imm11_);
mVUaddrFix (mVU, gprT2); mVUaddrFix (mVU, gprT2q);
ptr += gprT2; xMOVZX(gprT1, ptr16[xComplexAddress(gprT3q, ptr, gprT2q)]);
xMOVZX(gprT1, ptr16[ptr]);
mVUallocVIb(mVU, gprT1, _It_); mVUallocVIb(mVU, gprT1, _It_);
mVU.profiler.EmitOp(opILW); mVU.profiler.EmitOp(opILW);
} }
@ -866,13 +864,14 @@ mVUop(mVU_ILWR) {
analyzeVIreg2(mVU, _It_, mVUlow.VI_write, 4); analyzeVIreg2(mVU, _It_, mVUlow.VI_write, 4);
} }
pass2 { pass2 {
xAddressVoid ptr(mVU.regs().Mem + offsetSS); void *ptr = mVU.regs().Mem + offsetSS;
if (_Is_) { if (_Is_) {
mVUallocVIa(mVU, gprT2, _Is_); mVUallocVIa(mVU, gprT2, _Is_);
mVUaddrFix (mVU, gprT2); mVUaddrFix (mVU, gprT2q);
ptr += gprT2; xMOVZX(gprT1, ptr16[xComplexAddress(gprT3q, ptr, gprT2q)]);
} else {
xMOVZX(gprT1, ptr16[ptr]);
} }
xMOVZX(gprT1, ptr16[ptr]);
mVUallocVIb(mVU, gprT1, _It_); mVUallocVIb(mVU, gprT1, _It_);
mVU.profiler.EmitOp(opILWR); mVU.profiler.EmitOp(opILWR);
} }
@ -883,26 +882,44 @@ mVUop(mVU_ILWR) {
// ISW/ISWR // ISW/ISWR
//------------------------------------------------------------------ //------------------------------------------------------------------
// Emits the stores shared by ISW and ISWR: for each of _X/_Y/_Z/_W that is set, gprT1 is
// written as a 32-bit value at byte offset 0/4/8/12 from (base_ptr + reg).
//
//   mVU      - not referenced directly here; presumably consumed by the _X/_Y/_Z/_W macros
//              (NOTE(review): confirm against the macro definitions).
//   base_ptr - host address the stores are relative to.
//   reg      - index register added to base_ptr, or an empty register for no index.
//              Must not be gprT3q, which is used as scratch on the far-address path.
//
// When an index register is present and base_ptr does not survive truncation to a signed
// 32-bit value, the address of the first word written is loaded into gprT3q (clobbering it)
// and every store is addressed relative to that register instead.
static void writeBackISW(microVU& mVU, void *base_ptr, xAddressReg reg) {
	if (!reg.IsEmpty() && (sptr)base_ptr != (s32)(sptr)base_ptr) {
		// Offset (from base_ptr) currently held in gprT3q; -1 until the LEA has been emitted.
		int register_offset = -1;
		auto writeBackAt = [&](int offset){
			if (register_offset == -1) {
				xLEA(gprT3q, ptr[(void*)((sptr)base_ptr + offset)]);
				register_offset = offset;
			}
			// Index with the caller-supplied register rather than a hard-coded gprT2q, so
			// this path addresses the same location as the near path below.
			xMOV(ptr32[gprT3q+reg+(offset-register_offset)], gprT1);
		};
		if (_X) writeBackAt(0);
		if (_Y) writeBackAt(4);
		if (_Z) writeBackAt(8);
		if (_W) writeBackAt(12);
	} else {
		if (_X) xMOV(ptr32[base_ptr+reg], gprT1);
		if (_Y) xMOV(ptr32[base_ptr+reg+4], gprT1);
		if (_Z) xMOV(ptr32[base_ptr+reg+8], gprT1);
		if (_W) xMOV(ptr32[base_ptr+reg+12], gprT1);
	}
}
mVUop(mVU_ISW) { mVUop(mVU_ISW) {
pass1 { pass1 {
analyzeVIreg1(mVU, _Is_, mVUlow.VI_read[0]); analyzeVIreg1(mVU, _Is_, mVUlow.VI_read[0]);
analyzeVIreg1(mVU, _It_, mVUlow.VI_read[1]); analyzeVIreg1(mVU, _It_, mVUlow.VI_read[1]);
} }
pass2 { pass2 {
xAddressVoid ptr(mVU.regs().Mem); void *ptr = mVU.regs().Mem;
mVUallocVIa(mVU, gprT2, _Is_); mVUallocVIa(mVU, gprT2, _Is_);
if (!_Is_) if (!_Is_)
xXOR(gprT2, gprT2); xXOR(gprT2, gprT2);
xADD(gprT2, _Imm11_); xADD(gprT2, _Imm11_);
mVUaddrFix (mVU, gprT2); mVUaddrFix (mVU, gprT2);
ptr += gprT2;
mVUallocVIa(mVU, gprT1, _It_); mVUallocVIa(mVU, gprT1, _It_);
if (_X) xMOV(ptr32[ptr], gprT1); writeBackISW(mVU, ptr, gprT2);
if (_Y) xMOV(ptr32[ptr+4], gprT1);
if (_Z) xMOV(ptr32[ptr+8], gprT1);
if (_W) xMOV(ptr32[ptr+12], gprT1);
mVU.profiler.EmitOp(opISW); mVU.profiler.EmitOp(opISW);
} }
pass3 { mVUlog("ISW.%s vi%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_); } pass3 { mVUlog("ISW.%s vi%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_); }
@ -913,17 +930,16 @@ mVUop(mVU_ISWR) {
analyzeVIreg1(mVU, _Is_, mVUlow.VI_read[0]); analyzeVIreg1(mVU, _Is_, mVUlow.VI_read[0]);
analyzeVIreg1(mVU, _It_, mVUlow.VI_read[1]); } analyzeVIreg1(mVU, _It_, mVUlow.VI_read[1]); }
pass2 { pass2 {
xAddressVoid ptr(mVU.regs().Mem); void *ptr = mVU.regs().Mem;
xAddressReg is = xEmptyReg;
if (_Is_) { if (_Is_) {
mVUallocVIa(mVU, gprT2, _Is_); mVUallocVIa(mVU, gprT2, _Is_);
mVUaddrFix (mVU, gprT2); mVUaddrFix (mVU, gprT2q);
ptr += gprT2; is = gprT2q;
} }
mVUallocVIa(mVU, gprT1, _It_); mVUallocVIa(mVU, gprT1, _It_);
if (_X) xMOV(ptr32[ptr], gprT1); writeBackISW(mVU, ptr, is);
if (_Y) xMOV(ptr32[ptr+4], gprT1);
if (_Z) xMOV(ptr32[ptr+8], gprT1);
if (_W) xMOV(ptr32[ptr+12], gprT1);
mVU.profiler.EmitOp(opISWR); mVU.profiler.EmitOp(opISWR);
} }
pass3 { mVUlog("ISWR.%s vi%02d, vi%02d", _XYZW_String, _Ft_, _Fs_); } pass3 { mVUlog("ISWR.%s vi%02d, vi%02d", _XYZW_String, _Ft_, _Fs_); }
@ -936,16 +952,15 @@ mVUop(mVU_ISWR) {
mVUop(mVU_LQ) { mVUop(mVU_LQ) {
pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, false); } pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, false); }
pass2 { pass2 {
xAddressVoid ptr(mVU.regs().Mem); void *ptr = mVU.regs().Mem;
mVUallocVIa(mVU, gprT2, _Is_); mVUallocVIa(mVU, gprT2, _Is_);
if (!_Is_) if (!_Is_)
xXOR(gprT2, gprT2); xXOR(gprT2, gprT2);
xADD(gprT2, _Imm11_); xADD(gprT2, _Imm11_);
mVUaddrFix(mVU, gprT2); mVUaddrFix(mVU, gprT2q);
ptr += gprT2;
const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W); const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
mVUloadReg(Ft, ptr, _X_Y_Z_W); mVUloadReg(Ft, xComplexAddress(gprT3q, ptr, gprT2q), _X_Y_Z_W);
mVU.regAlloc->clearNeeded(Ft); mVU.regAlloc->clearNeeded(Ft);
mVU.profiler.EmitOp(opLQ); mVU.profiler.EmitOp(opLQ);
} }
@ -955,18 +970,25 @@ mVUop(mVU_LQ) {
mVUop(mVU_LQD) { mVUop(mVU_LQD) {
pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, true); } pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, true); }
pass2 { pass2 {
xAddressVoid ptr(mVU.regs().Mem); void * ptr = mVU.regs().Mem;
xAddressReg is = xEmptyReg;
if (_Is_ || isVU0) { // Access VU1 regs mem-map in !_Is_ case if (_Is_ || isVU0) { // Access VU1 regs mem-map in !_Is_ case
mVUallocVIa(mVU, gprT2, _Is_); mVUallocVIa(mVU, gprT2, _Is_);
xSUB(gprT2b, 1); xSUB(gprT2b, 1);
if (_Is_) mVUallocVIb(mVU, gprT2, _Is_); if (_Is_) mVUallocVIb(mVU, gprT2, _Is_);
mVUaddrFix (mVU, gprT2); mVUaddrFix (mVU, gprT2q);
ptr += gprT2; is = gprT2q;
}
else {
ptr = (void*)((sptr)ptr + (0xffff & (mVU.microMemSize-8)));
} }
else ptr += (0xffff & (mVU.microMemSize-8));
if (!mVUlow.noWriteVF) { if (!mVUlow.noWriteVF) {
const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W); const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
mVUloadReg(Ft, ptr, _X_Y_Z_W); if (is.IsEmpty()) {
mVUloadReg(Ft, xAddressVoid(ptr), _X_Y_Z_W);
} else {
mVUloadReg(Ft, xComplexAddress(gprT3q, ptr, is), _X_Y_Z_W);
}
mVU.regAlloc->clearNeeded(Ft); mVU.regAlloc->clearNeeded(Ft);
} }
mVU.profiler.EmitOp(opLQD); mVU.profiler.EmitOp(opLQD);
@ -977,18 +999,23 @@ mVUop(mVU_LQD) {
mVUop(mVU_LQI) { mVUop(mVU_LQI) {
pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, true); } pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, true); }
pass2 { pass2 {
xAddressVoid ptr(mVU.regs().Mem); void *ptr = mVU.regs().Mem;
xAddressReg is = xEmptyReg;
if (_Is_) { if (_Is_) {
mVUallocVIa(mVU, gprT1, _Is_); mVUallocVIa(mVU, gprT1, _Is_);
xMOV(gprT2, gprT1); xMOV(gprT2, gprT1);
xADD(gprT1b, 1); xADD(gprT1b, 1);
mVUallocVIb(mVU, gprT1, _Is_); mVUallocVIb(mVU, gprT1, _Is_);
mVUaddrFix (mVU, gprT2); mVUaddrFix (mVU, gprT2q);
ptr += gprT2; is = gprT2q;
} }
if (!mVUlow.noWriteVF) { if (!mVUlow.noWriteVF) {
const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W); const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
mVUloadReg(Ft, ptr, _X_Y_Z_W); if (is.IsEmpty()) {
mVUloadReg(Ft, xAddressVoid(ptr), _X_Y_Z_W);
} else {
mVUloadReg(Ft, xComplexAddress(gprT3q, ptr, is), _X_Y_Z_W);
}
mVU.regAlloc->clearNeeded(Ft); mVU.regAlloc->clearNeeded(Ft);
} }
mVU.profiler.EmitOp(opLQI); mVU.profiler.EmitOp(opLQI);
@ -1003,17 +1030,16 @@ mVUop(mVU_LQI) {
mVUop(mVU_SQ) { mVUop(mVU_SQ) {
pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, false); } pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, false); }
pass2 { pass2 {
xAddressVoid ptr(mVU.regs().Mem); void * ptr = mVU.regs().Mem;
mVUallocVIa(mVU, gprT2, _It_); mVUallocVIa(mVU, gprT2, _It_);
if (!_It_) if (!_It_)
xXOR(gprT2, gprT2); xXOR(gprT2, gprT2);
xADD(gprT2, _Imm11_); xADD(gprT2, _Imm11_);
mVUaddrFix(mVU, gprT2); mVUaddrFix(mVU, gprT2q);
ptr += gprT2;
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
mVUsaveReg(Fs, ptr, _X_Y_Z_W, 1); mVUsaveReg(Fs, xComplexAddress(gprT3q, ptr, gprT2q), _X_Y_Z_W, 1);
mVU.regAlloc->clearNeeded(Fs); mVU.regAlloc->clearNeeded(Fs);
mVU.profiler.EmitOp(opSQ); mVU.profiler.EmitOp(opSQ);
} }
@ -1023,17 +1049,24 @@ mVUop(mVU_SQ) {
mVUop(mVU_SQD) { mVUop(mVU_SQD) {
pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, true); } pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, true); }
pass2 { pass2 {
xAddressVoid ptr(mVU.regs().Mem); void *ptr = mVU.regs().Mem;
xAddressReg it = xEmptyReg;
if (_It_ || isVU0) {// Access VU1 regs mem-map in !_It_ case if (_It_ || isVU0) {// Access VU1 regs mem-map in !_It_ case
mVUallocVIa(mVU, gprT2, _It_); mVUallocVIa(mVU, gprT2, _It_);
xSUB(gprT2b, 1); xSUB(gprT2b, 1);
if (_It_) mVUallocVIb(mVU, gprT2, _It_); if (_It_) mVUallocVIb(mVU, gprT2, _It_);
mVUaddrFix (mVU, gprT2); mVUaddrFix (mVU, gprT2q);
ptr += gprT2; it = gprT2q;
}
else {
ptr = (void*)((sptr)ptr + (0xffff & (mVU.microMemSize-8)));
} }
else ptr += (0xffff & (mVU.microMemSize-8));
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
mVUsaveReg(Fs, ptr, _X_Y_Z_W, 1); if (it.IsEmpty()) {
mVUsaveReg(Fs, xAddressVoid(ptr), _X_Y_Z_W, 1);
} else {
mVUsaveReg(Fs, xComplexAddress(gprT3q, ptr, it), _X_Y_Z_W, 1);
}
mVU.regAlloc->clearNeeded(Fs); mVU.regAlloc->clearNeeded(Fs);
mVU.profiler.EmitOp(opSQD); mVU.profiler.EmitOp(opSQD);
} }
@ -1043,17 +1076,20 @@ mVUop(mVU_SQD) {
mVUop(mVU_SQI) { mVUop(mVU_SQI) {
pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, true); } pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, true); }
pass2 { pass2 {
xAddressVoid ptr(mVU.regs().Mem); void *ptr = mVU.regs().Mem;
if (_It_) { if (_It_) {
mVUallocVIa(mVU, gprT1, _It_); mVUallocVIa(mVU, gprT1, _It_);
xMOV(gprT2, gprT1); xMOV(gprT2, gprT1);
xADD(gprT1b, 1); xADD(gprT1b, 1);
mVUallocVIb(mVU, gprT1, _It_); mVUallocVIb(mVU, gprT1, _It_);
mVUaddrFix (mVU, gprT2); mVUaddrFix (mVU, gprT2q);
ptr += gprT2;
} }
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
mVUsaveReg(Fs, ptr, _X_Y_Z_W, 1); if (_It_) {
mVUsaveReg(Fs, xComplexAddress(gprT3q, ptr, gprT2q), _X_Y_Z_W, 1);
} else {
mVUsaveReg(Fs, xAddressVoid(ptr), _X_Y_Z_W, 1);
}
mVU.regAlloc->clearNeeded(Fs); mVU.regAlloc->clearNeeded(Fs);
mVU.profiler.EmitOp(opSQI); mVU.profiler.EmitOp(opSQI);
} }
@ -1409,7 +1445,7 @@ void normJumpPass2(mV) {
if (!mVUlow.evilBranch) { xMOV(ptr32[&mVU.branch], gprT1 ); } if (!mVUlow.evilBranch) { xMOV(ptr32[&mVU.branch], gprT1 ); }
else { xMOV(ptr32[&mVU.evilBranch], gprT1 ); } else { xMOV(ptr32[&mVU.evilBranch], gprT1 ); }
//If delay slot is conditional, it uses badBranch to go to its target //If delay slot is conditional, it uses badBranch to go to its target
if (mVUlow.badBranch) { xADD(gprT1, 8); xMOV(ptr32[&mVU.badBranch], gprT1); } if (mVUlow.badBranch) { xADD(gprT1, 8); xMOV(ptr32[&mVU.badBranch], gprT1); }
} }
} }

View File

@ -373,7 +373,8 @@ static void recCTC2() {
// Executing vu0 block here fixes the intro of Ratchet and Clank // Executing vu0 block here fixes the intro of Ratchet and Clank
// sVU's COP2 has a comment that "Donald Duck" needs this too... // sVU's COP2 has a comment that "Donald Duck" needs this too...
if (_Rd_) _eeMoveGPRtoM((uptr)&vu0Regs.VI[_Rd_].UL, _Rt_); if (_Rd_) _eeMoveGPRtoM((uptr)&vu0Regs.VI[_Rd_].UL, _Rt_);
xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, (uptr)CpuVU0); xLoadFarAddr(arg1reg, CpuVU0);
xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg);
break; break;
} }
} }

View File

@ -18,7 +18,7 @@
using namespace x86Emitter; using namespace x86Emitter;
typedef xRegisterSSE xmm; typedef xRegisterSSE xmm;
typedef xRegisterLong x32; typedef xRegister32 x32;
struct microVU; struct microVU;
@ -145,14 +145,24 @@ static const char branchSTR[16][8] = {
#define gprT1 eax // eax - Temp Reg #define gprT1 eax // eax - Temp Reg
#define gprT2 ecx // ecx - Temp Reg #define gprT2 ecx // ecx - Temp Reg
#define gprT3 edx // edx - Temp Reg #define gprT3 edx // edx - Temp Reg
#define gprT1q rax // rax - Temp Reg (address-register form of gprT1)
#define gprT2q rcx // rcx - Temp Reg (address-register form of gprT2)
#define gprT3q rdx // rdx - Temp Reg (address-register form of gprT3)
#define gprT1b ax // Low 16-bit of gprT1 (eax) #define gprT1b ax // Low 16-bit of gprT1 (eax)
#define gprT2b cx // Low 16-bit of gprT2 (ecx) #define gprT2b cx // Low 16-bit of gprT2 (ecx)
#define gprT3b dx // Low 16-bit of gprT3 (edx) #define gprT3b dx // Low 16-bit of gprT3 (edx)
#ifdef __M_X86_64
#define gprF0 ebx // Status Flag 0
#define gprF1 r12d // Status Flag 1
#define gprF2 r13d // Status Flag 2
#define gprF3 r14d // Status Flag 3
#else
#define gprF0 ebx // Status Flag 0 #define gprF0 ebx // Status Flag 0
#define gprF1 ebp // Status Flag 1 #define gprF1 ebp // Status Flag 1
#define gprF2 esi // Status Flag 2 #define gprF2 esi // Status Flag 2
#define gprF3 edi // Status Flag 3 #define gprF3 edi // Status Flag 3
#endif
// Function Params // Function Params
#define mP microVU& mVU, int recPass #define mP microVU& mVU, int recPass

View File

@ -236,6 +236,18 @@ __fi void mVUrestoreRegs(microVU& mVU, bool fromMemory = false) {
else xMOVAPS(xmmPQ, ptr128[&mVU.xmmBackup[xmmPQ.Id][0]]); else xMOVAPS(xmmPQ, ptr128[&mVU.xmmBackup[xmmPQ.Id][0]]);
} }
// Scope guard around mVUbackupRegs / mVUrestoreRegs: the constructor calls
// mVUbackupRegs(mVU, fromMemory) and the destructor calls mVUrestoreRegs with the same
// arguments, so the two calls stay paired however the enclosing scope is left.
// Copying is disabled: a copied guard would run the restore a second time.
class mVUScopedXMMBackup {
	microVU& mVU;
	bool fromMemory;
public:
	mVUScopedXMMBackup(microVU& mVU, bool fromMemory): mVU(mVU), fromMemory(fromMemory) {
		mVUbackupRegs(mVU, fromMemory);
	}
	~mVUScopedXMMBackup() {
		mVUrestoreRegs(mVU, fromMemory);
	}

	// Exactly one restore per backup; declaring these also suppresses the implicit moves.
	mVUScopedXMMBackup(const mVUScopedXMMBackup&) = delete;
	mVUScopedXMMBackup& operator=(const mVUScopedXMMBackup&) = delete;
};
_mVUt void __fc mVUprintRegs() { _mVUt void __fc mVUprintRegs() {
microVU& mVU = mVUx; microVU& mVU = mVUx;
for(int i = 0; i < 8; i++) { for(int i = 0; i < 8; i++) {
@ -274,42 +286,31 @@ static void __fc mVUwaitMTVU() {
} }
// Transforms the Address in gprReg to valid VU0/VU1 Address // Transforms the Address in gprReg to valid VU0/VU1 Address
__fi void mVUaddrFix(mV, const x32& gprReg) __fi void mVUaddrFix(mV, const xAddressReg& gprReg)
{ {
if (isVU1) { if (isVU1) {
xAND(gprReg, 0x3ff); // wrap around xAND(xRegister32(gprReg.Id), 0x3ff); // wrap around
xSHL(gprReg, 4); xSHL(xRegister32(gprReg.Id), 4);
} }
else { else {
xTEST(gprReg, 0x400); xTEST(xRegister32(gprReg.Id), 0x400);
xForwardJNZ8 jmpA; // if addr & 0x4000, reads VU1's VF regs and VI regs xForwardJNZ8 jmpA; // if addr & 0x4000, reads VU1's VF regs and VI regs
xAND(gprReg, 0xff); // if !(addr & 0x4000), wrap around xAND(xRegister32(gprReg.Id), 0xff); // if !(addr & 0x4000), wrap around
xForwardJump32 jmpB; xForwardJump32 jmpB;
jmpA.SetTarget(); jmpA.SetTarget();
if (THREAD_VU1) { if (THREAD_VU1) {
mVUbackupRegs(mVU, true); {
xPUSH(gprT1); mVUScopedXMMBackup mVUSave(mVU, true);
xPUSH(gprT2); xScopedSavedRegisters save {gprT1q, gprT2q, gprT3q};
xPUSH(gprT3); if (IsDevBuild && !isCOP2) { // Lets see which games do this!
// Align the stackframe (GCC only, since GCC assumes stackframe is always aligned) xMOV(arg1regd, mVU.prog.cur->idx); // Note: Kernel does it via COP2 to initialize VU1!
#ifdef __GNUC__ xMOV(arg2regd, xPC); // So we don't spam console, we'll only check micro-mode...
xSUB(esp, 4); xFastCall((void*)mVUwarningRegAccess, arg1regd, arg2regd);
#endif }
if (IsDevBuild && !isCOP2) { // Lets see which games do this! xFastCall((void*)mVUwaitMTVU);
xMOV(gprT2, mVU.prog.cur->idx); // Note: Kernel does it via COP2 to initialize VU1!
xMOV(gprT3, xPC); // So we don't spam console, we'll only check micro-mode...
xCALL((void*)mVUwarningRegAccess);
} }
xCALL((void*)mVUwaitMTVU);
#ifdef __GNUC__
xADD(esp, 4);
#endif
xPOP (gprT3);
xPOP (gprT2);
xPOP (gprT1);
mVUrestoreRegs(mVU, true);
} }
xAND(gprReg, 0x3f); // ToDo: theres a potential problem if VU0 overrides VU1's VF0/VI0 regs! xAND(xRegister32(gprReg.Id), 0x3f); // ToDo: theres a potential problem if VU0 overrides VU1's VF0/VI0 regs!
xADD(gprReg, (u128*)VU1.VF - (u128*)VU0.Mem); xADD(gprReg, (u128*)VU1.VF - (u128*)VU0.Mem);
jmpB.SetTarget(); jmpB.SetTarget();
xSHL(gprReg, 4); // multiply by 16 (shift left by 4) xSHL(gprReg, 4); // multiply by 16 (shift left by 4)
@ -568,38 +569,38 @@ void mVUcustomSearch() {
memset(mVUsearchXMM, 0xcc, __pagesize); memset(mVUsearchXMM, 0xcc, __pagesize);
xSetPtr(mVUsearchXMM); xSetPtr(mVUsearchXMM);
xMOVAPS (xmm0, ptr32[ecx]); xMOVAPS (xmm0, ptr32[arg1reg]);
xPCMP.EQD(xmm0, ptr32[edx]); xPCMP.EQD(xmm0, ptr32[arg2reg]);
xMOVAPS (xmm1, ptr32[ecx + 0x10]); xMOVAPS (xmm1, ptr32[arg1reg + 0x10]);
xPCMP.EQD(xmm1, ptr32[edx + 0x10]); xPCMP.EQD(xmm1, ptr32[arg2reg + 0x10]);
xPAND (xmm0, xmm1); xPAND (xmm0, xmm1);
xMOVMSKPS(eax, xmm0); xMOVMSKPS(eax, xmm0);
xCMP (eax, 0xf); xCMP (eax, 0xf);
xForwardJL8 exitPoint; xForwardJL8 exitPoint;
xMOVAPS (xmm0, ptr32[ecx + 0x20]); xMOVAPS (xmm0, ptr32[arg1reg + 0x20]);
xPCMP.EQD(xmm0, ptr32[edx + 0x20]); xPCMP.EQD(xmm0, ptr32[arg2reg + 0x20]);
xMOVAPS (xmm1, ptr32[ecx + 0x30]); xMOVAPS (xmm1, ptr32[arg1reg + 0x30]);
xPCMP.EQD(xmm1, ptr32[edx + 0x30]); xPCMP.EQD(xmm1, ptr32[arg2reg + 0x30]);
xPAND (xmm0, xmm1); xPAND (xmm0, xmm1);
xMOVAPS (xmm2, ptr32[ecx + 0x40]); xMOVAPS (xmm2, ptr32[arg1reg + 0x40]);
xPCMP.EQD(xmm2, ptr32[edx + 0x40]); xPCMP.EQD(xmm2, ptr32[arg2reg + 0x40]);
xMOVAPS (xmm3, ptr32[ecx + 0x50]); xMOVAPS (xmm3, ptr32[arg1reg + 0x50]);
xPCMP.EQD(xmm3, ptr32[edx + 0x50]); xPCMP.EQD(xmm3, ptr32[arg2reg + 0x50]);
xPAND (xmm2, xmm3); xPAND (xmm2, xmm3);
xMOVAPS (xmm4, ptr32[ecx + 0x60]); xMOVAPS (xmm4, ptr32[arg1reg + 0x60]);
xPCMP.EQD(xmm4, ptr32[edx + 0x60]); xPCMP.EQD(xmm4, ptr32[arg2reg + 0x60]);
xMOVAPS (xmm5, ptr32[ecx + 0x70]); xMOVAPS (xmm5, ptr32[arg1reg + 0x70]);
xPCMP.EQD(xmm5, ptr32[edx + 0x70]); xPCMP.EQD(xmm5, ptr32[arg2reg + 0x70]);
xPAND (xmm4, xmm5); xPAND (xmm4, xmm5);
xMOVAPS (xmm6, ptr32[ecx + 0x80]); xMOVAPS (xmm6, ptr32[arg1reg + 0x80]);
xPCMP.EQD(xmm6, ptr32[edx + 0x80]); xPCMP.EQD(xmm6, ptr32[arg2reg + 0x80]);
xMOVAPS (xmm7, ptr32[ecx + 0x90]); xMOVAPS (xmm7, ptr32[arg1reg + 0x90]);
xPCMP.EQD(xmm7, ptr32[edx + 0x90]); xPCMP.EQD(xmm7, ptr32[arg2reg + 0x90]);
xPAND (xmm6, xmm7); xPAND (xmm6, xmm7);
xPAND (xmm0, xmm2); xPAND (xmm0, xmm2);

View File

@ -221,10 +221,10 @@ void VifUnpackSSE_Dynarec::CompileRoutine() {
while (vNum) { while (vNum) {
ShiftDisplacementWindow( dstIndirect, ecx ); ShiftDisplacementWindow( dstIndirect, arg1reg );
if(UnpkNoOfIterations == 0) if(UnpkNoOfIterations == 0)
ShiftDisplacementWindow( srcIndirect, edx ); //Don't need to do this otherwise as we arent reading the source. ShiftDisplacementWindow( srcIndirect, arg2reg ); //Don't need to do this otherwise as we arent reading the source.
if (vCL < cycleSize) { if (vCL < cycleSize) {

View File

@ -63,8 +63,8 @@ VifUnpackSSE_Base::VifUnpackSSE_Base()
, UnpkLoopIteration(0) , UnpkLoopIteration(0)
, UnpkNoOfIterations(0) , UnpkNoOfIterations(0)
, IsAligned(0) , IsAligned(0)
, dstIndirect(ecx) // parameter 1 of __fastcall , dstIndirect(arg1reg)
, srcIndirect(edx) // parameter 2 of __fastcall , srcIndirect(arg2reg)
, workReg( xmm1 ) , workReg( xmm1 )
, destReg( xmm0 ) , destReg( xmm0 )
{ {

View File

@ -24,23 +24,23 @@ TEST(CodegenTests, MOVTest)
{ {
CODEGEN_TEST_BOTH(xMOV(rax, 0), "31 c0"); CODEGEN_TEST_BOTH(xMOV(rax, 0), "31 c0");
CODEGEN_TEST_64(xMOV(rax, rcx), "48 89 c8"); CODEGEN_TEST_64(xMOV(rax, rcx), "48 89 c8");
CODEGEN_TEST_BOTH(xMOV(eaxd, ecxd), "89 c8"); CODEGEN_TEST_BOTH(xMOV(eax, ecx), "89 c8");
CODEGEN_TEST_64(xMOV(r8, 0), "45 31 c0"); CODEGEN_TEST_64(xMOV(r8, 0), "45 31 c0");
CODEGEN_TEST_64(xMOV(rax, r8), "4c 89 c0"); CODEGEN_TEST_64(xMOV(rax, r8), "4c 89 c0");
CODEGEN_TEST_64(xMOV(r8, rax), "49 89 c0"); CODEGEN_TEST_64(xMOV(r8, rax), "49 89 c0");
CODEGEN_TEST_64(xMOV(r8, r9), "4d 89 c8"); CODEGEN_TEST_64(xMOV(r8, r9), "4d 89 c8");
CODEGEN_TEST_64(xMOV(rax, ptrNative[rcx]), "48 8b 01"); CODEGEN_TEST_64(xMOV(rax, ptrNative[rcx]), "48 8b 01");
CODEGEN_TEST_BOTH(xMOV(eaxd, ptrNative[rcx]), "8b 01"); CODEGEN_TEST_BOTH(xMOV(eax, ptrNative[rcx]), "8b 01");
CODEGEN_TEST_64(xMOV(ptrNative[rax], rcx), "48 89 08"); CODEGEN_TEST_64(xMOV(ptrNative[rax], rcx), "48 89 08");
CODEGEN_TEST_BOTH(xMOV(ptr32[rax], ecxd), "89 08"); CODEGEN_TEST_BOTH(xMOV(ptr32[rax], ecx), "89 08");
CODEGEN_TEST_64(xMOV(rax, ptrNative[r8]), "49 8b 00"); CODEGEN_TEST_64(xMOV(rax, ptrNative[r8]), "49 8b 00");
CODEGEN_TEST_64(xMOV(ptrNative[r8], rax), "49 89 00"); CODEGEN_TEST_64(xMOV(ptrNative[r8], rax), "49 89 00");
CODEGEN_TEST_64(xMOV(r8, ptrNative[r9]), "4d 8b 01"); CODEGEN_TEST_64(xMOV(r8, ptrNative[r9]), "4d 8b 01");
CODEGEN_TEST_64(xMOV(ptrNative[r8], r9), "4d 89 08"); CODEGEN_TEST_64(xMOV(ptrNative[r8], r9), "4d 89 08");
CODEGEN_TEST_64(xMOV(rax, ptrNative[rbx*4+3+rcx]), "48 8b 44 99 03"); CODEGEN_TEST_64(xMOV(rax, ptrNative[rbx*4+3+rcx]), "48 8b 44 99 03");
CODEGEN_TEST_64(xMOV(ptrNative[rbx*4+3+rax], rcx), "48 89 4c 98 03"); CODEGEN_TEST_64(xMOV(ptrNative[rbx*4+3+rax], rcx), "48 89 4c 98 03");
CODEGEN_TEST_BOTH(xMOV(eaxd, ptr32[rbx*4+3+rcx]), "8b 44 99 03"); CODEGEN_TEST_BOTH(xMOV(eax, ptr32[rbx*4+3+rcx]), "8b 44 99 03");
CODEGEN_TEST_BOTH(xMOV(ptr32[rbx*4+3+rax], ecxd), "89 4c 98 03"); CODEGEN_TEST_BOTH(xMOV(ptr32[rbx*4+3+rax], ecx), "89 4c 98 03");
CODEGEN_TEST_64(xMOV(r8, ptrNative[r10*4+3+r9]), "4f 8b 44 91 03"); CODEGEN_TEST_64(xMOV(r8, ptrNative[r10*4+3+r9]), "4f 8b 44 91 03");
CODEGEN_TEST_64(xMOV(ptrNative[r9*4+3+r8], r10), "4f 89 54 88 03"); CODEGEN_TEST_64(xMOV(ptrNative[r9*4+3+r8], r10), "4f 89 54 88 03");
CODEGEN_TEST_64(xMOV(ptrNative[r8], 0), "49 c7 00 00 00 00 00"); CODEGEN_TEST_64(xMOV(ptrNative[r8], 0), "49 c7 00 00 00 00 00");
@ -56,14 +56,18 @@ TEST(CodegenTests, MOVTest)
// LEATest: each statement pairs an address-loading emitter call (xLEA,
// xLoadFarAddr, xLEA_Writeback) with the hex byte string it is expected to emit.
//
// NOTE(review): this text is a side-by-side diff rendering. A line that shows a
// statement twice carries the old and the new revision; the four statements
// shown only once (both xLoadFarAddr-on-r8 cases, the xLoadFarAddr-on-rax case
// and the xLEA_Writeback case) look like additions, matching the "+56,18" hunk
// size -- confirm against the real test file.
//
// NOTE(review): presumably CODEGEN_TEST_64 checks the 64-bit encoding only,
// CODEGEN_TEST_BOTH expects identical bytes in 32- and 64-bit mode, and
// CODEGEN_TEST takes the 32-bit bytes first and the 64-bit bytes second. The
// macro definitions are not visible here -- verify.
//
// NOTE(review): the "f9 ff ff ff" tails are a disp32 of -7 on 7-byte
// RIP-relative instructions, so `base` presumably is the address the test code
// is emitted at -- confirm where `base` is defined.
//
// NOTE(review): 0x1234567890 does not fit in 32 bits, and the expected bytes
// "49 b8 ..." carry it as a full 8-byte immediate rather than a displacement.
//
// NOTE(review): the 0xcd bytes in the xLEA_Writeback expectation look like a
// placeholder that the caller patches through the returned pointer; the reason
// the final 64-bit byte is 0d rather than cd is not visible here -- verify
// against the emitter implementation.
TEST(CodegenTests, LEATest) TEST(CodegenTests, LEATest)
{ {
CODEGEN_TEST_64(xLEA(rax, ptr[rcx]), "48 89 c8"); // Converted to mov rax, rcx CODEGEN_TEST_64(xLEA(rax, ptr[rcx]), "48 89 c8"); // Converted to mov rax, rcx
CODEGEN_TEST_BOTH(xLEA(eaxd, ptr[rcx]), "89 c8"); // Converted to mov eax, ecx CODEGEN_TEST_BOTH(xLEA(eax, ptr[rcx]), "89 c8"); // Converted to mov eax, ecx
CODEGEN_TEST_64(xLEA(rax, ptr[r8]), "4c 89 c0"); // Converted to mov rax, r8 CODEGEN_TEST_64(xLEA(rax, ptr[r8]), "4c 89 c0"); // Converted to mov rax, r8
CODEGEN_TEST_64(xLEA(r8, ptr[r9]), "4d 89 c8"); // Converted to mov r8, r9 CODEGEN_TEST_64(xLEA(r8, ptr[r9]), "4d 89 c8"); // Converted to mov r8, r9
CODEGEN_TEST_64(xLEA(rax, ptr[rbx*4+3+rcx]), "48 8d 44 99 03"); CODEGEN_TEST_64(xLEA(rax, ptr[rbx*4+3+rcx]), "48 8d 44 99 03");
CODEGEN_TEST_BOTH(xLEA(eaxd, ptr32[rbx*4+3+rcx]), "8d 44 99 03"); CODEGEN_TEST_BOTH(xLEA(eax, ptr32[rbx*4+3+rcx]), "8d 44 99 03");
CODEGEN_TEST_64(xLEA(r8, ptr[r10*4+3+r9]), "4f 8d 44 91 03"); CODEGEN_TEST_64(xLEA(r8, ptr[r10*4+3+r9]), "4f 8d 44 91 03");
CODEGEN_TEST_64(xLEA(r8, ptr[base]), "4c 8d 05 f9 ff ff ff"); CODEGEN_TEST_64(xLEA(r8, ptr[base]), "4c 8d 05 f9 ff ff ff");
CODEGEN_TEST_64(xLoadFarAddr(r8, base), "4c 8d 05 f9 ff ff ff");
CODEGEN_TEST_64(xLoadFarAddr(r8, (void*)0x1234567890), "49 b8 90 78 56 34 12 00 00 00");
CODEGEN_TEST_BOTH(xLEA(rax, ptr[(void*)0x1234]), "b8 34 12 00 00"); // Converted to mov rax, 0x1234 CODEGEN_TEST_BOTH(xLEA(rax, ptr[(void*)0x1234]), "b8 34 12 00 00"); // Converted to mov rax, 0x1234
CODEGEN_TEST_BOTH(xLoadFarAddr(rax, (void*)0x1234), "b8 34 12 00 00");
CODEGEN_TEST(xLEA_Writeback(rbx), "bb cd cd cd cd", "48 8d 1d cd cd cd 0d");
} }
TEST(CodegenTests, PUSHTest) TEST(CodegenTests, PUSHTest)
@ -100,7 +104,7 @@ TEST(CodegenTests, POPTest)
TEST(CodegenTests, MathTest) TEST(CodegenTests, MathTest)
{ {
CODEGEN_TEST(xINC(eaxd), "40", "ff c0"); CODEGEN_TEST(xINC(eax), "40", "ff c0");
CODEGEN_TEST(xDEC(rax), "48", "48 ff c8"); CODEGEN_TEST(xDEC(rax), "48", "48 ff c8");
CODEGEN_TEST_64(xINC(r8), "49 ff c0"); CODEGEN_TEST_64(xINC(r8), "49 ff c0");
CODEGEN_TEST_64(xADD(r8, r9), "4d 01 c8"); CODEGEN_TEST_64(xADD(r8, r9), "4d 01 c8");
@ -108,33 +112,33 @@ TEST(CodegenTests, MathTest)
CODEGEN_TEST_64(xADD(rax, 0x1234), "48 05 34 12 00 00"); CODEGEN_TEST_64(xADD(rax, 0x1234), "48 05 34 12 00 00");
CODEGEN_TEST_64(xADD(ptr32[base], -0x60), "83 05 f9 ff ff ff a0"); CODEGEN_TEST_64(xADD(ptr32[base], -0x60), "83 05 f9 ff ff ff a0");
CODEGEN_TEST_64(xADD(ptr32[base], 0x1234), "81 05 f6 ff ff ff 34 12 00 00"); CODEGEN_TEST_64(xADD(ptr32[base], 0x1234), "81 05 f6 ff ff ff 34 12 00 00");
CODEGEN_TEST_BOTH(xADD(eaxd, ebxd), "01 d8"); CODEGEN_TEST_BOTH(xADD(eax, ebx), "01 d8");
CODEGEN_TEST_BOTH(xADD(eaxd, 0x1234), "05 34 12 00 00"); CODEGEN_TEST_BOTH(xADD(eax, 0x1234), "05 34 12 00 00");
CODEGEN_TEST_64(xADD(r8, ptrNative[r10*4+3+r9]), "4f 03 44 91 03"); CODEGEN_TEST_64(xADD(r8, ptrNative[r10*4+3+r9]), "4f 03 44 91 03");
CODEGEN_TEST_64(xADD(ptrNative[r9*4+3+r8], r10), "4f 01 54 88 03"); CODEGEN_TEST_64(xADD(ptrNative[r9*4+3+r8], r10), "4f 01 54 88 03");
CODEGEN_TEST_BOTH(xADD(eaxd, ptr32[rbx*4+3+rcx]), "03 44 99 03"); CODEGEN_TEST_BOTH(xADD(eax, ptr32[rbx*4+3+rcx]), "03 44 99 03");
CODEGEN_TEST_BOTH(xADD(ptr32[rax*4+3+rbx], ecxd), "01 4c 83 03"); CODEGEN_TEST_BOTH(xADD(ptr32[rax*4+3+rbx], ecx), "01 4c 83 03");
CODEGEN_TEST_64(xSUB(r8, 0x12), "49 83 e8 12"); CODEGEN_TEST_64(xSUB(r8, 0x12), "49 83 e8 12");
CODEGEN_TEST_64(xSUB(rax, 0x1234), "48 2d 34 12 00 00"); CODEGEN_TEST_64(xSUB(rax, 0x1234), "48 2d 34 12 00 00");
CODEGEN_TEST_BOTH(xSUB(eaxd, ptr32[rcx*4+rax]), "2b 04 88"); CODEGEN_TEST_BOTH(xSUB(eax, ptr32[rcx*4+rax]), "2b 04 88");
CODEGEN_TEST_64(xMUL(ptr32[base]), "f7 2d fa ff ff ff"); CODEGEN_TEST_64(xMUL(ptr32[base]), "f7 2d fa ff ff ff");
CODEGEN_TEST(xMUL(ptr32[(void*)0x1234]), "f7 2d 34 12 00 00", "f7 2c 25 34 12 00 00"); CODEGEN_TEST(xMUL(ptr32[(void*)0x1234]), "f7 2d 34 12 00 00", "f7 2c 25 34 12 00 00");
CODEGEN_TEST_BOTH(xDIV(ecxd), "f7 f9"); CODEGEN_TEST_BOTH(xDIV(ecx), "f7 f9");
} }
TEST(CodegenTests, BitwiseTest) TEST(CodegenTests, BitwiseTest)
{ {
CODEGEN_TEST_64(xSHR(r8, cl), "49 d3 e8"); CODEGEN_TEST_64(xSHR(r8, cl), "49 d3 e8");
CODEGEN_TEST_64(xSHR(rax, cl), "48 d3 e8"); CODEGEN_TEST_64(xSHR(rax, cl), "48 d3 e8");
CODEGEN_TEST_BOTH(xSHR(ecxd, cl), "d3 e9"); CODEGEN_TEST_BOTH(xSHR(ecx, cl), "d3 e9");
CODEGEN_TEST_64(xSAR(r8, 1), "49 d1 f8"); CODEGEN_TEST_64(xSAR(r8, 1), "49 d1 f8");
CODEGEN_TEST_64(xSAR(rax, 60), "48 c1 f8 3c"); CODEGEN_TEST_64(xSAR(rax, 60), "48 c1 f8 3c");
CODEGEN_TEST_BOTH(xSAR(eaxd, 30), "c1 f8 1e"); CODEGEN_TEST_BOTH(xSAR(eax, 30), "c1 f8 1e");
CODEGEN_TEST_BOTH(xSHL(ebxd, 30), "c1 e3 1e"); CODEGEN_TEST_BOTH(xSHL(ebx, 30), "c1 e3 1e");
CODEGEN_TEST_64(xSHL(ptr32[base], 4), "c1 25 f9 ff ff ff 04"); CODEGEN_TEST_64(xSHL(ptr32[base], 4), "c1 25 f9 ff ff ff 04");
CODEGEN_TEST_64(xAND(r8, r9), "4d 21 c8"); CODEGEN_TEST_64(xAND(r8, r9), "4d 21 c8");
CODEGEN_TEST_64(xXOR(rax, ptrNative[r10]), "49 33 02"); CODEGEN_TEST_64(xXOR(rax, ptrNative[r10]), "49 33 02");
CODEGEN_TEST_BOTH(xOR(esid, ptr32[rax+rbx]), "0b 34 18"); CODEGEN_TEST_BOTH(xOR(esi, ptr32[rax+rbx]), "0b 34 18");
CODEGEN_TEST_64(xNOT(r8), "49 f7 d0"); CODEGEN_TEST_64(xNOT(r8), "49 f7 d0");
CODEGEN_TEST_64(xNOT(ptrNative[rax]), "48 f7 10"); CODEGEN_TEST_64(xNOT(ptrNative[rax]), "48 f7 10");
CODEGEN_TEST_BOTH(xNOT(ptr32[rbx]), "f7 13"); CODEGEN_TEST_BOTH(xNOT(ptr32[rbx]), "f7 13");