EE/IOP/VU: x86-64 recompiler support

This commit is contained in:
Tellow Krinkle 2020-04-15 16:11:53 -05:00 committed by tellowkrinkle
parent eeca29b6d3
commit dc57270fb8
31 changed files with 680 additions and 493 deletions

View File

@ -66,6 +66,7 @@ struct xImpl_FastCall
void operator()(void *f, u32 a1, const xRegister32 &a2) const;
void operator()(void *f, const xIndirect32 &a1) const;
void operator()(void *f, u32 a1, u32 a2) const;
void operator()(void *f, void *a1) const;
#ifdef __M_X86_64
void operator()(void *f, const xRegisterLong &a1, const xRegisterLong &a2 = xEmptyReg) const;

View File

@ -145,6 +145,8 @@ extern void xBSWAP(const xRegister32or64 &to);
extern void xLEA(xRegister64 to, const xIndirectVoid &src, bool preserve_flags = false);
extern void xLEA(xRegister32 to, const xIndirectVoid &src, bool preserve_flags = false);
extern void xLEA(xRegister16 to, const xIndirectVoid &src, bool preserve_flags = false);
/// LEA with a target that will be decided later, guarantees that no optimizations are performed that could change what needs to be written in
extern u32* xLEA_Writeback(xAddressReg to);
// ----- Push / Pop Instructions -----
// Note: pushad/popad implementations are intentionally left out. The instructions are
@ -198,6 +200,27 @@ public:
~xScopedStackFrame();
};
//////////////////////////////////////////////////////////////////////////////////////////
/// Helper object to save some temporary registers before the call
class xScopedSavedRegisters
{
std::vector<std::reference_wrapper<const xAddressReg>> regs;
public:
xScopedSavedRegisters(std::initializer_list<std::reference_wrapper<const xAddressReg>> regs);
~xScopedSavedRegisters();
};
//////////////////////////////////////////////////////////////////////////////////////////
/// Helper function to calculate base+offset taking into account the limitations of x86-64's RIP-relative addressing
/// (Will either return `base+offset` or LEA `base` into `tmpRegister` and return `tmpRegister+offset`)
xAddressVoid xComplexAddress(const xAddressReg& tmpRegister, void *base, const xAddressVoid& offset);
//////////////////////////////////////////////////////////////////////////////////////////
/// Helper function to load addresses that may be far from the current instruction pointer
/// On i386, resolves to `mov dst, (sptr)addr`
/// On x86-64, resolves to either `mov dst, (sptr)addr` or `lea dst, [addr]` depending on the distance from RIP
void xLoadFarAddr(const xAddressReg& dst, void *addr);
//////////////////////////////////////////////////////////////////////////////////////////
// JMP / Jcc Instructions!
@ -445,8 +468,8 @@ extern void xMOVNTDQA(const xIndirectVoid &to, const xRegisterSSE &from);
extern void xMOVNTPD(const xIndirectVoid &to, const xRegisterSSE &from);
extern void xMOVNTPS(const xIndirectVoid &to, const xRegisterSSE &from);
extern void xMOVMSKPS(const xRegister32or64 &to, const xRegisterSSE &from);
extern void xMOVMSKPD(const xRegister32or64 &to, const xRegisterSSE &from);
extern void xMOVMSKPS(const xRegister32 &to, const xRegisterSSE &from);
extern void xMOVMSKPD(const xRegister32 &to, const xRegisterSSE &from);
extern void xMASKMOV(const xRegisterSSE &to, const xRegisterSSE &from);
extern void xPMOVMSKB(const xRegister32or64 &to, const xRegisterSSE &from);

View File

@ -489,6 +489,7 @@ public:
#else
#define xRegisterLong xRegister32
#endif
static const int wordsize = sizeof(sptr);
class xAddressReg : public xRegisterLong
{
@ -648,14 +649,9 @@ extern const xAddressReg
r8, r9, r10, r11,
r12, r13, r14, r15;
extern const xAddressReg
eax, ebx, ecx, edx,
esi, edi, ebp, esp;
// Temporary registers to aid the move to x86-64
extern const xRegister32
eaxd, ebxd, ecxd, edxd,
esid, edid, ebpd, espd,
eax, ebx, ecx, edx,
esi, edi, ebp, esp,
r8d, r9d, r10d, r11d,
r12d, r13d, r14d, r15d;

View File

@ -104,6 +104,11 @@ void xImpl_FastCall::operator()(void *f, u32 a1, const xRegisterLong &a2) const
}
#endif
// Fast call with a single pointer argument: emits an LEA to place the address
// a1 into arg1reg, then forwards to the register-pair overload to perform the
// actual call. (LEA is used rather than a MOV of the immediate so the emitter
// can pick an encoding that works for the target address — presumably including
// RIP-relative on x86-64; confirm against xLEA's implementation.)
void xImpl_FastCall::operator()(void *f, void *a1) const {
xLEA(arg1reg, ptr[a1]);
(*this)(f, arg1reg, arg2reg);
}
void xImpl_FastCall::operator()(void *f, u32 a1, const xRegister32 &a2) const {
if (!a2.IsEmpty()) { xMOV(arg2regd, a2); }
xMOV(arg1regd, a1);

View File

@ -711,8 +711,8 @@ __fi void xMOVNTPS(const xIndirectVoid &to, const xRegisterSSE &from) { xOpWrite
// ------------------------------------------------------------------------
__fi void xMOVMSKPS(const xRegister32or64 &to, const xRegisterSSE &from) { xOpWrite0F(0x50, to, from); }
__fi void xMOVMSKPD(const xRegister32or64 &to, const xRegisterSSE &from) { xOpWrite0F(0x66, 0x50, to, from, true); }
// MOVMSKPS / MOVMSKPD (opcode 0F 50, the PD form with a 66 prefix): gather the
// sign bits of the packed floats/doubles in `from` into the low bits of the
// 32-bit GPR `to`. These overloads take xRegister32 explicitly (the old
// xRegister32or64 forms are deprecated above): the destination is always a
// 32-bit register for this instruction.
__fi void xMOVMSKPS(const xRegister32 &to, const xRegisterSSE &from) { xOpWrite0F(0x50, to, from); }
__fi void xMOVMSKPD(const xRegister32 &to, const xRegisterSSE &from) { xOpWrite0F(0x66, 0x50, to, from, true); }
// xMASKMOV:
// Selectively write bytes from mm1/xmm1 to memory location using the byte mask in mm2/xmm2.

View File

@ -130,17 +130,11 @@ const xAddressReg
r12(12), r13(13),
r14(14), r15(15);
const xAddressReg
const xRegister32
eax(0), ebx(3),
ecx(1), edx(2),
esp(4), ebp(5),
esi(6), edi(7);
const xRegister32
eaxd(0), ebxd(3),
ecxd(1), edxd(2),
espd(4), ebpd(5),
esid(6), edid(7),
esi(6), edi(7),
r8d(8), r9d(9),
r10d(10), r11d(11),
r12d(12), r13d(13),
@ -173,10 +167,10 @@ const xAddressReg
calleeSavedReg2 = rsi;
const xRegister32
arg1regd = ecxd,
arg2regd = edxd,
calleeSavedReg1d = edid,
calleeSavedReg2d = esid;
arg1regd = ecx,
arg2regd = edx,
calleeSavedReg1d = edi,
calleeSavedReg2d = esi;
#else
const xAddressReg
arg1reg = rdi,
@ -187,8 +181,8 @@ const xAddressReg
calleeSavedReg2 = r13;
const xRegister32
arg1regd = edid,
arg2regd = esid,
arg1regd = edi,
arg2regd = esi,
calleeSavedReg1d = r12d,
calleeSavedReg2d = r13d;
#endif
@ -367,7 +361,7 @@ void EmitSibMagic(uint regfield, const xIndirectVoid &info, int extraRIPOffset)
EmitSibMagic(regfield, (void *)info.Displacement, extraRIPOffset);
return;
} else {
if (info.Index == ebp && displacement_size == 0)
if (info.Index == rbp && displacement_size == 0)
displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]!
ModRM(displacement_size, regfield, info.Index.Id & 7);
@ -385,7 +379,7 @@ void EmitSibMagic(uint regfield, const xIndirectVoid &info, int extraRIPOffset)
xWrite<s32>(info.Displacement);
return;
} else {
if (info.Base == ebp && displacement_size == 0)
if (info.Base == rbp && displacement_size == 0)
displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]!
ModRM(displacement_size, regfield, ModRm_UseSib);
@ -896,7 +890,7 @@ static void EmitLeaMagic(const xRegisterInt &to, const xIndirectVoid &src, bool
} else {
if (src.Scale == 0) {
if (!preserve_flags) {
if (src.Index == esp) {
if (src.Index == rsp) {
// ESP is not encodable as an index (ix86 ignores it), thus:
_xMovRtoR(to, sizeMatchedBase); // will do the trick!
if (src.Displacement)
@ -907,7 +901,7 @@ static void EmitLeaMagic(const xRegisterInt &to, const xIndirectVoid &src, bool
_g1_EmitOp(G1Type_ADD, to, sizeMatchedIndex);
return;
}
} else if ((src.Index == esp) && (src.Displacement == 0)) {
} else if ((src.Index == rsp) && (src.Displacement == 0)) {
// special case handling of ESP as Index, which is replaceable with
// a single MOV even when preserve_flags is set! :D
@ -937,6 +931,17 @@ __emitinline void xLEA(xRegister16 to, const xIndirectVoid &src, bool preserve_f
EmitLeaMagic(to, src, preserve_flags);
}
// Emits a "load address, to be filled in later" instruction and returns a
// pointer to the 32-bit placeholder field inside the emitted code, so the
// caller can patch in the real target afterwards (see the header comment:
// guarantees no optimization changes what must be written back).
__emitinline u32* xLEA_Writeback(xAddressReg to)
{
#ifdef __M_X86_64
// RIP-relative LEA (opcode 0x8d). The address passed is current-RIP + 7
// (the length of this LEA) + 0xdcdcdcd, so the encoded 32-bit displacement
// comes out as the recognizable placeholder value 0xdcdcdcd.
xOpWrite(0, 0x8d, to, ptr[(void*)(0xdcdcdcd + (uptr)xGetPtr() + 7)]);
#else
// 32-bit: MOV reg, imm32 (0xb8+reg) with placeholder immediate 0xcdcdcdcd.
xOpAccWrite(0, 0xb8 | to.Id, 0, to);
xWrite32(0xcdcdcdcd);
#endif
// xGetPtr() is now just past the instruction; the last 4 bytes emitted are
// the placeholder displacement/immediate — return a pointer to them.
return (u32*)xGetPtr() - 1;
}
// =====================================================================================================
// TEST / INC / DEC
// =====================================================================================================
@ -1145,6 +1150,14 @@ __emitinline void xRestoreReg(const xRegisterSSE &dest)
#endif
// Adjusts the stack pointer so that (offset + adjustment) is a multiple of 16.
// moveDown = true grows the stack (negative adjustment, used on scope entry);
// moveDown = false undoes that adjustment on scope exit.
static void stackAlign(int offset, bool moveDown) {
    const int adjustment = (16 - offset % 16) % 16;
    ALIGN_STACK(moveDown ? -adjustment : adjustment);
}
xScopedStackFrame::xScopedStackFrame(bool base_frame, bool save_base_pointer, int offset)
{
m_base_frame = base_frame;
@ -1188,12 +1201,12 @@ xScopedStackFrame::xScopedStackFrame(bool base_frame, bool save_base_pointer, in
#endif
ALIGN_STACK(-(16 - m_offset % 16));
stackAlign(m_offset, true);
}
xScopedStackFrame::~xScopedStackFrame()
{
ALIGN_STACK(16 - m_offset % 16);
stackAlign(m_offset, false);
#ifdef __M_X86_64
@ -1226,4 +1239,47 @@ xScopedStackFrame::~xScopedStackFrame()
}
}
/// Saves the given registers by pushing them in list order, then realigns the
/// stack to 16 bytes so calls made inside the scope remain ABI-compliant.
/// The destructor restores everything in reverse.
xScopedSavedRegisters::xScopedSavedRegisters(std::initializer_list<std::reference_wrapper<const xAddressReg>> regs)
    : regs(regs)
{
    for (const xAddressReg& reg : this->regs)
        xPUSH(reg);
    stackAlign(this->regs.size() * wordsize, true);
}
/// Undoes the constructor: removes the alignment padding, then pops the saved
/// registers in reverse order of how they were pushed.
xScopedSavedRegisters::~xScopedSavedRegisters() {
    stackAlign(regs.size() * wordsize, false);
    for (auto it = regs.rbegin(); it != regs.rend(); ++it)
        xPOP(it->get());
}
/// Builds `base + offset` as an addressable operand, working around x86-64's
/// 32-bit displacement limit: if `base` sign-extends from 32 bits it can be
/// folded directly into the displacement; otherwise it is first loaded into
/// tmpRegister and the register is used as the base instead.
xAddressVoid xComplexAddress(const xAddressReg& tmpRegister, void *base, const xAddressVoid& offset) {
    if ((s32)(sptr)base == (sptr)base)
        return offset + base; // fits in a signed 32-bit displacement
    xLEA(tmpRegister, ptr[base]);
    return offset + tmpRegister;
}
/// Loads an arbitrary address into `dst`.
/// x86-64: prefers a RIP-relative LEA when the target is within ±2GB of the
/// instruction (7-byte encoding), falling back to a full 64-bit MOV otherwise.
/// x86-32: a plain MOV of the immediate always suffices.
void xLoadFarAddr(const xAddressReg& dst, void *addr) {
#ifdef __M_X86_64
    const sptr target = (sptr)addr;
    const sptr nextRIP = (sptr)xGetPtr() + 7; // RIP after a 7-byte LEA
    const sptr disp = target - nextRIP;
    if ((s32)disp == disp)
        xLEA(dst, ptr[addr]);
    else
        xMOV64(dst, target);
#else
    xMOV(dst, (sptr)addr);
#endif
}
} // End namespace x86Emitter

View File

@ -782,7 +782,7 @@ void vtlb_Term()
//nothing to do for now
}
constexpr size_t VMAP_SIZE = sizeof(sptr) * VTLB_VMAP_ITEMS;
constexpr size_t VMAP_SIZE = sizeof(VTLBVirtual) * VTLB_VMAP_ITEMS;
// Reserves the vtlb core allocation used by various emulation components!
// [TODO] basemem - request allocating memory at the specified virtual location, which can allow

View File

@ -230,14 +230,14 @@ public:
#define PC_GETBLOCK_(x, reclut) ((BASEBLOCK*)(reclut[((u32)(x)) >> 16] + (x)*(sizeof(BASEBLOCK)/4)))
static void recLUT_SetPage(uptr reclut[0x10000], uptr hwlut[0x10000],
static void recLUT_SetPage(uptr reclut[0x10000], u32 hwlut[0x10000],
BASEBLOCK *mapbase, uint pagebase, uint pageidx, uint mappage)
{
// this value is in 64k pages!
uint page = pagebase + pageidx;
pxAssert( page < 0x10000 );
reclut[page] = (uptr)&mapbase[(mappage - page) << 14];
reclut[page] = (uptr)&mapbase[((s32)mappage - (s32)page) << 14];
if (hwlut)
hwlut[page] = 0u - (pagebase << 16);
}

View File

@ -118,12 +118,15 @@ extern _x86regs x86regs[iREGCNT_GPR], s_saveX86regs[iREGCNT_GPR];
uptr _x86GetAddr(int type, int reg);
void _initX86regs();
int _getFreeX86reg(int mode);
int _allocX86reg(x86Emitter::xRegisterLong x86reg, int type, int reg, int mode);
[[deprecated]] int _allocX86reg(x86Emitter::xRegister64 x86reg, int type, int reg, int mode);
int _allocX86reg(x86Emitter::xRegister32 x86reg, int type, int reg, int mode);
// To resolve ambiguity between 32 and 64, delete once everything's on 32
int _allocX86reg(x86Emitter::xRegisterEmpty x86reg, int type, int reg, int mode);
void _deleteX86reg(int type, int reg, int flush);
int _checkX86reg(int type, int reg, int mode);
void _addNeededX86reg(int type, int reg);
void _clearNeededX86regs();
void _freeX86reg(const x86Emitter::xRegisterLong& x86reg);
void _freeX86reg(const x86Emitter::xRegister32& x86reg);
void _freeX86reg(int x86reg);
void _freeX86regs();
void _flushCachedRegs();

View File

@ -397,9 +397,9 @@ void FPU_MUL(int info, int regd, int sreg, int treg, bool acc)
if (CHECK_FPUMULHACK)
{
xMOVD(ecx, xRegisterSSE(sreg));
xMOVD(edx, xRegisterSSE(treg));
xFastCall((void*)(uptr)&FPU_MUL_HACK, ecx, edx); //returns the hacked result or 0
xMOVD(arg1regd, xRegisterSSE(sreg));
xMOVD(arg2regd, xRegisterSSE(treg));
xFastCall((void*)(uptr)&FPU_MUL_HACK, arg1regd, arg2regd); //returns the hacked result or 0
xTEST(eax, eax);
noHack = JZ8(0);
xMOVDZX(xRegisterSSE(regd), eax);

View File

@ -1500,16 +1500,18 @@ void recQFSRV()
int info = eeRecompileCodeXMM(XMMINFO_WRITED);
xMOV(eax, ptr32[&cpuRegs.sa]);
xMOVDQU(xRegisterSSE(EEREC_D), ptr32[eax + &cpuRegs.GPR.r[_Rt_]]);
xLEA(rcx, ptr[&cpuRegs.GPR.r[_Rt_]]);
xMOVDQU(xRegisterSSE(EEREC_D), ptr32[rax + rcx]);
return;
}
int info = eeRecompileCodeXMM( XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED );
xMOV(eax, ptr32[&cpuRegs.sa]);
xMOVDQA(ptr32[&tempqw[0]], xRegisterSSE(EEREC_T));
xMOVDQA(ptr32[&tempqw[4]], xRegisterSSE(EEREC_S));
xMOVDQU(xRegisterSSE(EEREC_D), ptr32[eax + &tempqw]);
xLEA(rcx, ptr[tempqw]);
xMOVDQA(ptr32[rcx], xRegisterSSE(EEREC_T));
xMOVDQA(ptr32[rcx+16], xRegisterSSE(EEREC_S));
xMOVDQU(xRegisterSSE(EEREC_D), ptr32[rax + rcx]);
_clearNeededXMMregs();
}

View File

@ -46,7 +46,7 @@ u32 g_psxMaxRecMem = 0;
u32 s_psxrecblocks[] = {0};
uptr psxRecLUT[0x10000];
uptr psxhwLUT[0x10000];
u32 psxhwLUT[0x10000];
static __fi u32 HWADDR(u32 mem) { return psxhwLUT[mem >> 16] + mem; }
@ -126,13 +126,13 @@ static DynGenFunc* _DynGen_JITCompile()
u8* retval = xGetPtr();
xFastCall((void*)iopRecRecompile, ptr[&psxRegs.pc] );
xFastCall((void*)iopRecRecompile, ptr32[&psxRegs.pc] );
xMOV( eax, ptr[&psxRegs.pc] );
xMOV( ebx, eax );
xSHR( eax, 16 );
xMOV( ecx, ptr[psxRecLUT + (eax*4)] );
xJMP( ptr32[ecx+ebx] );
xMOV( rcx, ptrNative[xComplexAddress(rcx, psxRecLUT, rax*wordsize)] );
xJMP( ptrNative[rbx*(wordsize/4) + rcx] );
return (DynGenFunc*)retval;
}
@ -152,8 +152,8 @@ static DynGenFunc* _DynGen_DispatcherReg()
xMOV( eax, ptr[&psxRegs.pc] );
xMOV( ebx, eax );
xSHR( eax, 16 );
xMOV( ecx, ptr[psxRecLUT + (eax*4)] );
xJMP( ptr32[ecx+ebx] );
xMOV( rcx, ptrNative[xComplexAddress(rcx, psxRecLUT, rax*wordsize)] );
xJMP( ptrNative[rbx*(wordsize/4) + rcx] );
return (DynGenFunc*)retval;
}
@ -391,7 +391,7 @@ void _psxDeleteReg(int reg, int flush)
_deleteX86reg(X86TYPE_PSX, reg, flush ? 0 : 2);
}
void _psxMoveGPRtoR(const xRegisterLong& to, int fromgpr)
void _psxMoveGPRtoR(const xRegister32& to, int fromgpr)
{
if( PSX_IS_CONST1(fromgpr) )
xMOV(to, g_psxConstRegs[fromgpr] );
@ -863,22 +863,22 @@ void psxSetBranchReg(u32 reg)
psxbranch = 1;
if( reg != 0xffffffff ) {
_allocX86reg(esi, X86TYPE_PCWRITEBACK, 0, MODE_WRITE);
_psxMoveGPRtoR(esi, reg);
_allocX86reg(calleeSavedReg2d, X86TYPE_PCWRITEBACK, 0, MODE_WRITE);
_psxMoveGPRtoR(calleeSavedReg2d, reg);
psxRecompileNextInstruction(1);
if( x86regs[esi.GetId()].inuse ) {
pxAssert( x86regs[esi.GetId()].type == X86TYPE_PCWRITEBACK );
xMOV(ptr[&psxRegs.pc], esi);
x86regs[esi.GetId()].inuse = 0;
if( x86regs[calleeSavedReg2d.GetId()].inuse ) {
pxAssert( x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK );
xMOV(ptr32[&psxRegs.pc], calleeSavedReg2d);
x86regs[calleeSavedReg2d.GetId()].inuse = 0;
#ifdef PCSX2_DEBUG
xOR( esi, esi );
xOR( calleeSavedReg2d, calleeSavedReg2d );
#endif
}
else {
xMOV(eax, ptr[&g_recWriteback]);
xMOV(ptr[&psxRegs.pc], eax);
xMOV(eax, ptr32[&g_recWriteback]);
xMOV(ptr32[&psxRegs.pc], eax);
#ifdef PCSX2_DEBUG
xOR( eax, eax );

View File

@ -48,7 +48,7 @@ void _psxFlushCall(int flushtype);
void _psxOnWriteReg(int reg);
void _psxMoveGPRtoR(const x86Emitter::xRegisterLong& to, int fromgpr);
void _psxMoveGPRtoR(const x86Emitter::xRegister32& to, int fromgpr);
#if 0
void _psxMoveGPRtoM(uptr to, int fromgpr);
void _psxMoveGPRtoRm(x86IntRegType to, int fromgpr);

View File

@ -65,9 +65,9 @@ void rpsxADDconst(int dreg, int sreg, u32 off, int info)
if (sreg == dreg) {
xADD(ptr32[&psxRegs.GPR.r[dreg]], off);
} else {
xMOV(eax, ptr[&psxRegs.GPR.r[sreg]]);
xMOV(eax, ptr32[&psxRegs.GPR.r[sreg]]);
if (off) xADD(eax, off);
xMOV(ptr[&psxRegs.GPR.r[dreg]], eax);
xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax);
}
}
else {
@ -97,7 +97,7 @@ void rpsxSLTconst(int info, int dreg, int sreg, int imm)
xXOR(eax, eax);
xCMP(ptr32[&psxRegs.GPR.r[sreg]], imm);
xSETL(al);
xMOV(ptr[&psxRegs.GPR.r[dreg]], eax);
xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax);
}
void rpsxSLTI_(int info) { rpsxSLTconst(info, _Rt_, _Rs_, _Imm_); }
@ -115,7 +115,7 @@ void rpsxSLTUconst(int info, int dreg, int sreg, int imm)
xXOR(eax, eax);
xCMP(ptr32[&psxRegs.GPR.r[sreg]], imm);
xSETB(al);
xMOV(ptr[&psxRegs.GPR.r[dreg]], eax);
xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax);
}
void rpsxSLTIU_(int info) { rpsxSLTUconst(info, _Rt_, _Rs_, (s32)_Imm_); }
@ -134,9 +134,9 @@ void rpsxANDconst(int info, int dreg, int sreg, u32 imm)
if (sreg == dreg) {
xAND(ptr32[&psxRegs.GPR.r[dreg]], imm);
} else {
xMOV(eax, ptr[&psxRegs.GPR.r[sreg]]);
xMOV(eax, ptr32[&psxRegs.GPR.r[sreg]]);
xAND(eax, imm);
xMOV(ptr[&psxRegs.GPR.r[dreg]], eax);
xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax);
}
} else {
xMOV(ptr32[&psxRegs.GPR.r[dreg]], 0);
@ -160,15 +160,15 @@ void rpsxORconst(int info, int dreg, int sreg, u32 imm)
xOR(ptr32[&psxRegs.GPR.r[dreg]], imm);
}
else {
xMOV(eax, ptr[&psxRegs.GPR.r[sreg]]);
xMOV(eax, ptr32[&psxRegs.GPR.r[sreg]]);
xOR(eax, imm);
xMOV(ptr[&psxRegs.GPR.r[dreg]], eax);
xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax);
}
}
else {
if( dreg != sreg ) {
xMOV(ecx, ptr[&psxRegs.GPR.r[sreg]]);
xMOV(ptr[&psxRegs.GPR.r[dreg]], ecx);
xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]);
xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx);
}
}
}
@ -189,9 +189,9 @@ void rpsxXORconst(int info, int dreg, int sreg, u32 imm)
xNOT(ptr32[&psxRegs.GPR.r[dreg]]);
}
else {
xMOV(ecx, ptr[&psxRegs.GPR.r[sreg]]);
xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]);
xNOT(ecx);
xMOV(ptr[&psxRegs.GPR.r[dreg]], ecx);
xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx);
}
}
else if (imm) {
@ -200,15 +200,15 @@ void rpsxXORconst(int info, int dreg, int sreg, u32 imm)
xXOR(ptr32[&psxRegs.GPR.r[dreg]], imm);
}
else {
xMOV(eax, ptr[&psxRegs.GPR.r[sreg]]);
xMOV(eax, ptr32[&psxRegs.GPR.r[sreg]]);
xXOR(eax, imm);
xMOV(ptr[&psxRegs.GPR.r[dreg]], eax);
xMOV(ptr32[&psxRegs.GPR.r[dreg]], eax);
}
}
else {
if( dreg != sreg ) {
xMOV(ecx, ptr[&psxRegs.GPR.r[sreg]]);
xMOV(ptr[&psxRegs.GPR.r[dreg]], ecx);
xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]);
xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx);
}
}
}
@ -241,16 +241,16 @@ void rpsxADDU_constt(int info)
void rpsxADDU_(int info)
{
if (_Rs_ && _Rt_) {
xMOV(eax, ptr[&psxRegs.GPR.r[_Rs_]]);
xADD(eax, ptr[&psxRegs.GPR.r[_Rt_]]);
xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
xADD(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
} else if (_Rs_) {
xMOV(eax, ptr[&psxRegs.GPR.r[_Rs_]]);
xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
} else if (_Rt_) {
xMOV(eax, ptr[&psxRegs.GPR.r[_Rt_]]);
xMOV(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
} else {
xXOR(eax, eax);
}
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax);
xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
}
PSXRECOMPILE_CONSTCODE0(ADDU);
@ -266,8 +266,8 @@ void rpsxSUBU_const()
void rpsxSUBU_consts(int info)
{
xMOV(eax, g_psxConstRegs[_Rs_]);
xSUB(eax, ptr[&psxRegs.GPR.r[_Rt_]]);
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax);
xSUB(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
}
void rpsxSUBU_constt(int info) { rpsxADDconst(_Rd_, _Rs_, -(int)g_psxConstRegs[_Rt_], info); }
@ -278,13 +278,13 @@ void rpsxSUBU_(int info)
if (!_Rd_) return;
if( _Rd_ == _Rs_ ) {
xMOV(eax, ptr[&psxRegs.GPR.r[_Rt_]]);
xSUB(ptr[&psxRegs.GPR.r[_Rd_]], eax);
xMOV(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
xSUB(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
}
else {
xMOV(eax, ptr[&psxRegs.GPR.r[_Rs_]]);
xSUB(eax, ptr[&psxRegs.GPR.r[_Rt_]]);
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax);
xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
xSUB(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
}
}
@ -296,13 +296,13 @@ void rpsxLogicalOp(int info, int op)
{
if( _Rd_ == _Rs_ || _Rd_ == _Rt_ ) {
int vreg = _Rd_ == _Rs_ ? _Rt_ : _Rs_;
xMOV(ecx, ptr[&psxRegs.GPR.r[vreg]]);
xMOV(ecx, ptr32[&psxRegs.GPR.r[vreg]]);
switch(op) {
case 0: xAND(ptr[&psxRegs.GPR.r[_Rd_]], ecx); break;
case 1: xOR(ptr[&psxRegs.GPR.r[_Rd_]], ecx); break;
case 2: xXOR(ptr[&psxRegs.GPR.r[_Rd_]], ecx); break;
case 3: xOR(ptr[&psxRegs.GPR.r[_Rd_]], ecx); break;
case 0: xAND(ptr32[&psxRegs.GPR.r[_Rd_]], ecx); break;
case 1: xOR(ptr32[&psxRegs.GPR.r[_Rd_]], ecx); break;
case 2: xXOR(ptr32[&psxRegs.GPR.r[_Rd_]], ecx); break;
case 3: xOR(ptr32[&psxRegs.GPR.r[_Rd_]], ecx); break;
default: pxAssert(0);
}
@ -310,19 +310,19 @@ void rpsxLogicalOp(int info, int op)
xNOT(ptr32[&psxRegs.GPR.r[_Rd_]]);
}
else {
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
switch(op) {
case 0: xAND(ecx, ptr[&psxRegs.GPR.r[_Rt_]]); break;
case 1: xOR(ecx, ptr[&psxRegs.GPR.r[_Rt_]]); break;
case 2: xXOR(ecx, ptr[&psxRegs.GPR.r[_Rt_]]); break;
case 3: xOR(ecx, ptr[&psxRegs.GPR.r[_Rt_]]); break;
case 0: xAND(ecx, ptr32[&psxRegs.GPR.r[_Rt_]]); break;
case 1: xOR(ecx, ptr32[&psxRegs.GPR.r[_Rt_]]); break;
case 2: xXOR(ecx, ptr32[&psxRegs.GPR.r[_Rt_]]); break;
case 3: xOR(ecx, ptr32[&psxRegs.GPR.r[_Rt_]]); break;
default: pxAssert(0);
}
if( op == 3 )
xNOT(ecx);
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], ecx);
xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], ecx);
}
}
@ -374,10 +374,10 @@ void rpsxNORconst(int info, int dreg, int sreg, u32 imm)
xNOT(ptr32[&psxRegs.GPR.r[dreg]]);
}
else {
xMOV(ecx, ptr[&psxRegs.GPR.r[sreg]]);
xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]);
xOR(ecx, imm);
xNOT(ecx);
xMOV(ptr[&psxRegs.GPR.r[dreg]], ecx);
xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx);
}
}
else {
@ -385,9 +385,9 @@ void rpsxNORconst(int info, int dreg, int sreg, u32 imm)
xNOT(ptr32[&psxRegs.GPR.r[dreg]]);
}
else {
xMOV(ecx, ptr[&psxRegs.GPR.r[sreg]]);
xMOV(ecx, ptr32[&psxRegs.GPR.r[sreg]]);
xNOT(ecx);
xMOV(ptr[&psxRegs.GPR.r[dreg]], ecx);
xMOV(ptr32[&psxRegs.GPR.r[dreg]], ecx);
}
}
}
@ -407,19 +407,19 @@ void rpsxSLT_const()
void rpsxSLT_consts(int info)
{
xXOR(eax, eax);
xCMP(ptr32[&psxRegs.GPR.r[_Rt_]], g_psxConstRegs[_Rs_]);
xSETG(al);
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax);
xCMP(ptr32[&psxRegs.GPR.r[_Rt_]], g_psxConstRegs[_Rs_]);
xSETG(al);
xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
}
void rpsxSLT_constt(int info) { rpsxSLTconst(info, _Rd_, _Rs_, g_psxConstRegs[_Rt_]); }
void rpsxSLT_(int info)
{
xMOV(eax, ptr[&psxRegs.GPR.r[_Rs_]]);
xCMP(eax, ptr[&psxRegs.GPR.r[_Rt_]]);
xSETL(al);
xAND(eax, 0xff);
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax);
xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
xCMP(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
xSETL(al);
xAND(eax, 0xff);
xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
}
PSXRECOMPILE_CONSTCODE0(SLT);
@ -433,9 +433,9 @@ void rpsxSLTU_const()
void rpsxSLTU_consts(int info)
{
xXOR(eax, eax);
xCMP(ptr32[&psxRegs.GPR.r[_Rt_]], g_psxConstRegs[_Rs_]);
xSETA(al);
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax);
xCMP(ptr32[&psxRegs.GPR.r[_Rt_]], g_psxConstRegs[_Rs_]);
xSETA(al);
xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
}
void rpsxSLTU_constt(int info) { rpsxSLTUconst(info, _Rd_, _Rs_, g_psxConstRegs[_Rt_]); }
@ -444,11 +444,11 @@ void rpsxSLTU_(int info)
// Rd = Rs < Rt (unsigned)
if (!_Rd_) return;
xMOV(eax, ptr[&psxRegs.GPR.r[_Rs_]]);
xCMP(eax, ptr[&psxRegs.GPR.r[_Rt_]]);
xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
xCMP(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
xSBB(eax, eax);
xNEG(eax);
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax);
xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
}
PSXRECOMPILE_CONSTCODE0(SLTU);
@ -468,18 +468,18 @@ void rpsxMULTsuperconst(int info, int sreg, int imm, int sign)
xMOV(eax, imm);
if( sign ) xMUL(ptr32[&psxRegs.GPR.r[sreg]]);
else xUMUL(ptr32[&psxRegs.GPR.r[sreg]]);
xMOV(ptr[&psxRegs.GPR.n.lo], eax);
xMOV(ptr[&psxRegs.GPR.n.hi], edx);
xMOV(ptr32[&psxRegs.GPR.n.lo], eax);
xMOV(ptr32[&psxRegs.GPR.n.hi], edx);
}
void rpsxMULTsuper(int info, int sign)
{
// Lo/Hi = Rs * Rt (signed)
xMOV(eax, ptr[&psxRegs.GPR.r[_Rs_]]);
xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
if( sign ) xMUL(ptr32[&psxRegs.GPR.r[_Rt_]]);
else xUMUL(ptr32[&psxRegs.GPR.r[_Rt_]]);
xMOV(ptr[&psxRegs.GPR.n.lo], eax);
xMOV(ptr[&psxRegs.GPR.n.hi], edx);
xMOV(ptr32[&psxRegs.GPR.n.lo], eax);
xMOV(ptr32[&psxRegs.GPR.n.hi], edx);
}
void rpsxMULT_consts(int info) { rpsxMULTsuperconst(info, _Rt_, g_psxConstRegs[_Rs_], 1); }
@ -542,12 +542,12 @@ void rpsxDIVsuper(int info, int sign, int process = 0)
if( process & PROCESS_CONSTT )
xMOV(ecx, g_psxConstRegs[_Rt_]);
else
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rt_]]);
xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rt_]]);
if( process & PROCESS_CONSTS )
xMOV(eax, g_psxConstRegs[_Rs_]);
else
xMOV(eax, ptr[&psxRegs.GPR.r[_Rs_]]);
xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
u8 *end1;
if (sign) //test for overflow (x86 will just throw an exception)
@ -593,8 +593,8 @@ void rpsxDIVsuper(int info, int sign, int process = 0)
if (sign) x86SetJ8( end1 );
x86SetJ8( end2 );
xMOV(ptr[&psxRegs.GPR.n.lo], eax);
xMOV(ptr[&psxRegs.GPR.n.hi], edx);
xMOV(ptr32[&psxRegs.GPR.n.lo], eax);
xMOV(ptr32[&psxRegs.GPR.n.hi], edx);
}
void rpsxDIV_consts(int info) { rpsxDIVsuper(info, 1, PROCESS_CONSTS); }
@ -639,12 +639,12 @@ static void rpsxLB()
_psxOnWriteReg(_Rt_);
_psxDeleteReg(_Rt_, 0);
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_);
xFastCall((void*)iopMemRead8, ecx ); // returns value in EAX
if (_Rt_) {
xMOVSX(eax, al);
xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax);
xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax);
}
PSX_DEL_CONST(_Rt_);
}
@ -655,12 +655,12 @@ static void rpsxLBU()
_psxOnWriteReg(_Rt_);
_psxDeleteReg(_Rt_, 0);
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_);
xFastCall((void*)iopMemRead8, ecx ); // returns value in EAX
if (_Rt_) {
xMOVZX(eax, al);
xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax);
xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax);
}
PSX_DEL_CONST(_Rt_);
}
@ -671,12 +671,12 @@ static void rpsxLH()
_psxOnWriteReg(_Rt_);
_psxDeleteReg(_Rt_, 0);
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_);
xFastCall((void*)iopMemRead16, ecx ); // returns value in EAX
if (_Rt_) {
xMOVSX(eax, ax);
xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax);
xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax);
}
PSX_DEL_CONST(_Rt_);
}
@ -687,12 +687,12 @@ static void rpsxLHU()
_psxOnWriteReg(_Rt_);
_psxDeleteReg(_Rt_, 0);
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_);
xFastCall((void*)iopMemRead16, ecx ); // returns value in EAX
if (_Rt_) {
xMOVZX(eax, ax);
xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax);
xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax);
}
PSX_DEL_CONST(_Rt_);
}
@ -704,7 +704,7 @@ static void rpsxLW()
_psxDeleteReg(_Rt_, 0);
_psxFlushCall(FLUSH_EVERYTHING);
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_);
xTEST(ecx, 0x10000000);
@ -712,18 +712,17 @@ static void rpsxLW()
xFastCall((void*)iopMemRead32, ecx ); // returns value in EAX
if (_Rt_) {
xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax);
xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax);
}
j8Ptr[1] = JMP8(0);
x86SetJ8(j8Ptr[0]);
// read from psM directly
xAND(ecx, 0x1fffff);
xADD(ecx, (uptr)iopMem->Main);
xMOV(ecx, ptr[ecx]);
xMOV(ecx, ptr32[xComplexAddress(rax, iopMem->Main, rcx)]);
if (_Rt_) {
xMOV(ptr[&psxRegs.GPR.r[_Rt_]], ecx);
xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], ecx);
}
x86SetJ8(j8Ptr[1]);
@ -735,10 +734,10 @@ static void rpsxSB()
_psxDeleteReg(_Rs_, 1);
_psxDeleteReg(_Rt_, 1);
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_);
xMOV( edx, ptr[&psxRegs.GPR.r[_Rt_]] );
xFastCall((void*)iopMemWrite8, ecx, edx );
xMOV(arg1regd, ptr32[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(arg1regd, _Imm_);
xMOV( arg2regd, ptr32[&psxRegs.GPR.r[_Rt_]] );
xFastCall((void*)iopMemWrite8, arg1regd, arg2regd );
}
static void rpsxSH()
@ -746,10 +745,10 @@ static void rpsxSH()
_psxDeleteReg(_Rs_, 1);
_psxDeleteReg(_Rt_, 1);
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_);
xMOV( edx, ptr[&psxRegs.GPR.r[_Rt_]] );
xFastCall((void*)iopMemWrite16, ecx, edx );
xMOV(arg1regd, ptr32[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(arg1regd, _Imm_);
xMOV( arg2regd, ptr32[&psxRegs.GPR.r[_Rt_]] );
xFastCall((void*)iopMemWrite16, arg1regd, arg2regd );
}
static void rpsxSW()
@ -757,10 +756,10 @@ static void rpsxSW()
_psxDeleteReg(_Rs_, 1);
_psxDeleteReg(_Rt_, 1);
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(ecx, _Imm_);
xMOV( edx, ptr[&psxRegs.GPR.r[_Rt_]] );
xFastCall((void*)iopMemWrite32, ecx, edx );
xMOV(arg1regd, ptr32[&psxRegs.GPR.r[_Rs_]]);
if (_Imm_) xADD(arg1regd, _Imm_);
xMOV( arg2regd, ptr32[&psxRegs.GPR.r[_Rt_]] );
xFastCall((void*)iopMemWrite32, arg1regd, arg2regd );
}
//// SLL
@ -782,19 +781,19 @@ void rpsxShiftConst(int info, int rdreg, int rtreg, int imm, int shifttype)
}
}
else {
xMOV(eax, ptr[&psxRegs.GPR.r[rtreg]]);
xMOV(eax, ptr32[&psxRegs.GPR.r[rtreg]]);
switch(shifttype) {
case 0: xSHL(eax, imm); break;
case 1: xSHR(eax, imm); break;
case 2: xSAR(eax, imm); break;
}
xMOV(ptr[&psxRegs.GPR.r[rdreg]], eax);
xMOV(ptr32[&psxRegs.GPR.r[rdreg]], eax);
}
}
else {
if( rdreg != rtreg ) {
xMOV(eax, ptr[&psxRegs.GPR.r[rtreg]]);
xMOV(ptr[&psxRegs.GPR.r[rdreg]], eax);
xMOV(eax, ptr32[&psxRegs.GPR.r[rtreg]]);
xMOV(ptr32[&psxRegs.GPR.r[rdreg]], eax);
}
}
}
@ -834,23 +833,23 @@ void rpsxShiftVconsts(int info, int shifttype)
void rpsxShiftVconstt(int info, int shifttype)
{
xMOV(eax, g_psxConstRegs[_Rt_]);
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
switch(shifttype) {
case 0: xSHL(eax, cl); break;
case 1: xSHR(eax, cl); break;
case 2: xSAR(eax, cl); break;
}
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax);
xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
}
void rpsxSLLV_consts(int info) { rpsxShiftVconsts(info, 0); }
void rpsxSLLV_constt(int info) { rpsxShiftVconstt(info, 0); }
void rpsxSLLV_(int info)
{
xMOV(eax, ptr[&psxRegs.GPR.r[_Rt_]]);
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
xMOV(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
xSHL(eax, cl);
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax);
xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
}
PSXRECOMPILE_CONSTCODE0(SLLV);
@ -865,10 +864,10 @@ void rpsxSRLV_consts(int info) { rpsxShiftVconsts(info, 1); }
void rpsxSRLV_constt(int info) { rpsxShiftVconstt(info, 1); }
void rpsxSRLV_(int info)
{
xMOV(eax, ptr[&psxRegs.GPR.r[_Rt_]]);
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
xMOV(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
xSHR(eax, cl);
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax);
xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
}
PSXRECOMPILE_CONSTCODE0(SRLV);
@ -883,10 +882,10 @@ void rpsxSRAV_consts(int info) { rpsxShiftVconsts(info, 2); }
void rpsxSRAV_constt(int info) { rpsxShiftVconstt(info, 2); }
void rpsxSRAV_(int info)
{
xMOV(eax, ptr[&psxRegs.GPR.r[_Rt_]]);
xMOV(ecx, ptr[&psxRegs.GPR.r[_Rs_]]);
xMOV(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
xMOV(ecx, ptr32[&psxRegs.GPR.r[_Rs_]]);
xSAR(eax, cl);
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax);
xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
}
PSXRECOMPILE_CONSTCODE0(SRAV);
@ -900,8 +899,8 @@ void rpsxMFHI()
_psxOnWriteReg(_Rd_);
_psxDeleteReg(_Rd_, 0);
xMOV(eax, ptr[&psxRegs.GPR.n.hi]);
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax);
xMOV(eax, ptr32[&psxRegs.GPR.n.hi]);
xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
}
void rpsxMTHI()
@ -911,8 +910,8 @@ void rpsxMTHI()
}
else {
_psxDeleteReg(_Rs_, 1);
xMOV(eax, ptr[&psxRegs.GPR.r[_Rs_]]);
xMOV(ptr[&psxRegs.GPR.n.hi], eax);
xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
xMOV(ptr32[&psxRegs.GPR.n.hi], eax);
}
}
@ -922,8 +921,8 @@ void rpsxMFLO()
_psxOnWriteReg(_Rd_);
_psxDeleteReg(_Rd_, 0);
xMOV(eax, ptr[&psxRegs.GPR.n.lo]);
xMOV(ptr[&psxRegs.GPR.r[_Rd_]], eax);
xMOV(eax, ptr32[&psxRegs.GPR.n.lo]);
xMOV(ptr32[&psxRegs.GPR.r[_Rd_]], eax);
}
void rpsxMTLO()
@ -933,8 +932,8 @@ void rpsxMTLO()
}
else {
_psxDeleteReg(_Rs_, 1);
xMOV(eax, ptr[&psxRegs.GPR.r[_Rs_]]);
xMOV(ptr[&psxRegs.GPR.n.lo], eax);
xMOV(eax, ptr32[&psxRegs.GPR.r[_Rs_]]);
xMOV(ptr32[&psxRegs.GPR.n.lo], eax);
}
}
@ -965,8 +964,8 @@ void rpsxJR()
void rpsxJALR()
{
// jalr Rs
_allocX86reg(esi, X86TYPE_PCWRITEBACK, 0, MODE_WRITE);
_psxMoveGPRtoR(esi, _Rs_);
_allocX86reg(calleeSavedReg2d, X86TYPE_PCWRITEBACK, 0, MODE_WRITE);
_psxMoveGPRtoR(calleeSavedReg2d, _Rs_);
if ( _Rd_ )
{
@ -977,18 +976,18 @@ void rpsxJALR()
psxRecompileNextInstruction(1);
if( x86regs[esi.GetId()].inuse ) {
pxAssert( x86regs[esi.GetId()].type == X86TYPE_PCWRITEBACK );
xMOV(ptr[&psxRegs.pc], esi);
x86regs[esi.GetId()].inuse = 0;
if( x86regs[calleeSavedReg2d.GetId()].inuse ) {
pxAssert( x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK );
xMOV(ptr32[&psxRegs.pc], calleeSavedReg2d);
x86regs[calleeSavedReg2d.GetId()].inuse = 0;
#ifdef PCSX2_DEBUG
xOR( esi, esi );
xOR( calleeSavedReg2d, calleeSavedReg2d );
#endif
}
else {
xMOV(eax, ptr[&g_recWriteback]);
xMOV(ptr[&psxRegs.pc], eax);
xMOV(eax, ptr32[&g_recWriteback]);
xMOV(ptr32[&psxRegs.pc], eax);
#ifdef PCSX2_DEBUG
xOR( eax, eax );
#endif
@ -1016,8 +1015,8 @@ void rpsxSetBranchEQ(int info, int process)
s_pbranchjmp = JNE32( 0 );
}
else {
xMOV(eax, ptr[&psxRegs.GPR.r[ _Rs_ ] ]);
xCMP(eax, ptr[&psxRegs.GPR.r[ _Rt_ ] ]);
xMOV(eax, ptr32[&psxRegs.GPR.r[ _Rs_ ] ]);
xCMP(eax, ptr32[&psxRegs.GPR.r[ _Rt_ ] ]);
s_pbranchjmp = JNE32( 0 );
}
}
@ -1342,8 +1341,8 @@ void rpsxMFC0()
if (!_Rt_) return;
_psxOnWriteReg(_Rt_);
xMOV(eax, ptr[&psxRegs.CP0.r[_Rd_]]);
xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax);
xMOV(eax, ptr32[&psxRegs.CP0.r[_Rd_]]);
xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax);
}
void rpsxCFC0()
@ -1352,8 +1351,8 @@ void rpsxCFC0()
if (!_Rt_) return;
_psxOnWriteReg(_Rt_);
xMOV(eax, ptr[&psxRegs.CP0.r[_Rd_]]);
xMOV(ptr[&psxRegs.GPR.r[_Rt_]], eax);
xMOV(eax, ptr32[&psxRegs.CP0.r[_Rd_]]);
xMOV(ptr32[&psxRegs.GPR.r[_Rt_]], eax);
}
void rpsxMTC0()
@ -1364,8 +1363,8 @@ void rpsxMTC0()
}
else {
_psxDeleteReg(_Rt_, 1);
xMOV(eax, ptr[&psxRegs.GPR.r[_Rt_]]);
xMOV(ptr[&psxRegs.CP0.r[_Rd_]], eax);
xMOV(eax, ptr32[&psxRegs.GPR.r[_Rt_]]);
xMOV(ptr32[&psxRegs.CP0.r[_Rd_]], eax);
}
}
@ -1377,13 +1376,13 @@ void rpsxCTC0()
void rpsxRFE()
{
xMOV(eax, ptr[&psxRegs.CP0.n.Status]);
xMOV(eax, ptr32[&psxRegs.CP0.n.Status]);
xMOV(ecx, eax);
xAND(eax, 0xfffffff0);
xAND(ecx, 0x3c);
xSHR(ecx, 2);
xOR(eax, ecx);
xMOV(ptr[&psxRegs.CP0.n.Status], eax);
xMOV(ptr32[&psxRegs.CP0.n.Status], eax);
// Test the IOP's INTC status, so that any pending ints get raised.

View File

@ -104,7 +104,7 @@ extern u32 g_cpuHasConstReg, g_cpuFlushedConstReg;
u32* _eeGetConstReg(int reg);
// finds where the GPR is stored and moves lower 32 bits to EAX
void _eeMoveGPRtoR(const x86Emitter::xRegisterLong& to, int fromgpr);
void _eeMoveGPRtoR(const x86Emitter::xRegister32& to, int fromgpr);
void _eeMoveGPRtoM(uptr to, int fromgpr);
void _eeMoveGPRtoRm(x86IntRegType to, int fromgpr);
void eeSignExtendTo(int gpr, bool onlyupper=false);

View File

@ -239,7 +239,17 @@ void _flushConstRegs()
}
}
int _allocX86reg(xRegisterLong x86reg, int type, int reg, int mode)
// Convenience overload: allocate any free x86 register (no preference).
// Forwards to the xRegister32 overload with an empty register, letting the
// allocator pick the register itself.
int _allocX86reg(xRegisterEmpty x86reg, int type, int reg, int mode)
{
return _allocX86reg(xRegister32(x86reg), type, reg, mode);
}
// Convenience overload for 64-bit register operands: the allocator tracks
// registers by id only, so reuse the 32-bit path with the same register id.
int _allocX86reg(xRegister64 x86reg, int type, int reg, int mode)
{
return _allocX86reg(xRegister32(x86reg.Id), type, reg, mode);
}
int _allocX86reg(xRegister32 x86reg, int type, int reg, int mode)
{
uint i;
pxAssertDev( reg >= 0 && reg < 32, "Register index out of bounds." );
@ -313,7 +323,7 @@ int _allocX86reg(xRegisterLong x86reg, int type, int reg, int mode)
}
if (x86reg.IsEmpty())
x86reg = xRegisterLong(_getFreeX86reg(oldmode));
x86reg = xRegister32(_getFreeX86reg(oldmode));
else
_freeX86reg(x86reg);
@ -440,7 +450,7 @@ void _deleteX86reg(int type, int reg, int flush)
}
// Temporary solution to support eax/ebx... type
void _freeX86reg(const x86Emitter::xRegisterLong& x86reg)
void _freeX86reg(const x86Emitter::xRegister32& x86reg)
{
_freeX86reg(x86reg.GetId());
}

View File

@ -51,7 +51,7 @@ using namespace R5900;
u32 maxrecmem = 0;
static __aligned16 uptr recLUT[_64kb];
static __aligned16 uptr hwLUT[_64kb];
static __aligned16 u32 hwLUT[_64kb];
static __fi u32 HWADDR(u32 mem) { return hwLUT[mem >> 16] + mem; }
@ -75,7 +75,7 @@ static const int RECCONSTBUF_SIZE = 16384 * 2; // 64 bit consts in 32 bit units
static RecompiledCodeReserve* recMem = NULL;
static u8* recRAMCopy = NULL;
static u8* recLutReserve_RAM = NULL;
static const size_t recLutSize = Ps2MemSize::MainRam + Ps2MemSize::Rom + Ps2MemSize::Rom1 + Ps2MemSize::Rom2;
static const size_t recLutSize = (Ps2MemSize::MainRam + Ps2MemSize::Rom + Ps2MemSize::Rom1 + Ps2MemSize::Rom2) * wordsize / 4;
static uptr m_ConfiguredCacheReserve = 64;
@ -153,7 +153,7 @@ u32* _eeGetConstReg(int reg)
return &cpuRegs.GPR.r[ reg ].UL[0];
}
void _eeMoveGPRtoR(const xRegisterLong& to, int fromgpr)
void _eeMoveGPRtoR(const xRegister32& to, int fromgpr)
{
if( fromgpr == 0 )
xXOR(to, to); // zero register should use xor, thanks --air
@ -346,13 +346,17 @@ static DynGenFunc* _DynGen_JITCompile()
u8* retval = xGetAlignedCallTarget();
xFastCall((void*)recRecompile, ptr[&cpuRegs.pc] );
xFastCall((void*)recRecompile, ptr32[&cpuRegs.pc] );
// C equivalent:
// u32 addr = cpuRegs.pc;
// void(**base)() = (void(**)())recLUT[addr >> 16];
// base[addr >> 2]();
xMOV( eax, ptr[&cpuRegs.pc] );
xMOV( ebx, eax );
xSHR( eax, 16 );
xMOV( ecx, ptr[recLUT + (eax*4)] );
xJMP( ptr32[ecx+ebx] );
xMOV( rcx, ptrNative[xComplexAddress(rcx, recLUT, rax*wordsize)] );
xJMP( ptrNative[rbx*(wordsize/4) + rcx] );
return (DynGenFunc*)retval;
}
@ -369,11 +373,15 @@ static DynGenFunc* _DynGen_DispatcherReg()
{
u8* retval = xGetPtr(); // fallthrough target, can't align it!
// C equivalent:
// u32 addr = cpuRegs.pc;
// void(**base)() = (void(**)())recLUT[addr >> 16];
// base[addr >> 2]();
xMOV( eax, ptr[&cpuRegs.pc] );
xMOV( ebx, eax );
xSHR( eax, 16 );
xMOV( ecx, ptr[recLUT + (eax*4)] );
xJMP( ptr32[ecx+ebx] );
xMOV( rcx, ptrNative[xComplexAddress(rcx, recLUT, rax*wordsize)] );
xJMP( ptrNative[rbx*(wordsize/4) + rcx] );
return (DynGenFunc*)retval;
}
@ -461,7 +469,7 @@ static void _DynGen_Dispatchers()
static __ri void ClearRecLUT(BASEBLOCK* base, int memsize)
{
for (int i = 0; i < memsize/4; i++)
for (int i = 0; i < memsize/(int)sizeof(uptr); i++)
base[i].SetFnptr((uptr)JITCompile);
}
@ -521,7 +529,7 @@ static void recAlloc()
for (int i = 0; i < 0x10000; i++)
recLUT_SetPage(recLUT, 0, 0, 0, i, 0);
for ( int i = 0x0000; i < Ps2MemSize::MainRam / 0x10000; i++ )
for ( int i = 0x0000; i < (int)(Ps2MemSize::MainRam / 0x10000); i++ )
{
recLUT_SetPage(recLUT, hwLUT, recRAM, 0x0000, i, i);
recLUT_SetPage(recLUT, hwLUT, recRAM, 0x2000, i, i);
@ -864,21 +872,21 @@ void SetBranchReg( u32 reg )
// xMOV(ptr[&cpuRegs.pc], eax);
// }
// }
_allocX86reg(esi, X86TYPE_PCWRITEBACK, 0, MODE_WRITE);
_eeMoveGPRtoR(esi, reg);
_allocX86reg(calleeSavedReg2d, X86TYPE_PCWRITEBACK, 0, MODE_WRITE);
_eeMoveGPRtoR(calleeSavedReg2d, reg);
if (EmuConfig.Gamefixes.GoemonTlbHack) {
xMOV(ecx, esi);
xMOV(ecx, calleeSavedReg2d);
vtlb_DynV2P();
xMOV(esi, eax);
xMOV(calleeSavedReg2d, eax);
}
recompileNextInstruction(1);
if( x86regs[esi.GetId()].inuse ) {
pxAssert( x86regs[esi.GetId()].type == X86TYPE_PCWRITEBACK );
xMOV(ptr[&cpuRegs.pc], esi);
x86regs[esi.GetId()].inuse = 0;
if( x86regs[calleeSavedReg2d.GetId()].inuse ) {
pxAssert( x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK );
xMOV(ptr[&cpuRegs.pc], calleeSavedReg2d);
x86regs[calleeSavedReg2d.GetId()].inuse = 0;
}
else {
xMOV(eax, ptr[&g_recWriteback]);
@ -1525,8 +1533,8 @@ static void memory_protect_recompiled_code(u32 startpc, u32 size)
break;
case ProtMode_Manual:
xMOV( ecx, inpage_ptr );
xMOV( edx, inpage_sz / 4 );
xMOV( arg1regd, inpage_ptr );
xMOV( arg2regd, inpage_sz / 4 );
//xMOV( eax, startpc ); // uncomment this to access startpc (as eax) in dyna_block_discard
u32 lpc = inpage_ptr;
@ -1737,7 +1745,7 @@ static void __fastcall recRecompile( const u32 startpc )
// Game will unmap some virtual addresses. If a constant address were hardcoded in the block, we would be in a bad situation.
eeRecNeedsReset = true;
// 0x3563b8 is the start address of the function that invalidate entry in TLB cache
xFastCall((void*)GoemonUnloadTlb, ptr[&cpuRegs.GPR.n.a0.UL[0]]);
xFastCall((void*)GoemonUnloadTlb, ptr32[&cpuRegs.GPR.n.a0.UL[0]]);
}
}

View File

@ -148,7 +148,7 @@ void recSLTIU_(int info)
x86SetJ8(j8Ptr[0]);
x86SetJ8(j8Ptr[1]);
xMOV(ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ]], 0 );
}
@ -178,7 +178,7 @@ void recSLTI_(int info)
x86SetJ8(j8Ptr[0]);
x86SetJ8(j8Ptr[1]);
xMOV(ptr[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ]], 0 );
}

View File

@ -103,13 +103,13 @@ void recJALR()
EE::Profiler.EmitOp(eeOpcode::JALR);
int newpc = pc + 4;
_allocX86reg(esi, X86TYPE_PCWRITEBACK, 0, MODE_WRITE);
_eeMoveGPRtoR(esi, _Rs_);
_allocX86reg(calleeSavedReg2d, X86TYPE_PCWRITEBACK, 0, MODE_WRITE);
_eeMoveGPRtoR(calleeSavedReg2d, _Rs_);
if (EmuConfig.Gamefixes.GoemonTlbHack) {
xMOV(ecx, esi);
xMOV(ecx, calleeSavedReg2d);
vtlb_DynV2P();
xMOV(esi, eax);
xMOV(calleeSavedReg2d, eax);
}
// uncomment when there are NO instructions that need to call interpreter
// int mmreg;
@ -147,10 +147,10 @@ void recJALR()
_clearNeededXMMregs();
recompileNextInstruction(1);
if( x86regs[esi.GetId()].inuse ) {
pxAssert( x86regs[esi.GetId()].type == X86TYPE_PCWRITEBACK );
xMOV(ptr[&cpuRegs.pc], esi);
x86regs[esi.GetId()].inuse = 0;
if( x86regs[calleeSavedReg2d.GetId()].inuse ) {
pxAssert( x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK );
xMOV(ptr[&cpuRegs.pc], calleeSavedReg2d);
x86regs[calleeSavedReg2d.GetId()].inuse = 0;
}
else {
xMOV(eax, ptr[&g_recWriteback]);

View File

@ -99,13 +99,13 @@ void recLoad64( u32 bits, bool sign )
{
pxAssume( bits == 64 || bits == 128 );
// Load EDX with the destination.
// Load arg2 with the destination.
// 64/128 bit modes load the result directly into the cpuRegs.GPR struct.
if (_Rt_)
xMOV(edx, (uptr)&cpuRegs.GPR.r[_Rt_].UL[0]);
xLEA(arg2reg, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
else
xMOV(edx, (uptr)&dummyValue[0]);
xLEA(arg2reg, ptr[&dummyValue[0]]);
if (GPR_IS_CONST1(_Rs_))
{
@ -121,11 +121,11 @@ void recLoad64( u32 bits, bool sign )
else
{
// Load ECX with the source memory address that we're reading from.
_eeMoveGPRtoR(ecx, _Rs_);
_eeMoveGPRtoR(arg1regd, _Rs_);
if (_Imm_ != 0)
xADD(ecx, _Imm_);
xADD(arg1regd, _Imm_);
if (bits == 128) // force 16 byte alignment on 128 bit reads
xAND(ecx, ~0x0F);
xAND(arg1regd, ~0x0F);
_eeOnLoadWrite(_Rt_);
_deleteEEreg(_Rt_, 0);
@ -154,10 +154,10 @@ void recLoad32( u32 bits, bool sign )
}
else
{
// Load ECX with the source memory address that we're reading from.
_eeMoveGPRtoR(ecx, _Rs_);
// Load arg1 with the source memory address that we're reading from.
_eeMoveGPRtoR(arg1regd, _Rs_);
if (_Imm_ != 0)
xADD(ecx, _Imm_ );
xADD(arg1regd, _Imm_ );
_eeOnLoadWrite(_Rt_);
_deleteEEreg(_Rt_, 0);
@ -194,12 +194,12 @@ void recStore(u32 bits)
if (bits < 64)
{
_eeMoveGPRtoR(edx, _Rt_);
_eeMoveGPRtoR(arg2regd, _Rt_);
}
else if (bits == 128 || bits == 64)
{
_flushEEreg(_Rt_); // flush register to mem
xMOV(edx, (uptr)&cpuRegs.GPR.r[_Rt_].UL[0]);
xLEA(arg2reg, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
}
// Load ECX with the destination address, or issue a direct optimized write
@ -215,11 +215,11 @@ void recStore(u32 bits)
}
else
{
_eeMoveGPRtoR(ecx, _Rs_);
_eeMoveGPRtoR(arg1regd, _Rs_);
if (_Imm_ != 0)
xADD(ecx, _Imm_);
xADD(arg1regd, _Imm_);
if (bits == 128)
xAND(ecx, ~0x0F);
xAND(arg1regd, ~0x0F);
iFlushCall(FLUSH_FULLVTLB);
@ -253,30 +253,30 @@ void recLWL()
iFlushCall(FLUSH_FULLVTLB);
_deleteEEreg(_Rt_, 1);
_eeMoveGPRtoR(ecx, _Rs_);
_eeMoveGPRtoR(arg1regd, _Rs_);
if (_Imm_ != 0)
xADD(ecx, _Imm_);
xADD(arg1regd, _Imm_);
// edi = bit offset in word
xMOV(edi, ecx);
xAND(edi, 3);
xSHL(edi, 3);
// calleeSavedReg1 = bit offset in word
xMOV(calleeSavedReg1d, arg1regd);
xAND(calleeSavedReg1d, 3);
xSHL(calleeSavedReg1d, 3);
xAND(ecx, ~3);
xAND(arg1regd, ~3);
vtlb_DynGenRead32(32, false);
if (!_Rt_)
return;
// mask off bytes loaded
xMOV(ecx, edi);
xMOV(ecx, calleeSavedReg1d);
xMOV(edx, 0xffffff);
xSHR(edx, cl);
xAND(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], edx);
// OR in bytes loaded
xMOV(ecx, 24);
xSUB(ecx, edi);
xNEG(ecx);
xADD(ecx, 24);
xSHL(eax, cl);
xOR(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
@ -301,16 +301,16 @@ void recLWR()
iFlushCall(FLUSH_FULLVTLB);
_deleteEEreg(_Rt_, 1);
_eeMoveGPRtoR(ecx, _Rs_);
_eeMoveGPRtoR(arg1regd, _Rs_);
if (_Imm_ != 0)
xADD(ecx, _Imm_);
xADD(arg1regd, _Imm_);
// edi = bit offset in word
xMOV(edi, ecx);
xAND(edi, 3);
xSHL(edi, 3);
xMOV(calleeSavedReg1d, arg1regd);
xAND(calleeSavedReg1d, 3);
xSHL(calleeSavedReg1d, 3);
xAND(ecx, ~3);
xAND(arg1regd, ~3);
vtlb_DynGenRead32(32, false);
if (!_Rt_)
@ -318,17 +318,17 @@ void recLWR()
// mask off bytes loaded
xMOV(ecx, 24);
xSUB(ecx, edi);
xSUB(ecx, calleeSavedReg1d);
xMOV(edx, 0xffffff00);
xSHL(edx, cl);
xAND(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], edx);
// OR in bytes loaded
xMOV(ecx, edi);
xMOV(ecx, calleeSavedReg1d);
xSHR(eax, cl);
xOR(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
xCMP(edi, 0);
xCMP(ecx, 0);
xForwardJump8 nosignextend(Jcc_NotEqual);
// if ((addr & 3) == 0)
xCDQ();
@ -351,38 +351,38 @@ void recSWL()
#ifdef REC_STORES
iFlushCall(FLUSH_FULLVTLB);
_eeMoveGPRtoR(ecx, _Rs_);
_eeMoveGPRtoR(arg1regd, _Rs_);
if (_Imm_ != 0)
xADD(ecx, _Imm_);
xADD(arg1regd, _Imm_);
// edi = bit offset in word
xMOV(edi, ecx);
xAND(edi, 3);
xSHL(edi, 3);
xMOV(calleeSavedReg1d, arg1regd);
xAND(calleeSavedReg1d, 3);
xSHL(calleeSavedReg1d, 3);
xAND(ecx, ~3);
xAND(arg1regd, ~3);
vtlb_DynGenRead32(32, false);
// mask read -> edx
xMOV(ecx, edi);
xMOV(edx, 0xffffff00);
xSHL(edx, cl);
xAND(edx, eax);
// mask read -> arg2
xMOV(ecx, calleeSavedReg1d);
xMOV(arg2regd, 0xffffff00);
xSHL(arg2regd, cl);
xAND(arg2regd, eax);
if (_Rt_)
{
// mask write and OR -> edx
xMOV(ecx, 24);
xSUB(ecx, edi);
xNEG(ecx);
xADD(ecx, 24);
_eeMoveGPRtoR(eax, _Rt_);
xSHR(eax, cl);
xOR(edx, eax);
xOR(arg2regd, eax);
}
_eeMoveGPRtoR(ecx, _Rs_);
_eeMoveGPRtoR(arg1regd, _Rs_);
if (_Imm_ != 0)
xADD(ecx, _Imm_);
xAND(ecx, ~3);
xADD(arg1regd, _Imm_);
xAND(arg1regd, ~3);
vtlb_DynGenWrite(32);
#else
@ -401,38 +401,38 @@ void recSWR()
#ifdef REC_STORES
iFlushCall(FLUSH_FULLVTLB);
_eeMoveGPRtoR(ecx, _Rs_);
_eeMoveGPRtoR(arg1regd, _Rs_);
if (_Imm_ != 0)
xADD(ecx, _Imm_);
xADD(arg1regd, _Imm_);
// edi = bit offset in word
xMOV(edi, ecx);
xAND(edi, 3);
xSHL(edi, 3);
xMOV(calleeSavedReg1d, arg1regd);
xAND(calleeSavedReg1d, 3);
xSHL(calleeSavedReg1d, 3);
xAND(ecx, ~3);
xAND(arg1regd, ~3);
vtlb_DynGenRead32(32, false);
// mask read -> edx
xMOV(ecx, 24);
xSUB(ecx, edi);
xMOV(edx, 0xffffff);
xSHR(edx, cl);
xAND(edx, eax);
xSUB(ecx, calleeSavedReg1d);
xMOV(arg2regd, 0xffffff);
xSHR(arg2regd, cl);
xAND(arg2regd, eax);
if (_Rt_)
{
// mask write and OR -> edx
xMOV(ecx, edi);
xMOV(ecx, calleeSavedReg1d);
_eeMoveGPRtoR(eax, _Rt_);
xSHL(eax, cl);
xOR(edx, eax);
xOR(arg2regd, eax);
}
_eeMoveGPRtoR(ecx, _Rs_);
_eeMoveGPRtoR(arg1regd, _Rs_);
if (_Imm_ != 0)
xADD(ecx, _Imm_);
xAND(ecx, ~3);
xADD(arg1regd, _Imm_);
xAND(arg1regd, ~3);
vtlb_DynGenWrite(32);
#else
@ -512,9 +512,9 @@ void recLWC1()
}
else
{
_eeMoveGPRtoR(ecx, _Rs_);
_eeMoveGPRtoR(arg1regd, _Rs_);
if (_Imm_ != 0)
xADD(ecx, _Imm_);
xADD(arg1regd, _Imm_);
iFlushCall(FLUSH_FULLVTLB);
@ -536,7 +536,7 @@ void recSWC1()
#else
_deleteFPtoXMMreg(_Rt_, 1);
xMOV(edx, ptr32[&fpuRegs.fpr[_Rt_].UL] );
xMOV(arg2regd, ptr32[&fpuRegs.fpr[_Rt_].UL] );
if( GPR_IS_CONST1( _Rs_ ) )
{
@ -545,9 +545,9 @@ void recSWC1()
}
else
{
_eeMoveGPRtoR(ecx, _Rs_);
_eeMoveGPRtoR(arg1regd, _Rs_);
if (_Imm_ != 0)
xADD(ecx, _Imm_);
xADD(arg1regd, _Imm_);
iFlushCall(FLUSH_FULLVTLB);
@ -574,9 +574,9 @@ void recSWC1()
void recLQC2()
{
if (_Rt_)
xMOV(edx, (uptr)&VU0.VF[_Ft_].UD[0]);
xLEA(arg2reg, ptr[&VU0.VF[_Ft_].UD[0]]);
else
xMOV(edx, (uptr)&dummyValue[0]);
xLEA(arg2reg, ptr[&dummyValue[0]]);
if (GPR_IS_CONST1(_Rs_))
{
@ -586,9 +586,9 @@ void recLQC2()
}
else
{
_eeMoveGPRtoR(ecx, _Rs_);
_eeMoveGPRtoR(arg1regd, _Rs_);
if (_Imm_ != 0)
xADD(ecx, _Imm_);
xADD(arg1regd, _Imm_);
iFlushCall(FLUSH_FULLVTLB);
@ -602,7 +602,7 @@ void recLQC2()
void recSQC2()
{
xMOV(edx, (uptr)&VU0.VF[_Ft_].UD[0]);
xLEA(arg2reg, ptr[&VU0.VF[_Ft_].UD[0]]);
if (GPR_IS_CONST1(_Rs_))
{
@ -611,9 +611,9 @@ void recSQC2()
}
else
{
_eeMoveGPRtoR(ecx, _Rs_);
_eeMoveGPRtoR(arg1regd, _Rs_);
if (_Imm_ != 0)
xADD(ecx, _Imm_);
xADD(arg1regd, _Imm_);
iFlushCall(FLUSH_FULLVTLB);

View File

@ -73,6 +73,12 @@ static void iMOV128_SSE( const xIndirectVoid& destRm, const xIndirectVoid& srcRm
//
static void iMOV64_Smart( const xIndirectVoid& destRm, const xIndirectVoid& srcRm )
{
if (wordsize == 8) {
xMOV(rax, srcRm);
xMOV(destRm, rax);
return;
}
if( _hasFreeXMMreg() )
{
// Move things using MOVLPS:
@ -92,8 +98,8 @@ static void iMOV64_Smart( const xIndirectVoid& destRm, const xIndirectVoid& srcR
/*
// Pseudo-Code For the following Dynarec Implementations -->
u32 vmv=vmap[addr>>VTLB_PAGE_BITS];
s32 ppf=addr+vmv;
u32 vmv = vmap[addr>>VTLB_PAGE_BITS].raw();
sptr ppf=addr+vmv;
if (!(ppf<0))
{
data[0]=*reinterpret_cast<DataType*>(ppf);
@ -105,7 +111,7 @@ static void iMOV64_Smart( const xIndirectVoid& destRm, const xIndirectVoid& srcR
{
//has to: translate, find function, call function
u32 hand=(u8)vmv;
u32 paddr=ppf-hand+0x80000000;
u32 paddr=(ppf-hand) << 1;
//Console.WriteLn("Translated 0x%08X to 0x%08X",params addr,paddr);
return reinterpret_cast<TemplateHelper<DataSize,false>::HandlerType*>(RWFT[TemplateHelper<DataSize,false>::sidx][0][hand])(paddr,data);
}
@ -114,26 +120,28 @@ static void iMOV64_Smart( const xIndirectVoid& destRm, const xIndirectVoid& srcR
mov eax,ecx;
shr eax,VTLB_PAGE_BITS;
mov eax,[eax*4+vmap];
add ecx,eax;
mov rax,[rax*wordsize+vmap];
add rcx,rax;
js _fullread;
//these are wrong order, just an example ...
mov [eax],ecx;
mov ecx,[edx];
mov [eax+4],ecx;
mov ecx,[edx+4];
mov [eax+4+4],ecx;
mov ecx,[edx+4+4];
mov [eax+4+4+4+4],ecx;
mov ecx,[edx+4+4+4+4];
mov [rax],ecx;
mov ecx,[rdx];
mov [rax+4],ecx;
mov ecx,[rdx+4];
mov [rax+4+4],ecx;
mov ecx,[rdx+4+4];
mov [rax+4+4+4+4],ecx;
mov ecx,[rdx+4+4+4+4];
///....
jmp cont;
_fullread:
movzx eax,al;
sub ecx,eax;
#ifndef __M_X86_64 // The x86-64 marker will be cleared by using 32-bit ops
sub ecx,0x80000000;
#endif
call [eax+stuff];
cont:
........
@ -146,17 +154,16 @@ namespace vtlb_private
// Prepares eax, ecx, and, ebx for Direct or Indirect operations.
// Returns the writeback pointer for ebx (return address from indirect handling)
//
static uptr* DynGen_PrepRegs()
static u32* DynGen_PrepRegs()
{
// Warning dirty ebx (in case someone got the very bad idea to move this code)
EE::Profiler.EmitMem();
xMOV( eax, ecx );
xMOV( eax, arg1regd );
xSHR( eax, VTLB_PAGE_BITS );
xMOV( eax, ptr[(eax*4) + vtlbdata.vmap] );
xMOV( ebx, 0xcdcdcdcd );
uptr* writeback = ((uptr*)xGetPtr()) - 1;
xADD( ecx, eax );
xMOV( rax, ptrNative[xComplexAddress(rbx, vtlbdata.vmap, rax*wordsize)] );
u32* writeback = xLEA_Writeback( rbx );
xADD( arg1reg, rax );
return writeback;
}
@ -168,28 +175,28 @@ namespace vtlb_private
{
case 8:
if( sign )
xMOVSX( eax, ptr8[ecx] );
xMOVSX( eax, ptr8[arg1reg] );
else
xMOVZX( eax, ptr8[ecx] );
xMOVZX( eax, ptr8[arg1reg] );
break;
case 16:
if( sign )
xMOVSX( eax, ptr16[ecx] );
xMOVSX( eax, ptr16[arg1reg] );
else
xMOVZX( eax, ptr16[ecx] );
xMOVZX( eax, ptr16[arg1reg] );
break;
case 32:
xMOV( eax, ptr[ecx] );
xMOV( eax, ptr[arg1reg] );
break;
case 64:
iMOV64_Smart( ptr[edx], ptr[ecx] );
iMOV64_Smart( ptr[arg2reg], ptr[arg1reg] );
break;
case 128:
iMOV128_SSE( ptr[edx], ptr[ecx] );
iMOV128_SSE( ptr[arg2reg], ptr[arg1reg] );
break;
jNO_DEFAULT
@ -199,27 +206,29 @@ namespace vtlb_private
// ------------------------------------------------------------------------
static void DynGen_DirectWrite( u32 bits )
{
// TODO: x86Emitter can't use dil (and xRegister8(rdi.Id) is not dil)
switch(bits)
{
//8 , 16, 32 : data on EDX
case 8:
xMOV( ptr[ecx], dl );
xMOV( edx, arg2regd );
xMOV( ptr[arg1reg], dl );
break;
case 16:
xMOV( ptr[ecx], dx );
xMOV( ptr[arg1reg], xRegister16(arg2reg.Id) );
break;
case 32:
xMOV( ptr[ecx], edx );
xMOV( ptr[arg1reg], arg2regd );
break;
case 64:
iMOV64_Smart( ptr[ecx], ptr[edx] );
iMOV64_Smart( ptr[arg1reg], ptr[arg2reg] );
break;
case 128:
iMOV128_SSE( ptr[ecx], ptr[edx] );
iMOV128_SSE( ptr[arg1reg], ptr[arg2reg] );
break;
}
}
@ -274,15 +283,23 @@ static void DynGen_IndirectDispatch( int mode, int bits, bool sign = false )
// ------------------------------------------------------------------------
// Generates the various instances of the indirect dispatchers
// In: arg1reg: vtlb entry, arg2reg: data ptr (if mode >= 64), rbx: function return ptr
// Out: eax: result (if mode < 64)
static void DynGen_IndirectTlbDispatcher( int mode, int bits, bool sign )
{
xMOVZX( eax, al );
xSUB( ecx, 0x80000000 );
xSUB( ecx, eax );
if (wordsize != 8) xSUB( arg1regd, 0x80000000 );
xSUB( arg1regd, eax );
// jump to the indirect handler, which is a __fastcall C++ function.
// [ecx is address, edx is data]
xFastCall(ptr32[(eax*4) + vtlbdata.RWFT[bits][mode]], ecx, edx);
sptr table = (sptr)vtlbdata.RWFT[bits][mode];
if (table == (s32)table) {
xFastCall(ptrNative[(rax*wordsize) + table], arg1reg, arg2reg);
} else {
xLEA(arg3reg, ptr[(void*)table]);
xFastCall(ptrNative[(rax*wordsize) + arg3reg], arg1reg, arg2reg);
}
if (!mode)
{
@ -302,7 +319,7 @@ static void DynGen_IndirectTlbDispatcher( int mode, int bits, bool sign )
}
}
xJMP( ebx );
xJMP( rbx );
}
// One-time initialization procedure. Multiple subsequent calls during the lifespan of the
@ -338,18 +355,30 @@ void vtlb_dynarec_init()
Perf::any.map((uptr)m_IndirectDispatchers, __pagesize, "TLB Dispatcher");
}
// Patches a previously-emitted writeback slot (obtained from DynGen_PrepRegs)
// so that it points at the current emitter position, i.e. the return target
// for the indirect handler's call/ret.
//
// On x86-64 the slot is the 32-bit displacement field of a RIP-relative LEA,
// so the value stored must be relative to the end of that instruction
// (the displacement starts 2 bytes into the LEA, hence writeback + 4 is the
// instruction's end). On 32-bit the slot holds an absolute address.
static void vtlb_SetWriteback(u32 *writeback)
{
uptr val = (uptr)xGetPtr();
if (wordsize == 8)
{
// 0x8d is the LEA opcode; sanity-check that the slot really belongs to
// the RIP-relative LEA emitted by xLEA_Writeback.
pxAssertMsg(*((u8*)writeback - 2) == 0x8d, "Expected codegen to be an LEA");
val -= ((uptr)writeback + 4);
}
// Either form must fit in a signed 32-bit immediate/displacement.
pxAssertMsg((sptr)val == (s32)val, "Writeback too far away!");
*writeback = val;
}
//////////////////////////////////////////////////////////////////////////////////////////
// Dynarec Load Implementations
void vtlb_DynGenRead64(u32 bits)
{
pxAssume( bits == 64 || bits == 128 );
uptr* writeback = DynGen_PrepRegs();
u32* writeback = DynGen_PrepRegs();
DynGen_IndirectDispatch( 0, bits );
DynGen_DirectRead( bits, false );
*writeback = (uptr)xGetPtr(); // return target for indirect's call/ret
vtlb_SetWriteback(writeback); // return target for indirect's call/ret
}
// ------------------------------------------------------------------------
@ -360,12 +389,12 @@ void vtlb_DynGenRead32(u32 bits, bool sign)
{
pxAssume( bits <= 32 );
uptr* writeback = DynGen_PrepRegs();
u32* writeback = DynGen_PrepRegs();
DynGen_IndirectDispatch( 0, bits, sign && bits < 32 );
DynGen_DirectRead( bits, sign );
*writeback = (uptr)xGetPtr();
vtlb_SetWriteback(writeback);
}
// ------------------------------------------------------------------------
@ -378,15 +407,15 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const )
auto vmv = vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS];
if( !vmv.isHandler(addr_const) )
{
auto ppf = vmv.assumeHandlerGetPAddr(addr_const);
auto ppf = vmv.assumePtr(addr_const);
switch( bits )
{
case 64:
iMOV64_Smart( ptr[edx], ptr[(void*)ppf] );
iMOV64_Smart( ptr[arg2reg], ptr[(void*)ppf] );
break;
case 128:
iMOV128_SSE( ptr[edx], ptr[(void*)ppf] );
iMOV128_SSE( ptr[arg2reg], ptr[(void*)ppf] );
break;
jNO_DEFAULT
@ -405,7 +434,7 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const )
}
iFlushCall(FLUSH_FULLVTLB);
xFastCall( vmv.assumeHandlerGetRaw(szidx, 0), paddr );
xFastCall( vmv.assumeHandlerGetRaw(szidx, 0), paddr, arg2reg );
}
}
@ -442,7 +471,7 @@ void vtlb_DynGenRead32_Const( u32 bits, bool sign, u32 addr_const )
break;
case 32:
xMOV( eax, ptr[(void*)ppf] );
xMOV( eax, ptr32[(u32*)ppf] );
break;
}
}
@ -494,12 +523,12 @@ void vtlb_DynGenRead32_Const( u32 bits, bool sign, u32 addr_const )
void vtlb_DynGenWrite(u32 sz)
{
uptr* writeback = DynGen_PrepRegs();
u32* writeback = DynGen_PrepRegs();
DynGen_IndirectDispatch( 1, sz );
DynGen_DirectWrite( sz );
*writeback = (uptr)xGetPtr();
vtlb_SetWriteback(writeback);
}
@ -514,28 +543,30 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const )
auto vmv = vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS];
if( !vmv.isHandler(addr_const) )
{
// TODO: x86Emitter can't use dil (and xRegister8(rdi.Id) is not dil)
auto ppf = vmv.assumePtr(addr_const);
switch(bits)
{
//8 , 16, 32 : data on EDX
//8 , 16, 32 : data on arg2
case 8:
xMOV( edx, arg2regd );
xMOV( ptr[(void*)ppf], dl );
break;
case 16:
xMOV( ptr[(void*)ppf], dx );
xMOV( ptr[(void*)ppf], xRegister16(arg2reg.Id) );
break;
case 32:
xMOV( ptr[(void*)ppf], edx );
xMOV( ptr[(void*)ppf], arg2regd );
break;
case 64:
iMOV64_Smart( ptr[(void*)ppf], ptr[edx] );
iMOV64_Smart( ptr[(void*)ppf], ptr[arg2reg] );
break;
case 128:
iMOV128_SSE( ptr[(void*)ppf], ptr[edx] );
iMOV128_SSE( ptr[(void*)ppf], ptr[arg2reg] );
break;
}
@ -556,7 +587,7 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const )
}
iFlushCall(FLUSH_FULLVTLB);
xFastCall( vmv.assumeHandlerGetRaw(szidx, true), paddr, edx );
xFastCall( vmv.assumeHandlerGetRaw(szidx, true), paddr, arg2reg );
}
}
@ -565,13 +596,14 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const )
// ecx - virtual address
// Returns physical address in eax.
// Clobbers edx
void vtlb_DynV2P()
{
xMOV(eax, ecx);
xAND(ecx, VTLB_PAGE_MASK); // vaddr & VTLB_PAGE_MASK
xSHR(eax, VTLB_PAGE_BITS);
xMOV(eax, ptr[(eax*4) + vtlbdata.ppmap]); //vtlbdata.ppmap[vaddr>>VTLB_PAGE_BITS];
xMOV(eax, ptr[xComplexAddress(rdx, vtlbdata.ppmap, rax*4)]); //vtlbdata.ppmap[vaddr>>VTLB_PAGE_BITS];
xOR(eax, ecx);
}

View File

@ -178,25 +178,25 @@ void normJumpCompile(mV, microFlagCycles& mFC, bool isEvilJump) {
mVUpBlock->jumpCache = new microJumpCache[mProgSize/2];
}
if (isEvilJump) xMOV(gprT2, ptr32[&mVU.evilBranch]);
else xMOV(gprT2, ptr32[&mVU.branch]);
if (doJumpCaching) xMOV(gprT3, (uptr)mVUpBlock);
else xMOV(gprT3, (uptr)&mVUpBlock->pStateEnd);
if (isEvilJump) xMOV(arg1regd, ptr32[&mVU.evilBranch]);
else xMOV(arg1regd, ptr32[&mVU.branch]);
if (doJumpCaching) xLoadFarAddr(arg2reg, mVUpBlock);
else xLoadFarAddr(arg2reg, &mVUpBlock->pStateEnd);
if(mVUup.eBit && isEvilJump)// E-bit EvilJump
{
//Xtreme G 3 does 2 conditional jumps, the first contains an E Bit on the first instruction
//So if it is taken, you need to end the program, else you get infinite loops.
mVUendProgram(mVU, &mFC, 2);
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], gprT2);
xMOV(ptr32[&mVU.regs().VI[REG_TPC].UL], arg1regd);
xJMP(mVU.exitFunct);
}
if (!mVU.index) xFastCall((void*)(void(*)())mVUcompileJIT<0>, gprT2, gprT3); //(u32 startPC, uptr pState)
else xFastCall((void*)(void(*)())mVUcompileJIT<1>, gprT2, gprT3);
if (!mVU.index) xFastCall((void*)(void(*)())mVUcompileJIT<0>, arg1reg, arg2reg); //(u32 startPC, uptr pState)
else xFastCall((void*)(void(*)())mVUcompileJIT<1>, arg1reg, arg2reg);
mVUrestoreRegs(mVU);
xJMP(gprT1); // Jump to rec-code address
xJMP(gprT1q); // Jump to rec-code address
}
void normBranch(mV, microFlagCycles& mFC) {

View File

@ -27,8 +27,8 @@ void mVUdispatcherAB(mV) {
xScopedStackFrame frame(false, true);
// __fastcall = The caller has already put the needed parameters in ecx/edx:
if (!isVU1) { xFastCall((void*)mVUexecuteVU0, ecx, edx); }
else { xFastCall((void*)mVUexecuteVU1, ecx, edx); }
if (!isVU1) { xFastCall((void*)mVUexecuteVU0, arg1reg, arg2reg); }
else { xFastCall((void*)mVUexecuteVU1, arg1reg, arg2reg); }
// Load VU's MXCSR state
xLDMXCSR(g_sseVUMXCSR);
@ -52,7 +52,7 @@ void mVUdispatcherAB(mV) {
xSHUF.PS(xmmPQ, xmmT1, 0); // wzyx = PPQQ
// Jump to Recompiled Code Block
xJMP(eax);
xJMP(rax);
mVU.exitFunct = x86Ptr;
@ -89,7 +89,7 @@ void mVUdispatcherCD(mV) {
xMOV(gprF3, ptr32[&mVU.statFlag[3]]);
// Jump to Recompiled Code Block
xJMP(ptr32[&mVU.resumePtrXG]);
xJMP(ptrNative[&mVU.resumePtrXG]);
mVU.exitFunctXG = x86Ptr;

View File

@ -283,7 +283,7 @@ __fi void mVUsetupFlags(mV, microFlagCycles& mFC) {
xMOV(gprF3, getFlagReg(bStatus[3]));
}
else if (sortRegs == 2) {
xMOV(gprT1, getFlagReg (bStatus[3]));
xMOV(gprT1, getFlagReg (bStatus[3]));
xMOV(gprF0, getFlagReg (bStatus[0]));
xMOV(gprF1, getFlagReg2(bStatus[1]));
xMOV(gprF2, getFlagReg2(bStatus[2]));
@ -291,7 +291,7 @@ __fi void mVUsetupFlags(mV, microFlagCycles& mFC) {
}
else if (sortRegs == 3) {
int gFlag = (bStatus[0] == bStatus[1]) ? bStatus[2] : bStatus[1];
xMOV(gprT1, getFlagReg (gFlag));
xMOV(gprT1, getFlagReg (gFlag));
xMOV(gprT2, getFlagReg (bStatus[3]));
xMOV(gprF0, getFlagReg (bStatus[0]));
xMOV(gprF1, getFlagReg3(bStatus[1]));
@ -299,12 +299,12 @@ __fi void mVUsetupFlags(mV, microFlagCycles& mFC) {
xMOV(gprF3, gprT2);
}
else {
xMOV(gprT1, getFlagReg(bStatus[0]));
xMOV(gprT1, getFlagReg(bStatus[0]));
xMOV(gprT2, getFlagReg(bStatus[1]));
xMOV(gprT3, getFlagReg(bStatus[2]));
xMOV(gprF3, getFlagReg(bStatus[3]));
xMOV(gprF0, gprT1);
xMOV(gprF1, gprT2);
xMOV(gprF1, gprT2);
xMOV(gprF2, gprT3);
}
}

View File

@ -841,16 +841,14 @@ mVUop(mVU_ILW) {
analyzeVIreg2(mVU, _It_, mVUlow.VI_write, 4);
}
pass2 {
xAddressVoid ptr(mVU.regs().Mem + offsetSS);
void *ptr = mVU.regs().Mem + offsetSS;
mVUallocVIa(mVU, gprT2, _Is_);
if (!_Is_)
xXOR(gprT2, gprT2);
xADD(gprT2, _Imm11_);
mVUaddrFix (mVU, gprT2);
ptr += gprT2;
xMOVZX(gprT1, ptr16[ptr]);
mVUaddrFix (mVU, gprT2q);
xMOVZX(gprT1, ptr16[xComplexAddress(gprT3q, ptr, gprT2q)]);
mVUallocVIb(mVU, gprT1, _It_);
mVU.profiler.EmitOp(opILW);
}
@ -866,13 +864,14 @@ mVUop(mVU_ILWR) {
analyzeVIreg2(mVU, _It_, mVUlow.VI_write, 4);
}
pass2 {
xAddressVoid ptr(mVU.regs().Mem + offsetSS);
void *ptr = mVU.regs().Mem + offsetSS;
if (_Is_) {
mVUallocVIa(mVU, gprT2, _Is_);
mVUaddrFix (mVU, gprT2);
ptr += gprT2;
mVUaddrFix (mVU, gprT2q);
xMOVZX(gprT1, ptr16[xComplexAddress(gprT3q, ptr, gprT2q)]);
} else {
xMOVZX(gprT1, ptr16[ptr]);
}
xMOVZX(gprT1, ptr16[ptr]);
mVUallocVIb(mVU, gprT1, _It_);
mVU.profiler.EmitOp(opILWR);
}
@ -883,26 +882,44 @@ mVUop(mVU_ILWR) {
// ISW/ISWR
//------------------------------------------------------------------
// Emits the stores for ISW/ISWR: writes gprT1 (the vi[It] value, zero-extended
// to 32 bits by the caller) into each enabled xyzw field of the VU memory qword
// at base_ptr + reg (offsets 0/4/8/12 for x/y/z/w per the _X/_Y/_Z/_W dest mask).
static void writeBackISW(microVU& mVU, void *base_ptr, xAddressReg reg) {
if (!reg.IsEmpty() && (sptr)base_ptr != (s32)(sptr)base_ptr) {
// x86-64 path: base_ptr doesn't fit in a signed 32-bit displacement, so
// LEA the first touched address into gprT3q once, then address the
// remaining fields relative to it.
int register_offset = -1;
auto writeBackAt = [&](int offset){
if (register_offset == -1) {
// First enabled field: materialize base+offset in gprT3q.
xLEA(gprT3q, ptr[(void*)((sptr)base_ptr + offset)]);
register_offset = offset;
}
xMOV(ptr32[gprT3q+gprT2q+(offset-register_offset)], gprT1);
};
if (_X) writeBackAt(0);
if (_Y) writeBackAt(4);
if (_Z) writeBackAt(8);
if (_W) writeBackAt(12);
} else {
// base_ptr reachable with a plain disp32 (or reg is empty, meaning the
// address is constant): emit direct stores.
if (_X) xMOV(ptr32[base_ptr+reg], gprT1);
if (_Y) xMOV(ptr32[base_ptr+reg+4], gprT1);
if (_Z) xMOV(ptr32[base_ptr+reg+8], gprT1);
if (_W) xMOV(ptr32[base_ptr+reg+12], gprT1);
}
}
mVUop(mVU_ISW) {
pass1 {
analyzeVIreg1(mVU, _Is_, mVUlow.VI_read[0]);
analyzeVIreg1(mVU, _It_, mVUlow.VI_read[1]);
}
pass2 {
xAddressVoid ptr(mVU.regs().Mem);
void *ptr = mVU.regs().Mem;
mVUallocVIa(mVU, gprT2, _Is_);
if (!_Is_)
xXOR(gprT2, gprT2);
xADD(gprT2, _Imm11_);
mVUaddrFix (mVU, gprT2);
ptr += gprT2;
mVUallocVIa(mVU, gprT1, _It_);
if (_X) xMOV(ptr32[ptr], gprT1);
if (_Y) xMOV(ptr32[ptr+4], gprT1);
if (_Z) xMOV(ptr32[ptr+8], gprT1);
if (_W) xMOV(ptr32[ptr+12], gprT1);
writeBackISW(mVU, ptr, gprT2);
mVU.profiler.EmitOp(opISW);
}
pass3 { mVUlog("ISW.%s vi%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_); }
@ -913,17 +930,16 @@ mVUop(mVU_ISWR) {
analyzeVIreg1(mVU, _Is_, mVUlow.VI_read[0]);
analyzeVIreg1(mVU, _It_, mVUlow.VI_read[1]); }
pass2 {
xAddressVoid ptr(mVU.regs().Mem);
void *ptr = mVU.regs().Mem;
xAddressReg is = xEmptyReg;
if (_Is_) {
mVUallocVIa(mVU, gprT2, _Is_);
mVUaddrFix (mVU, gprT2);
ptr += gprT2;
mVUaddrFix (mVU, gprT2q);
is = gprT2q;
}
mVUallocVIa(mVU, gprT1, _It_);
if (_X) xMOV(ptr32[ptr], gprT1);
if (_Y) xMOV(ptr32[ptr+4], gprT1);
if (_Z) xMOV(ptr32[ptr+8], gprT1);
if (_W) xMOV(ptr32[ptr+12], gprT1);
writeBackISW(mVU, ptr, is);
mVU.profiler.EmitOp(opISWR);
}
pass3 { mVUlog("ISWR.%s vi%02d, vi%02d", _XYZW_String, _Ft_, _Fs_); }
@ -936,16 +952,15 @@ mVUop(mVU_ISWR) {
mVUop(mVU_LQ) {
pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, false); }
pass2 {
xAddressVoid ptr(mVU.regs().Mem);
void *ptr = mVU.regs().Mem;
mVUallocVIa(mVU, gprT2, _Is_);
if (!_Is_)
xXOR(gprT2, gprT2);
xADD(gprT2, _Imm11_);
mVUaddrFix(mVU, gprT2);
ptr += gprT2;
mVUaddrFix(mVU, gprT2q);
const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
mVUloadReg(Ft, ptr, _X_Y_Z_W);
mVUloadReg(Ft, xComplexAddress(gprT3q, ptr, gprT2q), _X_Y_Z_W);
mVU.regAlloc->clearNeeded(Ft);
mVU.profiler.EmitOp(opLQ);
}
@ -955,18 +970,25 @@ mVUop(mVU_LQ) {
mVUop(mVU_LQD) {
pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, true); }
pass2 {
xAddressVoid ptr(mVU.regs().Mem);
void * ptr = mVU.regs().Mem;
xAddressReg is = xEmptyReg;
if (_Is_ || isVU0) { // Access VU1 regs mem-map in !_Is_ case
mVUallocVIa(mVU, gprT2, _Is_);
xSUB(gprT2b, 1);
if (_Is_) mVUallocVIb(mVU, gprT2, _Is_);
mVUaddrFix (mVU, gprT2);
ptr += gprT2;
mVUaddrFix (mVU, gprT2q);
is = gprT2q;
}
else {
ptr = (void*)((sptr)ptr + (0xffff & (mVU.microMemSize-8)));
}
else ptr += (0xffff & (mVU.microMemSize-8));
if (!mVUlow.noWriteVF) {
const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
mVUloadReg(Ft, ptr, _X_Y_Z_W);
if (is.IsEmpty()) {
mVUloadReg(Ft, xAddressVoid(ptr), _X_Y_Z_W);
} else {
mVUloadReg(Ft, xComplexAddress(gprT3q, ptr, is), _X_Y_Z_W);
}
mVU.regAlloc->clearNeeded(Ft);
}
mVU.profiler.EmitOp(opLQD);
@ -977,18 +999,23 @@ mVUop(mVU_LQD) {
mVUop(mVU_LQI) {
pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, true); }
pass2 {
xAddressVoid ptr(mVU.regs().Mem);
void *ptr = mVU.regs().Mem;
xAddressReg is = xEmptyReg;
if (_Is_) {
mVUallocVIa(mVU, gprT1, _Is_);
xMOV(gprT2, gprT1);
xADD(gprT1b, 1);
mVUallocVIb(mVU, gprT1, _Is_);
mVUaddrFix (mVU, gprT2);
ptr += gprT2;
mVUaddrFix (mVU, gprT2q);
is = gprT2q;
}
if (!mVUlow.noWriteVF) {
const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
mVUloadReg(Ft, ptr, _X_Y_Z_W);
if (is.IsEmpty()) {
mVUloadReg(Ft, xAddressVoid(ptr), _X_Y_Z_W);
} else {
mVUloadReg(Ft, xComplexAddress(gprT3q, ptr, is), _X_Y_Z_W);
}
mVU.regAlloc->clearNeeded(Ft);
}
mVU.profiler.EmitOp(opLQI);
@ -1003,17 +1030,16 @@ mVUop(mVU_LQI) {
mVUop(mVU_SQ) {
pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, false); }
pass2 {
xAddressVoid ptr(mVU.regs().Mem);
void * ptr = mVU.regs().Mem;
mVUallocVIa(mVU, gprT2, _It_);
if (!_It_)
xXOR(gprT2, gprT2);
xADD(gprT2, _Imm11_);
mVUaddrFix(mVU, gprT2);
ptr += gprT2;
mVUaddrFix(mVU, gprT2q);
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
mVUsaveReg(Fs, ptr, _X_Y_Z_W, 1);
mVUsaveReg(Fs, xComplexAddress(gprT3q, ptr, gprT2q), _X_Y_Z_W, 1);
mVU.regAlloc->clearNeeded(Fs);
mVU.profiler.EmitOp(opSQ);
}
@ -1023,17 +1049,24 @@ mVUop(mVU_SQ) {
mVUop(mVU_SQD) {
pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, true); }
pass2 {
xAddressVoid ptr(mVU.regs().Mem);
void *ptr = mVU.regs().Mem;
xAddressReg it = xEmptyReg;
if (_It_ || isVU0) {// Access VU1 regs mem-map in !_It_ case
mVUallocVIa(mVU, gprT2, _It_);
xSUB(gprT2b, 1);
if (_It_) mVUallocVIb(mVU, gprT2, _It_);
mVUaddrFix (mVU, gprT2);
ptr += gprT2;
mVUaddrFix (mVU, gprT2q);
it = gprT2q;
}
else {
ptr = (void*)((sptr)ptr + (0xffff & (mVU.microMemSize-8)));
}
else ptr += (0xffff & (mVU.microMemSize-8));
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
mVUsaveReg(Fs, ptr, _X_Y_Z_W, 1);
if (it.IsEmpty()) {
mVUsaveReg(Fs, xAddressVoid(ptr), _X_Y_Z_W, 1);
} else {
mVUsaveReg(Fs, xComplexAddress(gprT3q, ptr, it), _X_Y_Z_W, 1);
}
mVU.regAlloc->clearNeeded(Fs);
mVU.profiler.EmitOp(opSQD);
}
@ -1043,17 +1076,20 @@ mVUop(mVU_SQD) {
mVUop(mVU_SQI) {
pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, true); }
pass2 {
xAddressVoid ptr(mVU.regs().Mem);
void *ptr = mVU.regs().Mem;
if (_It_) {
mVUallocVIa(mVU, gprT1, _It_);
xMOV(gprT2, gprT1);
xADD(gprT1b, 1);
mVUallocVIb(mVU, gprT1, _It_);
mVUaddrFix (mVU, gprT2);
ptr += gprT2;
mVUaddrFix (mVU, gprT2q);
}
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
mVUsaveReg(Fs, ptr, _X_Y_Z_W, 1);
if (_It_) {
mVUsaveReg(Fs, xComplexAddress(gprT3q, ptr, gprT2q), _X_Y_Z_W, 1);
} else {
mVUsaveReg(Fs, xAddressVoid(ptr), _X_Y_Z_W, 1);
}
mVU.regAlloc->clearNeeded(Fs);
mVU.profiler.EmitOp(opSQI);
}
@ -1409,7 +1445,7 @@ void normJumpPass2(mV) {
if (!mVUlow.evilBranch) { xMOV(ptr32[&mVU.branch], gprT1 ); }
else { xMOV(ptr32[&mVU.evilBranch], gprT1 ); }
//If delay slot is conditional, it uses badBranch to go to its target
if (mVUlow.badBranch) { xADD(gprT1, 8); xMOV(ptr32[&mVU.badBranch], gprT1); }
if (mVUlow.badBranch) { xADD(gprT1, 8); xMOV(ptr32[&mVU.badBranch], gprT1); }
}
}

View File

@ -373,7 +373,8 @@ static void recCTC2() {
// Executing vu0 block here fixes the intro of Ratchet and Clank
// sVU's COP2 has a comment that "Donald Duck" needs this too...
if (_Rd_) _eeMoveGPRtoM((uptr)&vu0Regs.VI[_Rd_].UL, _Rt_);
xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, (uptr)CpuVU0);
xLoadFarAddr(arg1reg, CpuVU0);
xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg);
break;
}
}

View File

@ -18,7 +18,7 @@
using namespace x86Emitter;
typedef xRegisterSSE xmm;
typedef xRegisterLong x32;
typedef xRegister32 x32;
struct microVU;
@ -145,14 +145,24 @@ static const char branchSTR[16][8] = {
#define gprT1 eax // eax - Temp Reg
#define gprT2 ecx // ecx - Temp Reg
#define gprT3 edx // edx - Temp Reg
#define gprT1q rax // eax - Temp Reg
#define gprT2q rcx // ecx - Temp Reg
#define gprT3q rdx // edx - Temp Reg
#define gprT1b ax // Low 16-bit of gprT1 (eax)
#define gprT2b cx // Low 16-bit of gprT2 (ecx)
#define gprT3b dx // Low 16-bit of gprT3 (edx)
#ifdef __M_X86_64
#define gprF0 ebx // Status Flag 0
#define gprF1 r12d // Status Flag 1
#define gprF2 r13d // Status Flag 2
#define gprF3 r14d // Status Flag 3
#else
#define gprF0 ebx // Status Flag 0
#define gprF1 ebp // Status Flag 1
#define gprF2 esi // Status Flag 2
#define gprF3 edi // Status Flag 3
#endif
// Function Params
#define mP microVU& mVU, int recPass

View File

@ -236,6 +236,18 @@ __fi void mVUrestoreRegs(microVU& mVU, bool fromMemory = false) {
else xMOVAPS(xmmPQ, ptr128[&mVU.xmmBackup[xmmPQ.Id][0]]);
}
class mVUScopedXMMBackup {
microVU& mVU;
bool fromMemory;
public:
mVUScopedXMMBackup(microVU& mVU, bool fromMemory): mVU(mVU), fromMemory(fromMemory) {
mVUbackupRegs(mVU, fromMemory);
}
~mVUScopedXMMBackup() {
mVUrestoreRegs(mVU, fromMemory);
}
};
_mVUt void __fc mVUprintRegs() {
microVU& mVU = mVUx;
for(int i = 0; i < 8; i++) {
@ -274,42 +286,31 @@ static void __fc mVUwaitMTVU() {
}
// Transforms the Address in gprReg to valid VU0/VU1 Address
__fi void mVUaddrFix(mV, const x32& gprReg)
__fi void mVUaddrFix(mV, const xAddressReg& gprReg)
{
if (isVU1) {
xAND(gprReg, 0x3ff); // wrap around
xSHL(gprReg, 4);
xAND(xRegister32(gprReg.Id), 0x3ff); // wrap around
xSHL(xRegister32(gprReg.Id), 4);
}
else {
xTEST(gprReg, 0x400);
xTEST(xRegister32(gprReg.Id), 0x400);
xForwardJNZ8 jmpA; // if addr & 0x4000, reads VU1's VF regs and VI regs
xAND(gprReg, 0xff); // if !(addr & 0x4000), wrap around
xAND(xRegister32(gprReg.Id), 0xff); // if !(addr & 0x4000), wrap around
xForwardJump32 jmpB;
jmpA.SetTarget();
if (THREAD_VU1) {
mVUbackupRegs(mVU, true);
xPUSH(gprT1);
xPUSH(gprT2);
xPUSH(gprT3);
// Align the stackframe (GCC only, since GCC assumes stackframe is always aligned)
#ifdef __GNUC__
xSUB(esp, 4);
#endif
if (IsDevBuild && !isCOP2) { // Lets see which games do this!
xMOV(gprT2, mVU.prog.cur->idx); // Note: Kernel does it via COP2 to initialize VU1!
xMOV(gprT3, xPC); // So we don't spam console, we'll only check micro-mode...
xCALL((void*)mVUwarningRegAccess);
{
mVUScopedXMMBackup mVUSave(mVU, true);
xScopedSavedRegisters save {gprT1q, gprT2q, gprT3q};
if (IsDevBuild && !isCOP2) { // Lets see which games do this!
xMOV(arg1regd, mVU.prog.cur->idx); // Note: Kernel does it via COP2 to initialize VU1!
xMOV(arg2regd, xPC); // So we don't spam console, we'll only check micro-mode...
xFastCall((void*)mVUwarningRegAccess, arg1regd, arg2regd);
}
xFastCall((void*)mVUwaitMTVU);
}
xCALL((void*)mVUwaitMTVU);
#ifdef __GNUC__
xADD(esp, 4);
#endif
xPOP (gprT3);
xPOP (gprT2);
xPOP (gprT1);
mVUrestoreRegs(mVU, true);
}
xAND(gprReg, 0x3f); // ToDo: theres a potential problem if VU0 overrides VU1's VF0/VI0 regs!
xAND(xRegister32(gprReg.Id), 0x3f); // ToDo: theres a potential problem if VU0 overrides VU1's VF0/VI0 regs!
xADD(gprReg, (u128*)VU1.VF - (u128*)VU0.Mem);
jmpB.SetTarget();
xSHL(gprReg, 4); // multiply by 16 (shift left by 4)
@ -568,38 +569,38 @@ void mVUcustomSearch() {
memset(mVUsearchXMM, 0xcc, __pagesize);
xSetPtr(mVUsearchXMM);
xMOVAPS (xmm0, ptr32[ecx]);
xPCMP.EQD(xmm0, ptr32[edx]);
xMOVAPS (xmm1, ptr32[ecx + 0x10]);
xPCMP.EQD(xmm1, ptr32[edx + 0x10]);
xMOVAPS (xmm0, ptr32[arg1reg]);
xPCMP.EQD(xmm0, ptr32[arg2reg]);
xMOVAPS (xmm1, ptr32[arg1reg + 0x10]);
xPCMP.EQD(xmm1, ptr32[arg2reg + 0x10]);
xPAND (xmm0, xmm1);
xMOVMSKPS(eax, xmm0);
xCMP (eax, 0xf);
xForwardJL8 exitPoint;
xMOVAPS (xmm0, ptr32[ecx + 0x20]);
xPCMP.EQD(xmm0, ptr32[edx + 0x20]);
xMOVAPS (xmm1, ptr32[ecx + 0x30]);
xPCMP.EQD(xmm1, ptr32[edx + 0x30]);
xMOVAPS (xmm0, ptr32[arg1reg + 0x20]);
xPCMP.EQD(xmm0, ptr32[arg2reg + 0x20]);
xMOVAPS (xmm1, ptr32[arg1reg + 0x30]);
xPCMP.EQD(xmm1, ptr32[arg2reg + 0x30]);
xPAND (xmm0, xmm1);
xMOVAPS (xmm2, ptr32[ecx + 0x40]);
xPCMP.EQD(xmm2, ptr32[edx + 0x40]);
xMOVAPS (xmm3, ptr32[ecx + 0x50]);
xPCMP.EQD(xmm3, ptr32[edx + 0x50]);
xMOVAPS (xmm2, ptr32[arg1reg + 0x40]);
xPCMP.EQD(xmm2, ptr32[arg2reg + 0x40]);
xMOVAPS (xmm3, ptr32[arg1reg + 0x50]);
xPCMP.EQD(xmm3, ptr32[arg2reg + 0x50]);
xPAND (xmm2, xmm3);
xMOVAPS (xmm4, ptr32[ecx + 0x60]);
xPCMP.EQD(xmm4, ptr32[edx + 0x60]);
xMOVAPS (xmm5, ptr32[ecx + 0x70]);
xPCMP.EQD(xmm5, ptr32[edx + 0x70]);
xMOVAPS (xmm4, ptr32[arg1reg + 0x60]);
xPCMP.EQD(xmm4, ptr32[arg2reg + 0x60]);
xMOVAPS (xmm5, ptr32[arg1reg + 0x70]);
xPCMP.EQD(xmm5, ptr32[arg2reg + 0x70]);
xPAND (xmm4, xmm5);
xMOVAPS (xmm6, ptr32[ecx + 0x80]);
xPCMP.EQD(xmm6, ptr32[edx + 0x80]);
xMOVAPS (xmm7, ptr32[ecx + 0x90]);
xPCMP.EQD(xmm7, ptr32[edx + 0x90]);
xMOVAPS (xmm6, ptr32[arg1reg + 0x80]);
xPCMP.EQD(xmm6, ptr32[arg2reg + 0x80]);
xMOVAPS (xmm7, ptr32[arg1reg + 0x90]);
xPCMP.EQD(xmm7, ptr32[arg2reg + 0x90]);
xPAND (xmm6, xmm7);
xPAND (xmm0, xmm2);

View File

@ -221,10 +221,10 @@ void VifUnpackSSE_Dynarec::CompileRoutine() {
while (vNum) {
ShiftDisplacementWindow( dstIndirect, ecx );
ShiftDisplacementWindow( dstIndirect, arg1reg );
if(UnpkNoOfIterations == 0)
ShiftDisplacementWindow( srcIndirect, edx ); //Don't need to do this otherwise as we arent reading the source.
ShiftDisplacementWindow( srcIndirect, arg2reg ); //Don't need to do this otherwise as we arent reading the source.
if (vCL < cycleSize) {

View File

@ -63,8 +63,8 @@ VifUnpackSSE_Base::VifUnpackSSE_Base()
, UnpkLoopIteration(0)
, UnpkNoOfIterations(0)
, IsAligned(0)
, dstIndirect(ecx) // parameter 1 of __fastcall
, srcIndirect(edx) // parameter 2 of __fastcall
, dstIndirect(arg1reg)
, srcIndirect(arg2reg)
, workReg( xmm1 )
, destReg( xmm0 )
{

View File

@ -24,23 +24,23 @@ TEST(CodegenTests, MOVTest)
{
CODEGEN_TEST_BOTH(xMOV(rax, 0), "31 c0");
CODEGEN_TEST_64(xMOV(rax, rcx), "48 89 c8");
CODEGEN_TEST_BOTH(xMOV(eaxd, ecxd), "89 c8");
CODEGEN_TEST_BOTH(xMOV(eax, ecx), "89 c8");
CODEGEN_TEST_64(xMOV(r8, 0), "45 31 c0");
CODEGEN_TEST_64(xMOV(rax, r8), "4c 89 c0");
CODEGEN_TEST_64(xMOV(r8, rax), "49 89 c0");
CODEGEN_TEST_64(xMOV(r8, r9), "4d 89 c8");
CODEGEN_TEST_64(xMOV(rax, ptrNative[rcx]), "48 8b 01");
CODEGEN_TEST_BOTH(xMOV(eaxd, ptrNative[rcx]), "8b 01");
CODEGEN_TEST_BOTH(xMOV(eax, ptrNative[rcx]), "8b 01");
CODEGEN_TEST_64(xMOV(ptrNative[rax], rcx), "48 89 08");
CODEGEN_TEST_BOTH(xMOV(ptr32[rax], ecxd), "89 08");
CODEGEN_TEST_BOTH(xMOV(ptr32[rax], ecx), "89 08");
CODEGEN_TEST_64(xMOV(rax, ptrNative[r8]), "49 8b 00");
CODEGEN_TEST_64(xMOV(ptrNative[r8], rax), "49 89 00");
CODEGEN_TEST_64(xMOV(r8, ptrNative[r9]), "4d 8b 01");
CODEGEN_TEST_64(xMOV(ptrNative[r8], r9), "4d 89 08");
CODEGEN_TEST_64(xMOV(rax, ptrNative[rbx*4+3+rcx]), "48 8b 44 99 03");
CODEGEN_TEST_64(xMOV(ptrNative[rbx*4+3+rax], rcx), "48 89 4c 98 03");
CODEGEN_TEST_BOTH(xMOV(eaxd, ptr32[rbx*4+3+rcx]), "8b 44 99 03");
CODEGEN_TEST_BOTH(xMOV(ptr32[rbx*4+3+rax], ecxd), "89 4c 98 03");
CODEGEN_TEST_BOTH(xMOV(eax, ptr32[rbx*4+3+rcx]), "8b 44 99 03");
CODEGEN_TEST_BOTH(xMOV(ptr32[rbx*4+3+rax], ecx), "89 4c 98 03");
CODEGEN_TEST_64(xMOV(r8, ptrNative[r10*4+3+r9]), "4f 8b 44 91 03");
CODEGEN_TEST_64(xMOV(ptrNative[r9*4+3+r8], r10), "4f 89 54 88 03");
CODEGEN_TEST_64(xMOV(ptrNative[r8], 0), "49 c7 00 00 00 00 00");
@ -56,14 +56,18 @@ TEST(CodegenTests, MOVTest)
TEST(CodegenTests, LEATest)
{
CODEGEN_TEST_64(xLEA(rax, ptr[rcx]), "48 89 c8"); // Converted to mov rax, rcx
CODEGEN_TEST_BOTH(xLEA(eaxd, ptr[rcx]), "89 c8"); // Converted to mov eax, ecx
CODEGEN_TEST_BOTH(xLEA(eax, ptr[rcx]), "89 c8"); // Converted to mov eax, ecx
CODEGEN_TEST_64(xLEA(rax, ptr[r8]), "4c 89 c0"); // Converted to mov rax, r8
CODEGEN_TEST_64(xLEA(r8, ptr[r9]), "4d 89 c8"); // Converted to mov r8, r9
CODEGEN_TEST_64(xLEA(rax, ptr[rbx*4+3+rcx]), "48 8d 44 99 03");
CODEGEN_TEST_BOTH(xLEA(eaxd, ptr32[rbx*4+3+rcx]), "8d 44 99 03");
CODEGEN_TEST_BOTH(xLEA(eax, ptr32[rbx*4+3+rcx]), "8d 44 99 03");
CODEGEN_TEST_64(xLEA(r8, ptr[r10*4+3+r9]), "4f 8d 44 91 03");
CODEGEN_TEST_64(xLEA(r8, ptr[base]), "4c 8d 05 f9 ff ff ff");
CODEGEN_TEST_64(xLoadFarAddr(r8, base), "4c 8d 05 f9 ff ff ff");
CODEGEN_TEST_64(xLoadFarAddr(r8, (void*)0x1234567890), "49 b8 90 78 56 34 12 00 00 00");
CODEGEN_TEST_BOTH(xLEA(rax, ptr[(void*)0x1234]), "b8 34 12 00 00"); // Converted to mov rax, 0x1234
CODEGEN_TEST_BOTH(xLoadFarAddr(rax, (void*)0x1234), "b8 34 12 00 00");
CODEGEN_TEST(xLEA_Writeback(rbx), "bb cd cd cd cd", "48 8d 1d cd cd cd 0d");
}
TEST(CodegenTests, PUSHTest)
@ -100,7 +104,7 @@ TEST(CodegenTests, POPTest)
TEST(CodegenTests, MathTest)
{
CODEGEN_TEST(xINC(eaxd), "40", "ff c0");
CODEGEN_TEST(xINC(eax), "40", "ff c0");
CODEGEN_TEST(xDEC(rax), "48", "48 ff c8");
CODEGEN_TEST_64(xINC(r8), "49 ff c0");
CODEGEN_TEST_64(xADD(r8, r9), "4d 01 c8");
@ -108,33 +112,33 @@ TEST(CodegenTests, MathTest)
CODEGEN_TEST_64(xADD(rax, 0x1234), "48 05 34 12 00 00");
CODEGEN_TEST_64(xADD(ptr32[base], -0x60), "83 05 f9 ff ff ff a0");
CODEGEN_TEST_64(xADD(ptr32[base], 0x1234), "81 05 f6 ff ff ff 34 12 00 00");
CODEGEN_TEST_BOTH(xADD(eaxd, ebxd), "01 d8");
CODEGEN_TEST_BOTH(xADD(eaxd, 0x1234), "05 34 12 00 00");
CODEGEN_TEST_BOTH(xADD(eax, ebx), "01 d8");
CODEGEN_TEST_BOTH(xADD(eax, 0x1234), "05 34 12 00 00");
CODEGEN_TEST_64(xADD(r8, ptrNative[r10*4+3+r9]), "4f 03 44 91 03");
CODEGEN_TEST_64(xADD(ptrNative[r9*4+3+r8], r10), "4f 01 54 88 03");
CODEGEN_TEST_BOTH(xADD(eaxd, ptr32[rbx*4+3+rcx]), "03 44 99 03");
CODEGEN_TEST_BOTH(xADD(ptr32[rax*4+3+rbx], ecxd), "01 4c 83 03");
CODEGEN_TEST_BOTH(xADD(eax, ptr32[rbx*4+3+rcx]), "03 44 99 03");
CODEGEN_TEST_BOTH(xADD(ptr32[rax*4+3+rbx], ecx), "01 4c 83 03");
CODEGEN_TEST_64(xSUB(r8, 0x12), "49 83 e8 12");
CODEGEN_TEST_64(xSUB(rax, 0x1234), "48 2d 34 12 00 00");
CODEGEN_TEST_BOTH(xSUB(eaxd, ptr32[rcx*4+rax]), "2b 04 88");
CODEGEN_TEST_BOTH(xSUB(eax, ptr32[rcx*4+rax]), "2b 04 88");
CODEGEN_TEST_64(xMUL(ptr32[base]), "f7 2d fa ff ff ff");
CODEGEN_TEST(xMUL(ptr32[(void*)0x1234]), "f7 2d 34 12 00 00", "f7 2c 25 34 12 00 00");
CODEGEN_TEST_BOTH(xDIV(ecxd), "f7 f9");
CODEGEN_TEST_BOTH(xDIV(ecx), "f7 f9");
}
TEST(CodegenTests, BitwiseTest)
{
CODEGEN_TEST_64(xSHR(r8, cl), "49 d3 e8");
CODEGEN_TEST_64(xSHR(rax, cl), "48 d3 e8");
CODEGEN_TEST_BOTH(xSHR(ecxd, cl), "d3 e9");
CODEGEN_TEST_BOTH(xSHR(ecx, cl), "d3 e9");
CODEGEN_TEST_64(xSAR(r8, 1), "49 d1 f8");
CODEGEN_TEST_64(xSAR(rax, 60), "48 c1 f8 3c");
CODEGEN_TEST_BOTH(xSAR(eaxd, 30), "c1 f8 1e");
CODEGEN_TEST_BOTH(xSHL(ebxd, 30), "c1 e3 1e");
CODEGEN_TEST_BOTH(xSAR(eax, 30), "c1 f8 1e");
CODEGEN_TEST_BOTH(xSHL(ebx, 30), "c1 e3 1e");
CODEGEN_TEST_64(xSHL(ptr32[base], 4), "c1 25 f9 ff ff ff 04");
CODEGEN_TEST_64(xAND(r8, r9), "4d 21 c8");
CODEGEN_TEST_64(xXOR(rax, ptrNative[r10]), "49 33 02");
CODEGEN_TEST_BOTH(xOR(esid, ptr32[rax+rbx]), "0b 34 18");
CODEGEN_TEST_BOTH(xOR(esi, ptr32[rax+rbx]), "0b 34 18");
CODEGEN_TEST_64(xNOT(r8), "49 f7 d0");
CODEGEN_TEST_64(xNOT(ptrNative[rax]), "48 f7 10");
CODEGEN_TEST_BOTH(xNOT(ptr32[rbx]), "f7 13");