// Copyright (C) 2003-2008 Dolphin Project. // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, version 2.0. // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License 2.0 for more details. // A copy of the GPL 2.0 should have been included with the program. // If not, see http://www.gnu.org/licenses/ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ #include "Common.h" #include "x64Emitter.h" #include "ABI.h" #include "CPUDetect.h" namespace Gen { static u8 *code; static bool enableBranchHints = false; void SetCodePtr(u8 *ptr) { code = ptr; } const u8 *GetCodePtr() { return code; } u8 *GetWritableCodePtr() { return code; } void ReserveCodeSpace(int bytes) { for (int i = 0; i < bytes; i++) *code++ = 0xCC; } const u8 *AlignCode4() { int c = int((u64)code & 3); if (c) ReserveCodeSpace(4-c); return code; } const u8 *AlignCode16() { int c = int((u64)code & 15); if (c) ReserveCodeSpace(16-c); return code; } const u8 *AlignCodePage() { int c = int((u64)code & 4095); if (c) ReserveCodeSpace(4096-c); return code; } inline void Write8(u8 value) { *code++ = value; } inline void Write16(u16 value) { *(u16*)code = value; code += 2; } inline void Write32(u32 value) { *(u32*)code = value; code += 4; } inline void Write64(u64 value) { *(u64*)code = value; code += 8; } void WriteModRM( int mod, int rm, int reg ) { Write8((u8)((mod << 6) | ((rm & 7) << 3) | (reg & 7))); } void WriteSIB(int scale, int index, int base) { Write8((u8)((scale << 6) | ((index & 7) << 3) | (base & 7))); } void OpArg::WriteRex(bool op64, int customOp) const { #ifdef _M_X64 u8 op = 0x40; if (customOp == -1) customOp = operandReg; if (op64) op |= 8; if (customOp >> 3) op |= 4; if (indexReg >> 3) op |= 2; if (offsetOrBaseReg >> 3) op |= 1; //TODO investigate if this is dangerous if (op != 0x40) Write8(op); #else _dbg_assert_(DYNA_REC, (operandReg >> 3) == 0); _dbg_assert_(DYNA_REC, (indexReg >> 3) == 0); _dbg_assert_(DYNA_REC, (offsetOrBaseReg >> 3) == 0); #endif } void OpArg::WriteRest(int extraBytes, X64Reg _operandReg) const { if (_operandReg == 0xff) _operandReg = (X64Reg)this->operandReg; int mod = 0; int ireg = indexReg; bool SIB = false; int _offsetOrBaseReg = this->offsetOrBaseReg; if (scale == SCALE_RIP) //Also, on 32-bit, just an immediate address { // Oh, RIP addressing. _offsetOrBaseReg = 5; WriteModRM(0, _operandReg&7, 5); //TODO : add some checks #ifdef _M_X64 u64 ripAddr = (u64)code + 4 + extraBytes; s32 offs = (s32)((s64)offset - (s64)ripAddr); Write32((u32)offs); #else Write32((u32)offset); #endif return; } if (scale == 0) { // Oh, no memory, Just a reg. mod = 3; //11 } else if (scale >= 1) { //Ah good, no scaling. if (scale == SCALE_ATREG && !((_offsetOrBaseReg&7) == 4 || (_offsetOrBaseReg&7) == 5)) { //Okay, we're good. No SIB necessary. int ioff = (int)offset; if (ioff == 0) { mod = 0; } else if (ioff<-128 || ioff>127) { mod = 2; //32-bit displacement } else { mod = 1; //8-bit displacement } } else //if (scale != SCALE_ATREG) { if ((_offsetOrBaseReg & 7) == 4) //this would occupy the SIB encoding :( { //So we have to fake it with SIB encoding :( SIB = true; } if (scale >= SCALE_1 && scale < SCALE_ATREG) { SIB = true; } if (scale == SCALE_ATREG && _offsetOrBaseReg == 4) { SIB = true; ireg = 4; } //Okay, we're fine. Just disp encoding. //We need displacement. Which size? int ioff = (int)(s64)offset; if (ioff < -128 || ioff > 127) { mod = 2; //32-bit displacement } else { mod = 1; //8-bit displacement } } } // Okay. Time to do the actual writing // ModRM byte: int oreg = _offsetOrBaseReg; if (SIB) oreg = 4; // TODO(ector): WTF is this if about? I don't remember writing it :-) //if (RIP) // oreg = 5; WriteModRM(mod, _operandReg&7, oreg&7); if (SIB) { //SIB byte int ss; switch (scale) { case 0: _offsetOrBaseReg = 4; ss = 0; break; //RSP case 1: ss = 0; break; case 2: ss = 1; break; case 4: ss = 2; break; case 8: ss = 3; break; case SCALE_ATREG: ss = 0; break; default: _assert_msg_(DYNA_REC, 0, "Invalid scale for SIB byte"); ss = 0; break; } Write8((u8)((ss << 6) | ((ireg&7)<<3) | (_offsetOrBaseReg&7))); } if (mod == 1) //8-bit disp { Write8((u8)(s8)(s32)offset); } else if (mod == 2) //32-bit disp { Write32((u32)offset); } } // W = operand extended width (1 if 64-bit) // R = register# upper bit // X = scale amnt upper bit // B = base register# upper bit void Rex(int w, int r, int x, int b) { w = w ? 1 : 0; r = r ? 1 : 0; x = x ? 1 : 0; b = b ? 1 : 0; u8 rx = (u8)(0x40 | (w << 3) | (r << 2) | (x << 1) | (b)); if (rx != 0x40) Write8(rx); } void JMP(const u8 *addr, bool force5Bytes) { u64 fn = (u64)addr; if (!force5Bytes) { s32 distance = (s32)(fn - ((u64)code + 2)); //TODO - sanity check //8 bits will do Write8(0xEB); Write8((u8)(s8)distance); } else { s32 distance = (s32)(fn - ((u64)code + 5)); //TODO - sanity check Write8(0xE9); Write32((u32)(s32)distance); } } void JMPptr(const OpArg &arg2) { OpArg arg = arg2; if (arg.IsImm()) _assert_msg_(DYNA_REC, 0, "JMPptr - Imm argument"); arg.operandReg = 4; arg.WriteRex(false); Write8(0xFF); arg.WriteRest(); } //Can be used to trap other processors, before overwriting their code // not used in dolphin void JMPself() { Write8(0xEB); Write8(0xFE); } void CALLptr(OpArg arg) { if (arg.IsImm()) _assert_msg_(DYNA_REC, 0, "CALLptr - Imm argument"); arg.operandReg = 2; arg.WriteRex(false); Write8(0xFF); arg.WriteRest(); } void CALL(void *fnptr) { u64 distance = u64(fnptr) - (u64(code) + 5); if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { PanicAlert("CALL out of range (%p calls %p)", code, fnptr); } Write8(0xE8); Write32(u32(distance)); } FixupBranch J(bool force5bytes) { FixupBranch branch; branch.type = force5bytes ? 1 : 0; branch.ptr = code + (force5bytes ? 5 : 2); if (!force5bytes) { //8 bits will do Write8(0xEB); Write8(0); } else { Write8(0xE9); Write32(0); } return branch; } // These are to be used with Jcc only. // Found in intel manual 2A // These do not really make a difference for any current X86 CPU, // but are provided here for future use void HINT_NOT_TAKEN() { if (enableBranchHints) Write8(0x2E); } void HINT_TAKEN() { if (enableBranchHints) Write8(0x3E); } FixupBranch J_CC(CCFlags conditionCode, bool force5bytes) { FixupBranch branch; branch.type = force5bytes ? 1 : 0; branch.ptr = code + (force5bytes ? 5 : 2); if (!force5bytes) { //8 bits will do Write8(0x70 + conditionCode); Write8(0); } else { Write8(0x0F); Write8(0x80 + conditionCode); Write32(0); } return branch; } void J_CC(CCFlags conditionCode, const u8 * addr, bool force5Bytes) { u64 fn = (u64)addr; if (!force5Bytes) { s32 distance = (s32)(fn - ((u64)code + 2)); //TODO - sanity check //8 bits will do Write8(0x70 + conditionCode); Write8((u8)(s8)distance); } else { s32 distance = (s32)(fn - ((u64)code + 6)); //TODO - sanity check Write8(0x0F); Write8(0x80 + conditionCode); Write32((u32)(s32)distance); } } void SetJumpTarget(const FixupBranch &branch) { if (branch.type == 0) { branch.ptr[-1] = (u8)(s8)(code - branch.ptr); } else if (branch.type == 1) { ((s32*)branch.ptr)[-1] = (s32)(code - branch.ptr); } } /* void INC(int bits, OpArg arg) { if (arg.IsImm()) _assert_msg_(DYNA_REC, 0, "INC - Imm argument"); arg.operandReg = 0; if (bits == 16) {Write8(0x66);} arg.WriteRex(bits == 64); Write8(bits == 8 ? 0xFE : 0xFF); arg.WriteRest(); } void DEC(int bits, OpArg arg) { if (arg.IsImm()) _assert_msg_(DYNA_REC, 0, "DEC - Imm argument"); arg.operandReg = 1; if (bits == 16) {Write8(0x66);} arg.WriteRex(bits == 64); Write8(bits == 8 ? 0xFE : 0xFF); arg.WriteRest(); } */ //Single byte opcodes //There is no PUSHAD/POPAD in 64-bit mode. void INT3() {Write8(0xCC);} void RET() {Write8(0xC3);} void RET_FAST() {Write8(0xF3); Write8(0xC3);} //two-byte return (rep ret) - recommended by AMD optimization manual for the case of jumping to a ret void NOP(int count) { // TODO: look up the fastest nop sleds for various sizes int i; switch (count) { case 1: Write8(0x90); break; case 2: Write8(0x66); Write8(0x90); break; default: for (i = 0; i < count; i++) { Write8(0x90); } break; } } void PAUSE() {Write8(0xF3); NOP();} //use in tight spinloops for energy saving on some cpu void CLC() {Write8(0xF8);} //clear carry void CMC() {Write8(0xF5);} //flip carry void STC() {Write8(0xF9);} //set carry //TODO: xchg ah, al ??? void XCHG_AHAL() { Write8(0x86); Write8(0xe0); // alt. 86 c4 } //These two can not be executed on early Intel 64-bit CPU:s, only on AMD! void LAHF() {Write8(0x9F);} void SAHF() {Write8(0x9E);} void PUSHF() {Write8(0x9C);} void POPF() {Write8(0x9D);} void LFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xE8);} void MFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xF0);} void SFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xF8);} void WriteSimple1Byte(int bits, u8 byte, X64Reg reg) { if (bits == 16) {Write8(0x66);} Rex(bits == 64, 0, 0, (int)reg >> 3); Write8(byte + ((int)reg & 7)); } void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg) { if (bits == 16) {Write8(0x66);} Rex(bits==64, 0, 0, (int)reg >> 3); Write8(byte1); Write8(byte2 + ((int)reg & 7)); } void CWD(int bits) { if (bits == 16) {Write8(0x66);} Rex(bits == 64, 0, 0, 0); Write8(0x99); } void CBW(int bits) { if (bits == 8) {Write8(0x66);} Rex(bits == 32, 0, 0, 0); Write8(0x98); } //Simple opcodes //push/pop do not need wide to be 64-bit void PUSH(X64Reg reg) {WriteSimple1Byte(32, 0x50, reg);} void POP(X64Reg reg) {WriteSimple1Byte(32, 0x58, reg);} void PUSH(int bits, const OpArg ®) { if (reg.IsSimpleReg()) PUSH(reg.GetSimpleReg()); else if (reg.IsImm()) { switch (reg.GetImmBits()) { case 8: Write8(0x6A); Write8((u8)(s8)reg.offset); break; case 16: Write8(0x66); Write8(0x68); Write16((u16)(s16)(s32)reg.offset); break; case 32: Write8(0x68); Write32((u32)reg.offset); break; default: _assert_msg_(DYNA_REC, 0, "PUSH - Bad imm bits"); break; } } else { //INT3(); if (bits == 16) Write8(0x66); reg.WriteRex(bits == 64); Write8(0xFF); reg.WriteRest(0,(X64Reg)6); } } void POP(int /*bits*/, const OpArg ®) { if (reg.IsSimpleReg()) POP(reg.GetSimpleReg()); else INT3(); } void BSWAP(int bits, X64Reg reg) { if (bits >= 32) { WriteSimple2Byte(bits, 0x0F, 0xC8, reg); } else if (bits == 16) { //fake 16-bit bswap, TODO replace with xchg ah, al where appropriate WriteSimple2Byte(false, 0x0F, 0xC8, reg); SHR(32, R(reg), Imm8(16)); } else if (bits == 8) { } else { _assert_msg_(DYNA_REC, 0, "BSWAP - Wrong number of bits"); } } // Undefined opcode - reserved // If we ever need a way to always cause a non-breakpoint hard exception... void UD2() { Write8(0x0F); Write8(0x0B); } void PREFETCH(PrefetchLevel level, OpArg arg) { if (arg.IsImm()) _assert_msg_(DYNA_REC, 0, "PREFETCH - Imm argument");; arg.operandReg = (u8)level; arg.WriteRex(false); Write8(0x0F); Write8(0x18); arg.WriteRest(); } void SETcc(CCFlags flag, OpArg dest) { if (dest.IsImm()) _assert_msg_(DYNA_REC, 0, "SETcc - Imm argument"); dest.operandReg = 0; dest.WriteRex(false); Write8(0x0F); Write8(0x90 + (u8)flag); dest.WriteRest(); } void CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag) { if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "CMOVcc - Imm argument"); src.operandReg = dest; src.WriteRex(bits == 64); Write8(0x0F); Write8(0x40 + (u8)flag); src.WriteRest(); } void WriteMulDivType(int bits, OpArg src, int ext) { if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "WriteMulDivType - Imm argument"); src.operandReg = ext; if (bits == 16) Write8(0x66); src.WriteRex(bits == 64); if (bits == 8) { Write8(0xF6); } else { Write8(0xF7); } src.WriteRest(); } void MUL(int bits, OpArg src) {WriteMulDivType(bits, src, 4);} void DIV(int bits, OpArg src) {WriteMulDivType(bits, src, 6);} void IMUL(int bits, OpArg src) {WriteMulDivType(bits, src, 5);} void IDIV(int bits, OpArg src) {WriteMulDivType(bits, src, 7);} void NEG(int bits, OpArg src) {WriteMulDivType(bits, src, 3);} void NOT(int bits, OpArg src) {WriteMulDivType(bits, src, 2);} void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2) { if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "WriteBitSearchType - Imm argument"); src.operandReg = (u8)dest; if (bits == 16) Write8(0x66); src.WriteRex(bits == 64); Write8(0x0F); Write8(byte2); src.WriteRest(); } void MOVNTI(int bits, OpArg dest, X64Reg src) { if (bits <= 16) _assert_msg_(DYNA_REC, 0, "MOVNTI - bits<=16"); WriteBitSearchType(bits, src, dest, 0xC3); } void BSF(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBC);} //bottom bit to top bit void BSR(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBD);} //top bit to bottom bit void MOVSX(int dbits, int sbits, X64Reg dest, OpArg src) { if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "MOVSX - Imm argument"); if (dbits == sbits) { MOV(dbits, R(dest), src); return; } src.operandReg = (u8)dest; if (dbits == 16) Write8(0x66); src.WriteRex(dbits == 64); if (sbits == 8) { Write8(0x0F); Write8(0xBE); } else if (sbits == 16) { Write8(0x0F); Write8(0xBF); } else if (sbits == 32 && dbits == 64) { Write8(0x63); } else { Crash(); } src.WriteRest(); } void MOVZX(int dbits, int sbits, X64Reg dest, OpArg src) { if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "MOVZX - Imm argument"); if (dbits == sbits) { MOV(dbits, R(dest), src); return; } src.operandReg = (u8)dest; if (dbits == 16) Write8(0x66); src.WriteRex(dbits == 64); if (sbits == 8) { Write8(0x0F); Write8(0xB6); } else if (sbits == 16) { Write8(0x0F); Write8(0xB7); } else { Crash(); } src.WriteRest(); } void LEA(int bits, X64Reg dest, OpArg src) { if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "LEA - Imm argument"); src.operandReg = (u8)dest; if (bits == 16) Write8(0x66); //TODO: performance warning src.WriteRex(bits == 64); Write8(0x8D); src.WriteRest(); } //shift can be either imm8 or cl void WriteShift(int bits, OpArg dest, OpArg &shift, int ext) { bool writeImm = false; if (dest.IsImm()) { _assert_msg_(DYNA_REC, 0, "WriteShift - can't shift imms"); } if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || (shift.IsImm() && shift.GetImmBits() != 8)) { _assert_msg_(DYNA_REC, 0, "WriteShift - illegal argument"); } dest.operandReg = ext; if (bits == 16) Write8(0x66); dest.WriteRex(bits == 64); if (shift.GetImmBits() == 8) { //ok an imm u8 imm = (u8)shift.offset; if (imm == 1) { Write8(bits == 8 ? 0xD0 : 0xD1); } else { writeImm = true; Write8(bits == 8 ? 0xC0 : 0xC1); } } else { Write8(bits == 8 ? 0xD2 : 0xD3); } dest.WriteRest(writeImm ? 1 : 0); if (writeImm) Write8((u8)shift.offset); } // large rotates and shift are slower on intel than amd // intel likes to rotate by 1, and the op is smaller too void ROL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 0);} void ROR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 1);} void RCL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 2);} void RCR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 3);} void SHL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 4);} void SHR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 5);} void SAR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 7);} void OpArg::WriteSingleByteOp(u8 op, X64Reg _operandReg, int bits) { if (bits == 16) Write8(0x66); this->operandReg = (u8)_operandReg; WriteRex(bits == 64); Write8(op); WriteRest(); } //todo : add eax special casing struct NormalOpDef { u8 toRm8, toRm32, fromRm8, fromRm32, imm8, imm32, simm8, ext; }; const NormalOpDef nops[11] = { {0x00, 0x01, 0x02, 0x03, 0x80, 0x81, 0x83, 0}, //ADD {0x10, 0x11, 0x12, 0x13, 0x80, 0x81, 0x83, 2}, //ADC {0x28, 0x29, 0x2A, 0x2B, 0x80, 0x81, 0x83, 5}, //SUB {0x18, 0x19, 0x1A, 0x1B, 0x80, 0x81, 0x83, 3}, //SBB {0x20, 0x21, 0x22, 0x23, 0x80, 0x81, 0x83, 4}, //AND {0x08, 0x09, 0x0A, 0x0B, 0x80, 0x81, 0x83, 1}, //OR {0x30, 0x31, 0x32, 0x33, 0x80, 0x81, 0x83, 6}, //XOR {0x88, 0x89, 0x8A, 0x8B, 0xC6, 0xC7, 0xCC, 0}, //MOV {0x84, 0x85, 0x84, 0x85, 0xF6, 0xF7, 0xCC, 0}, //TEST (to == from) {0x38, 0x39, 0x3A, 0x3B, 0x80, 0x81, 0x83, 7}, //CMP {0x86, 0x87, 0x86, 0x87, 0xCC, 0xCC, 0xCC, 7}, //XCHG }; //operand can either be immediate or register void OpArg::WriteNormalOp(bool toRM, NormalOp op, const OpArg &operand, int bits) const { X64Reg _operandReg = (X64Reg)this->operandReg; if (IsImm()) { _assert_msg_(DYNA_REC, 0, "WriteNormalOp - Imm argument, wrong order"); } if (bits == 16) Write8(0x66); int immToWrite = 0; if (operand.IsImm()) { _operandReg = (X64Reg)0; WriteRex(bits == 64); if (!toRM) { _assert_msg_(DYNA_REC, 0, "WriteNormalOp - Writing to Imm (!toRM)"); } if (operand.scale == SCALE_IMM8 && bits == 8) { Write8(nops[op].imm8); _assert_msg_(DYNA_REC, code[-1] != 0xCC, "ARGH1"); immToWrite = 8; } else if ((operand.scale == SCALE_IMM16 && bits == 16) || (operand.scale == SCALE_IMM32 && bits == 32) || (operand.scale == SCALE_IMM32 && bits == 64)) { Write8(nops[op].imm32); _assert_msg_(DYNA_REC, code[-1] != 0xCC, "ARGH2"); immToWrite = 32; } else if ((operand.scale == SCALE_IMM8 && bits == 16) || (operand.scale == SCALE_IMM8 && bits == 32) || (operand.scale == SCALE_IMM8 && bits == 64)) { Write8(nops[op].simm8); _assert_msg_(DYNA_REC, code[-1] != 0xCC, "ARGH3"); immToWrite = 8; } else if (operand.scale == SCALE_IMM64 && bits == 64) { if (op == nrmMOV) { Write8(0xB8 + (offsetOrBaseReg & 7)); Write64((u64)operand.offset); return; } _assert_msg_(DYNA_REC, 0, "WriteNormalOp - Only MOV can take 64-bit imm"); } else { _assert_msg_(DYNA_REC, 0, "WriteNormalOp - Unhandled case"); } _operandReg = (X64Reg)nops[op].ext; //pass extension in REG of ModRM } else { _operandReg = (X64Reg)operand.offsetOrBaseReg; WriteRex(bits == 64, _operandReg); // mem/reg or reg/reg op if (toRM) { Write8(bits == 8 ? nops[op].toRm8 : nops[op].toRm32); _assert_msg_(DYNA_REC, code[-1] != 0xCC, "ARGH4"); } else { Write8(bits == 8 ? nops[op].fromRm8 : nops[op].fromRm32); _assert_msg_(DYNA_REC, code[-1] != 0xCC, "ARGH5"); } } WriteRest(immToWrite>>3, _operandReg); switch (immToWrite) { case 0: break; case 8: Write8((u8)operand.offset); break; case 32: Write32((u32)operand.offset); break; default: _assert_msg_(DYNA_REC, 0, "WriteNormalOp - Unhandled case"); } } void WriteNormalOp(int bits, NormalOp op, const OpArg &a1, const OpArg &a2) { if (a1.IsImm()) { //Booh! Can't write to an imm _assert_msg_(DYNA_REC, 0, "WriteNormalOp - a1 cannot be imm"); return; } if (a2.IsImm()) { a1.WriteNormalOp(true, op, a2, bits); } else { if (a1.IsSimpleReg()) { a2.WriteNormalOp(false, op, a1, bits); } else { a1.WriteNormalOp(true, op, a2, bits); } } } void ADD (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(bits, nrmADD, a1, a2);} void ADC (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(bits, nrmADC, a1, a2);} void SUB (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(bits, nrmSUB, a1, a2);} void SBB (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(bits, nrmSBB, a1, a2);} void AND (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(bits, nrmAND, a1, a2);} void OR (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(bits, nrmOR , a1, a2);} void XOR (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(bits, nrmXOR, a1, a2);} void MOV (int bits, const OpArg &a1, const OpArg &a2) { _assert_msg_(DYNA_REC, !a1.IsSimpleReg() || !a2.IsSimpleReg() || a1.GetSimpleReg() != a2.GetSimpleReg(), "Redundant MOV @ %p", code); WriteNormalOp(bits, nrmMOV, a1, a2); } void TEST(int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(bits, nrmTEST, a1, a2);} void CMP (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(bits, nrmCMP, a1, a2);} void XCHG(int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(bits, nrmXCHG, a1, a2);} void IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2) { if (bits == 8) { _assert_msg_(DYNA_REC, 0, "IMUL - illegal bit size!"); return; } if (a1.IsImm()) { _assert_msg_(DYNA_REC, 0, "IMUL - second arg cannot be imm!"); return; } if (!a2.IsImm()) { _assert_msg_(DYNA_REC, 0, "IMUL - third arg must be imm!"); return; } if (bits == 16) Write8(0x66); a1.WriteRex(bits == 64, regOp); if (a2.GetImmBits() == 8) { Write8(0x6B); a1.WriteRest(1, regOp); Write8((u8)a2.offset); } else { Write8(0x69); if (a2.GetImmBits() == 16 && bits == 16) { a1.WriteRest(2, regOp); Write16((u16)a2.offset); } else if (a2.GetImmBits() == 32 && (bits == 32 || bits == 64)) { a1.WriteRest(4, regOp); Write32((u32)a2.offset); } else { _assert_msg_(DYNA_REC, 0, "IMUL - unhandled case!"); } } } void IMUL(int bits, X64Reg regOp, OpArg a) { if (bits == 8) { _assert_msg_(DYNA_REC, 0, "IMUL - illegal bit size!"); return; } if (a.IsImm()) { IMUL(bits, regOp, R(regOp), a) ; return ; } if (bits == 16) Write8(0x66); a.WriteRex(bits == 64, regOp); Write8(0x0F); Write8(0xAF); a.WriteRest(0, regOp); } void WriteSSEOp(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes = 0) { if (size == 64 && packed) Write8(0x66); //this time, override goes upwards if (!packed) Write8(size == 64 ? 0xF2 : 0xF3); arg.operandReg = regOp; arg.WriteRex(false); Write8(0x0F); Write8(sseOp); arg.WriteRest(extrabytes); } void MOVD_xmm(X64Reg dest, const OpArg &arg) {WriteSSEOp(64, 0x6E, true, dest, arg, 0);} void MOVQ_xmm(X64Reg dest, OpArg arg) { #ifdef _M_X64 // Alternate encoding // This does not display correctly in MSVC's debugger, it thinks it's a MOVD arg.operandReg = dest; Write8(0x66); arg.WriteRex(true); Write8(0x0f); Write8(0x6E); arg.WriteRest(0); #else arg.operandReg = dest; Write8(0xF3); Write8(0x0f); Write8(0x7E); arg.WriteRest(0); #endif } void MOVD_xmm(const OpArg &arg, X64Reg src) {WriteSSEOp(64, 0x7E, true, src, arg, 0);} void MOVQ_xmm(OpArg arg, X64Reg src) { if (src > 7) { // Alternate encoding // This does not display correctly in MSVC's debugger, it thinks it's a MOVD arg.operandReg = src; Write8(0x66); arg.WriteRex(true); Write8(0x0f); Write8(0x7E); arg.WriteRest(0); } else { // INT3(); arg.operandReg = src; arg.WriteRex(false); Write8(0x66); Write8(0x0f); Write8(0xD6); arg.WriteRest(0); } } void WriteMXCSR(OpArg arg, int ext) { if (arg.IsImm() || arg.IsSimpleReg()) _assert_msg_(DYNA_REC, 0, "MXCSR - invalid operand"); arg.operandReg = ext; arg.WriteRex(false); Write8(0x0F); Write8(0xAE); arg.WriteRest(); } enum NormalSSEOps { sseCMP = 0xC2, sseADD = 0x58, //ADD sseSUB = 0x5C, //SUB sseAND = 0x54, //AND sseANDN = 0x55, //ANDN sseOR = 0x56, sseXOR = 0x57, sseMUL = 0x59, //MUL, sseDIV = 0x5E, //DIV sseMIN = 0x5D, //MIN sseMAX = 0x5F, //MAX sseCOMIS = 0x2F, //COMIS sseUCOMIS = 0x2E, //UCOMIS sseSQRT = 0x51, //SQRT sseRSQRT = 0x52, //RSQRT (NO DOUBLE PRECISION!!!) sseMOVAPfromRM = 0x28, //MOVAP from RM sseMOVAPtoRM = 0x29, //MOVAP to RM sseMOVUPfromRM = 0x10, //MOVUP from RM sseMOVUPtoRM = 0x11, //MOVUP to RM sseMASKMOVDQU = 0xF7, sseLDDQU = 0xF0, sseSHUF = 0xC6, sseMOVNTDQ = 0xE7, sseMOVNTP = 0x2B, }; void STMXCSR(OpArg memloc) {WriteMXCSR(memloc, 3);} void LDMXCSR(OpArg memloc) {WriteMXCSR(memloc, 2);} void MOVNTDQ(OpArg arg, X64Reg regOp) {WriteSSEOp(64, sseMOVNTDQ, true, regOp, arg);} void MOVNTPS(OpArg arg, X64Reg regOp) {WriteSSEOp(32, sseMOVNTP, true, regOp, arg);} void MOVNTPD(OpArg arg, X64Reg regOp) {WriteSSEOp(64, sseMOVNTP, true, regOp, arg);} void ADDSS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseADD, false, regOp, arg);} void ADDSD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseADD, false, regOp, arg);} void SUBSS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseSUB, false, regOp, arg);} void SUBSD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseSUB, false, regOp, arg);} void CMPSS(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(32, sseCMP, false, regOp, arg,1); Write8(compare);} void CMPSD(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(64, sseCMP, false, regOp, arg,1); Write8(compare);} void MULSS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseMUL, false, regOp, arg);} void MULSD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseMUL, false, regOp, arg);} void DIVSS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseDIV, false, regOp, arg);} void DIVSD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseDIV, false, regOp, arg);} void MINSS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseMIN, false, regOp, arg);} void MINSD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseMIN, false, regOp, arg);} void MAXSS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseMAX, false, regOp, arg);} void MAXSD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseMAX, false, regOp, arg);} void SQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseSQRT, false, regOp, arg);} void SQRTSD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseSQRT, false, regOp, arg);} void RSQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseRSQRT, false, regOp, arg);} void RSQRTSD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseRSQRT, false, regOp, arg);} void ADDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseADD, true, regOp, arg);} void ADDPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseADD, true, regOp, arg);} void SUBPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseSUB, true, regOp, arg);} void SUBPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseSUB, true, regOp, arg);} void CMPPS(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(32, sseCMP, true, regOp, arg,1); Write8(compare);} void CMPPD(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(64, sseCMP, true, regOp, arg,1); Write8(compare);} void ANDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseAND, true, regOp, arg);} void ANDPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseAND, true, regOp, arg);} void ANDNPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseANDN, true, regOp, arg);} void ANDNPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseANDN, true, regOp, arg);} void ORPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseOR, true, regOp, arg);} void ORPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseOR, true, regOp, arg);} void XORPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseXOR, true, regOp, arg);} void XORPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseXOR, true, regOp, arg);} void MULPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseMUL, true, regOp, arg);} void MULPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseMUL, true, regOp, arg);} void DIVPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseDIV, true, regOp, arg);} void DIVPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseDIV, true, regOp, arg);} void MINPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseMIN, true, regOp, arg);} void MINPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseMIN, true, regOp, arg);} void MAXPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseMAX, true, regOp, arg);} void MAXPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseMAX, true, regOp, arg);} void SQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseSQRT, true, regOp, arg);} void SQRTPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseSQRT, true, regOp, arg);} void RSQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseRSQRT, true, regOp, arg);} void RSQRTPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseRSQRT, true, regOp, arg);} void SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(32, sseSHUF, true, regOp, arg,1); Write8(shuffle);} void SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(64, sseSHUF, true, regOp, arg,1); Write8(shuffle);} void COMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseCOMIS, true, regOp, arg);} //weird that these should be packed void COMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseCOMIS, true, regOp, arg);} //ordered void UCOMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseUCOMIS, true, regOp, arg);} //unordered void UCOMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseUCOMIS, true, regOp, arg);} void MOVAPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseMOVAPfromRM, true, regOp, arg);} void MOVAPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseMOVAPfromRM, true, regOp, arg);} void MOVAPS(OpArg arg, X64Reg regOp) {WriteSSEOp(32, sseMOVAPtoRM, true, regOp, arg);} void MOVAPD(OpArg arg, X64Reg regOp) {WriteSSEOp(64, sseMOVAPtoRM, true, regOp, arg);} void MOVUPS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseMOVUPfromRM, true, regOp, arg);} void MOVUPD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseMOVUPfromRM, true, regOp, arg);} void MOVUPS(OpArg arg, X64Reg regOp) {WriteSSEOp(32, sseMOVUPtoRM, true, regOp, arg);} void MOVUPD(OpArg arg, X64Reg regOp) {WriteSSEOp(64, sseMOVUPtoRM, true, regOp, arg);} void MOVSS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, sseMOVUPfromRM, false, regOp, arg);} void MOVSD(X64Reg regOp, OpArg arg) {WriteSSEOp(64, sseMOVUPfromRM, false, regOp, arg);} void MOVSS(OpArg arg, X64Reg regOp) {WriteSSEOp(32, sseMOVUPtoRM, false, regOp, arg);} void MOVSD(OpArg arg, X64Reg regOp) {WriteSSEOp(64, sseMOVUPtoRM, false, regOp, arg);} void CVTPS2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(32, 0x5A, true, regOp, arg);} void CVTPD2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(64, 0x5A, true, regOp, arg);} void CVTSD2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(64, 0x5A, false, regOp, arg);} void CVTSS2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(32, 0x5A, false, regOp, arg);} void CVTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(32, 0xF2, false, regOp, arg);} void CVTDQ2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(32, 0xE6, false, regOp, arg);} void CVTDQ2PS(X64Reg regOp, const OpArg &arg) {WriteSSEOp(32, 0x5B, true, regOp, arg);} void CVTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(64, 0xE6, false, regOp, arg);} void MASKMOVDQU(X64Reg dest, X64Reg src) {WriteSSEOp(64, sseMASKMOVDQU, true, dest, R(src));} void MOVMSKPS(X64Reg dest, OpArg arg) {WriteSSEOp(32, 0x50, true, dest, arg);} void MOVMSKPD(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x50, true, dest, arg);} void LDDQU(X64Reg dest, OpArg arg) {WriteSSEOp(64, sseLDDQU, false, dest, arg);} // For integer data only void UNPCKLPD(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x14, true, dest, arg);} void UNPCKHPD(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x15, true, dest, arg);} void MOVDDUP(X64Reg regOp, OpArg arg) { if (cpu_info.bSSE3) { WriteSSEOp(64, 0x12, false, regOp, arg); //SSE3 movddup } else { // Simulate this instruction with SSE2 instructions if (!arg.IsSimpleReg(regOp)) MOVQ_xmm(regOp, arg); UNPCKLPD(regOp, R(regOp)); } } //There are a few more left // Also some integer instrucitons are missing void PACKSSDW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x6B, true, dest, arg);} void PACKSSWB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x63, true, dest, arg);} //void PACKUSDW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x66, true, dest, arg);} // WRONG void PACKUSWB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x67, true, dest, arg);} void PUNPCKLBW(X64Reg dest, const OpArg &arg) {WriteSSEOp(64, 0x60, true, dest, arg);} void PUNPCKLWD(X64Reg dest, const OpArg &arg) {WriteSSEOp(64, 0x61, true, dest, arg);} void PUNPCKLDQ(X64Reg dest, const OpArg &arg) {WriteSSEOp(64, 0x62, true, dest, arg);} //void PUNPCKLQDQ(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x60, true, dest, arg);} // WARNING not REX compatible void PSRAW(X64Reg reg, int shift) { if (reg > 7) PanicAlert("The PSRAW-emitter does not support regs above 7"); Write8(0x66); Write8(0x0f); Write8(0x71); Write8(0xE0 | reg); Write8(shift); } // WARNING not REX compatible void PSRAD(X64Reg reg, int shift) { if (reg > 7) PanicAlert("The PSRAD-emitter does not support regs above 7"); Write8(0x66); Write8(0x0f); Write8(0x72); Write8(0xE0 | reg); Write8(shift); } void PSHUFB(X64Reg dest, OpArg arg) { if (!cpu_info.bSSSE3) { PanicAlert("Trying to use PSHUFB on a system that doesn't support it. Bad programmer."); } Write8(0x66); arg.operandReg = dest; arg.WriteRex(false); Write8(0x0f); Write8(0x38); Write8(0x00); arg.WriteRest(0); } void PAND(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xDB, true, dest, arg);} void PANDN(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xDF, true, dest, arg);} void PXOR(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xEF, true, dest, arg);} void POR(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xEB, true, dest, arg);} void PADDB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xFC, true, dest, arg);} void PADDW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xFD, true, dest, arg);} void PADDD(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xFE, true, dest, arg);} void PADDQ(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xD4, true, dest, arg);} void PADDSB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xEC, true, dest, arg);} void PADDSW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xED, true, dest, arg);} void PADDUSB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xDC, true, dest, arg);} void PADDUSW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xDD, true, dest, arg);} void PSUBB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xF8, true, dest, arg);} void PSUBW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xF9, true, dest, arg);} void PSUBD(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xFA, true, dest, arg);} void PSUBQ(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xDB, true, dest, arg);} void PSUBSB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xE8, true, dest, arg);} void PSUBSW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xE9, true, dest, arg);} void PSUBUSB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xD8, true, dest, arg);} void PSUBUSW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xD9, true, dest, arg);} void PAVGB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xE0, true, dest, arg);} void PAVGW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xE3, true, dest, arg);} void PCMPEQB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x74, true, dest, arg);} void PCMPEQW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x75, true, dest, arg);} void PCMPEQD(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x76, true, dest, arg);} void PCMPGTB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x64, true, dest, arg);} void PCMPGTW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x65, true, dest, arg);} void PCMPGTD(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0x66, true, dest, arg);} void PEXTRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(64, 0x64, true, dest, arg); Write8(subreg);} void PINSRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(64, 0x64, true, dest, arg); Write8(subreg);} void PMADDWD(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xF5, true, dest, arg); } void PSADBW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xF6, true, dest, arg);} void PMAXSW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xEE, true, dest, arg); } void PMAXUB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xDE, true, dest, arg); } void PMINSW(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xEA, true, dest, arg); } void PMINUB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xDA, true, dest, arg); } void PMOVMSKB(X64Reg dest, OpArg arg) {WriteSSEOp(64, 0xD7, true, dest, arg); } // Prefixes void LOCK() { Write8(0xF0); } void REP() { Write8(0xF3); } void REPNE(){ Write8(0xF2); } void FWAIT() { Write8(0x9B); } namespace Util { // Sets up a __cdecl function. void EmitPrologue(int maxCallParams) { #ifdef _M_IX86 // Don't really need to do anything #elif defined(_M_X64) #if _WIN32 int stacksize = ((maxCallParams + 1) & ~1)*8 + 8; // Set up a stack frame so that we can call functions // TODO: use maxCallParams SUB(64, R(RSP), Imm8(stacksize)); #endif #else #error Arch not supported #endif } void EmitEpilogue(int maxCallParams) { #ifdef _M_IX86 RET(); #elif defined(_M_X64) #ifdef _WIN32 int stacksize = ((maxCallParams+1)&~1)*8 + 8; ADD(64, R(RSP), Imm8(stacksize)); #endif RET(); #else #error Arch not supported #endif } } // namespace // helper routines for setting pointers void CallCdeclFunction3(void* fnptr, u32 arg0, u32 arg1, u32 arg2) { using namespace Gen; #ifdef _M_X64 #ifdef _MSC_VER MOV(32, R(RCX), Imm32(arg0)); MOV(32, R(RDX), Imm32(arg1)); MOV(32, R(R8), Imm32(arg2)); CALL(fnptr); #else MOV(32, R(RDI), Imm32(arg0)); MOV(32, R(RSI), Imm32(arg1)); MOV(32, R(RDX), Imm32(arg2)); CALL(fnptr); #endif #else ABI_AlignStack(3 * 4); PUSH(32, Imm32(arg2)); PUSH(32, Imm32(arg1)); PUSH(32, Imm32(arg0)); CALL(fnptr); #ifdef _WIN32 // don't inc stack #else ABI_RestoreStack(3 * 4); #endif #endif } void CallCdeclFunction4(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3) { using namespace Gen; #ifdef _M_X64 #ifdef _MSC_VER MOV(32, R(RCX), Imm32(arg0)); MOV(32, R(RDX), Imm32(arg1)); MOV(32, R(R8), Imm32(arg2)); MOV(32, R(R9), Imm32(arg3)); CALL(fnptr); #else MOV(32, R(RDI), Imm32(arg0)); MOV(32, R(RSI), Imm32(arg1)); MOV(32, R(RDX), Imm32(arg2)); MOV(32, R(RCX), Imm32(arg3)); CALL(fnptr); #endif #else ABI_AlignStack(4 * 4); PUSH(32, Imm32(arg3)); PUSH(32, Imm32(arg2)); PUSH(32, Imm32(arg1)); PUSH(32, Imm32(arg0)); CALL(fnptr); #ifdef _WIN32 // don't inc stack #else ABI_RestoreStack(4 * 4); #endif #endif } void CallCdeclFunction5(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4) { using namespace Gen; #ifdef _M_X64 #ifdef _MSC_VER MOV(32, R(RCX), Imm32(arg0)); MOV(32, R(RDX), Imm32(arg1)); MOV(32, R(R8), Imm32(arg2)); MOV(32, R(R9), Imm32(arg3)); MOV(32, MDisp(RSP, 0x20), Imm32(arg4)); CALL(fnptr); #else MOV(32, R(RDI), Imm32(arg0)); MOV(32, R(RSI), Imm32(arg1)); MOV(32, R(RDX), Imm32(arg2)); MOV(32, R(RCX), Imm32(arg3)); MOV(32, R(R8), Imm32(arg4)); CALL(fnptr); #endif #else ABI_AlignStack(5 * 4); PUSH(32, Imm32(arg4)); PUSH(32, Imm32(arg3)); PUSH(32, Imm32(arg2)); PUSH(32, Imm32(arg1)); PUSH(32, Imm32(arg0)); CALL(fnptr); #ifdef _WIN32 // don't inc stack #else ABI_RestoreStack(5 * 4); #endif #endif } void CallCdeclFunction6(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4, u32 arg5) { using namespace Gen; #ifdef _M_X64 #ifdef _MSC_VER MOV(32, R(RCX), Imm32(arg0)); MOV(32, R(RDX), Imm32(arg1)); MOV(32, R(R8), Imm32(arg2)); MOV(32, R(R9), Imm32(arg3)); MOV(32, MDisp(RSP, 0x20), Imm32(arg4)); MOV(32, MDisp(RSP, 0x28), Imm32(arg5)); CALL(fnptr); #else MOV(32, R(RDI), Imm32(arg0)); MOV(32, R(RSI), Imm32(arg1)); MOV(32, R(RDX), Imm32(arg2)); MOV(32, R(RCX), Imm32(arg3)); MOV(32, R(R8), Imm32(arg4)); MOV(32, R(R9), Imm32(arg5)); CALL(fnptr); #endif #else ABI_AlignStack(6 * 4); PUSH(32, Imm32(arg5)); PUSH(32, Imm32(arg4)); PUSH(32, Imm32(arg3)); PUSH(32, Imm32(arg2)); PUSH(32, Imm32(arg1)); PUSH(32, Imm32(arg0)); CALL(fnptr); #ifdef _WIN32 // don't inc stack #else ABI_RestoreStack(6 * 4); #endif #endif } #ifdef _M_X64 // See header void ___CallCdeclImport3(void* impptr, u32 arg0, u32 arg1, u32 arg2) { MOV(32, R(RCX), Imm32(arg0)); MOV(32, R(RDX), Imm32(arg1)); MOV(32, R(R8), Imm32(arg2)); CALLptr(M(impptr)); } void ___CallCdeclImport4(void* impptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3) { MOV(32, R(RCX), Imm32(arg0)); MOV(32, R(RDX), Imm32(arg1)); MOV(32, R(R8), Imm32(arg2)); MOV(32, R(R9), Imm32(arg3)); CALLptr(M(impptr)); } void ___CallCdeclImport5(void* impptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4) { MOV(32, R(RCX), Imm32(arg0)); MOV(32, R(RDX), Imm32(arg1)); MOV(32, R(R8), Imm32(arg2)); MOV(32, R(R9), Imm32(arg3)); MOV(32, MDisp(RSP, 0x20), Imm32(arg4)); CALLptr(M(impptr)); } void ___CallCdeclImport6(void* impptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4, u32 arg5) { MOV(32, R(RCX), Imm32(arg0)); MOV(32, R(RDX), Imm32(arg1)); MOV(32, R(R8), Imm32(arg2)); MOV(32, R(R9), Imm32(arg3)); MOV(32, MDisp(RSP, 0x20), Imm32(arg4)); MOV(32, MDisp(RSP, 0x28), Imm32(arg5)); CALLptr(M(impptr)); } #endif }