Merge pull request #62 from chrisps/canary_experimental
Minor correctness/constant folding fixes, guest code optimizations for pre-ryzen amd processors
This commit is contained in:
commit
9006b309af
|
@ -0,0 +1,334 @@
|
||||||
|
#ifndef XENIA_CPU_BACKEND_X64_X64_AMDFX_EXTENSIONS_H_
|
||||||
|
#define XENIA_CPU_BACKEND_X64_X64_AMDFX_EXTENSIONS_H_
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace xe {
|
||||||
|
namespace cpu {
|
||||||
|
namespace backend {
|
||||||
|
namespace x64 {
|
||||||
|
namespace amdfx {
|
||||||
|
// XOP opcode-map selector placed in byte1.mmmmm. Map 8 is used by encodings
// that carry a trailing immediate byte, map 9 by encodings that do not
// (see xop_t::AssembledSize).
enum xopcodemap_e : unsigned char {
  XOPCODE_HAS_IMMBYTE = 0x8,
  XOPCODE_NO_IMMBYTE = 0x9
};
|
||||||
|
|
||||||
|
// base opcodes, without their size specified
|
||||||
|
// Base XOP opcodes, without their element size specified. Some values repeat
// (e.g. the immediate-form rotates vs the VPHADD family); those forms are
// presumably disambiguated by the opcode-map byte — see xopcodemap_e.
enum xopcode_e : unsigned char {
  // fraction extraction
  xop_VFRCZPS = 0x80,
  xop_VFRCZPD = 0x81,
  xop_VFRCZSS = 0x82,
  xop_VFRCZSD = 0x83,

  // permutes and conditional move
  xop_VPERMIL2PS = 0x48,
  xop_VPERMIL2PD = 0x49,
  xop_VPCMOV = 0xA2,
  xop_VPPERM = 0xA3,

  // multiply-accumulate
  xop_VPMACSSWW = 0x85,
  xop_VPMACSSWD = 0x86,
  xop_VPMACSSDQL = 0x87,
  xop_VPMACSSDD = 0x8E,
  xop_VPMACSSDQH = 0x8F,
  xop_VPMACSWW = 0x95,
  xop_VPMACSWD = 0x96,
  xop_VPMACSDQL = 0x97,
  xop_VPMACSDD = 0x9E,
  xop_VPMACSDQH = 0x9F,
  xop_VPMADCSSWD = 0xA6,
  xop_VPMADCSWD = 0xB6,

  // rotates, register-count form
  xop_VPROTB = 0x90,
  xop_VPROTW = 0x91,
  xop_VPROTD = 0x92,
  xop_VPROTQ = 0x93,
  // rotates, immediate-count form
  xop_VPROTBI = 0xC0,
  xop_VPROTWI = 0xC1,
  xop_VPROTDI = 0xC2,
  xop_VPROTQI = 0xC3,

  // logical and arithmetic shifts
  xop_VPSHLB = 0x94,
  xop_VPSHLW = 0x95,
  xop_VPSHLD = 0x96,
  xop_VPSHLQ = 0x97,
  xop_VPSHAB = 0x98,
  xop_VPSHAW = 0x99,
  xop_VPSHAD = 0x9A,
  xop_VPSHAQ = 0x9B,

  // signed compares
  xop_VPCOMB = 0xCC,
  xop_VPCOMW = 0xCD,
  xop_VPCOMD = 0xCE,
  xop_VPCOMQ = 0xCF,
  // unsigned compares
  xop_VPCOMUB = 0xEC,
  xop_VPCOMUW = 0xED,
  xop_VPCOMUD = 0xEE,
  xop_VPCOMUQ = 0xEF,

  // horizontal adds, signed
  xop_VPHADDBW = 0xC1,
  xop_VPHADDBD = 0xC2,
  xop_VPHADDBQ = 0xC3,
  xop_VPHADDWD = 0xC6,
  xop_VPHADDWQ = 0xC7,
  xop_VPHADDDQ = 0xCB,
  // horizontal adds, unsigned
  xop_VPHADDUBW = 0xD1,
  xop_VPHADDUBD = 0xD2,
  xop_VPHADDUBQ = 0xD3,
  xop_VPHADDUWD = 0xD6,
  xop_VPHADDUWQ = 0xD7,
  xop_VPHADDUDQ = 0xDB,
  // horizontal subtracts
  xop_VPHSUBBW = 0xE1,
  xop_VPHSUBWD = 0xE2,
  xop_VPHSUBDQ = 0xE3,
};
|
||||||
|
|
||||||
|
// Integer element-width selector; occupies the opcode byte's low two bits
// (see xop_opcode_byte_t::int_datatype).
enum xop_iop_e : unsigned char {
  XOP_BYTE = 0,        // 8-bit elements
  XOP_WORD = 1,        // 16-bit elements
  XOP_DOUBLEWORD = 2,  // 32-bit elements
  XOP_QUADWORD = 3     // 64-bit elements
};
|
||||||
|
|
||||||
|
// Floating-point format selector; occupies the opcode byte's low two bits
// (see xop_opcode_byte_t::float_datatype).
enum xop_fop_e : unsigned char {
  XOP_PS = 0,  // packed single
  XOP_PD = 1,  // packed double
  XOP_SS = 2,  // scalar single
  XOP_SD = 3   // scalar double
};
|
||||||
|
// First XOP prefix byte (follows the 0x8F escape): the opcode-map selector
// plus the three inverted REX-style register-extension bits.
class xop_byte1_t {
 public:
  union {
    // informative names
    struct {
      // A five bit field encoding a one- or two-byte opcode prefix
      // (the opcode map; see xopcodemap_e).
      unsigned char opcode_map_select : 5;
      // One-bit extension of either the ModRM.r/m field (to specify a GPR or
      // XMM register) or the SIB base field (to specify a GPR), permitting
      // access to 16 registers. Ignored in 32-bit protected and compatibility
      // modes. Bit-inverted equivalent of REX.B; available only in the
      // 3-byte prefix format.
      unsigned char inv_1bit_ext_modrm_or_sib : 1;
      // One-bit extension of the SIB.index field in 64-bit mode, permitting
      // access to 16 YMM/XMM and GPR registers. Must be set to 1 in 32-bit
      // protected and compatibility modes. Bit-inverted equivalent of REX.X.
      unsigned char inv_1bit_ext_sib_index : 1;
      // One-bit extension of the ModRM.reg field in 64-bit mode, permitting
      // access to all 16 YMM/XMM and GPR registers. Must be set to 1 in
      // 32-bit protected and compatibility modes. Bit-inverted equivalent of
      // REX.R.
      unsigned char inv_1bit_ext_modrm_reg_field : 1;
    };
    // amd manual names (same bits as above)
    struct {
      unsigned char mmmmm : 5;
      unsigned char B : 1;
      unsigned char X : 1;
      unsigned char R : 1;
    };
    unsigned char encoded;  // raw byte view
  };
};
|
||||||
|
|
||||||
|
// Second XOP prefix byte: implied legacy prefix (pp), vector length (L),
// the ones-complemented extra register specifier (vvvv) and the W bit.
class xop_byte2_t {
 public:
  union {
    struct {
      unsigned char
          implied_66f2f3_ext : 2;  // 0 = no implied, 1 = 66, 2 = F3, 3 = F2
      unsigned char vector_length : 1;  // 0 = 128-bit, 1 = 256-bit operation
      // Ones complement of the extra source (or destination) register index;
      // see the ~ assignment in xop_set_fouroperand_form.
      unsigned char source_or_dest_reg_specifier_inverted_1s_compl : 4;
      unsigned char scalar_reg_size_override_special : 1;
    };
    // amd manual names (same bits as above)

    struct {
      unsigned char pp : 2;  // 0 = none, 1 = 66, 2 = F3, 3 = F2 — same
                             // mapping as implied_66f2f3_ext above
      unsigned char L : 1;
      unsigned char vvvv : 4;  // src1 for four operand form
      unsigned char W : 1;
    };
    unsigned char encoded;  // raw byte view
  };
};
|
||||||
|
|
||||||
|
// The XOP opcode byte. The low two bits double as an element-size / float
// format selector for most instructions; the three overlaid structs give
// different typed views of the same two bits.
class xop_opcode_byte_t {
 public:
  union {
    // view for float ops: low two bits select ps/pd/ss/sd
    struct {
      xop_fop_e float_datatype : 2;
      unsigned char __unused0 : 6;
    };

    // view for integer ops: low two bits select byte/word/dword/qword
    struct {
      xop_iop_e int_datatype : 2;
      unsigned char __unused1 : 6;
    };

    // generic view: operand element size plus the remaining opcode bits
    struct {
      unsigned char oes : 2;
      unsigned char opcode : 6;
    };
    unsigned char encoded;  // raw byte view
  };
};
|
||||||
|
|
||||||
|
// ModRM byte. Note the nonstandard split: "mod" here is a 5-bit field that
// spans both the ModRM.reg position (its low 3 bits — used as the dest
// register in the 4-operand form) and the conventional 2-bit mod field (its
// high 2 bits); xop_set_fouroperand_form ORs 0xC0 into `encoded` to select
// register-direct addressing and then ORs the dest index into `mod`.
class modrm_byte_t {
 public:
  union {
    struct {
      unsigned char rm : 3;
      unsigned char mod : 5;  // 4 opnd form dest reg
    };
    unsigned char encoded;  // raw byte view
  };
};
|
||||||
|
|
||||||
|
#pragma pack(push, 1)
|
||||||
|
class xop_t {
|
||||||
|
public:
|
||||||
|
unsigned char imm_8F; // always 0x8F
|
||||||
|
xop_byte1_t byte1;
|
||||||
|
xop_byte2_t byte2;
|
||||||
|
xop_opcode_byte_t opcode;
|
||||||
|
modrm_byte_t modrm;
|
||||||
|
unsigned char imm8;
|
||||||
|
|
||||||
|
xop_t() : imm_8F(0x8F) {
|
||||||
|
byte1.encoded = 0;
|
||||||
|
byte2.encoded = 0;
|
||||||
|
opcode.encoded = 0;
|
||||||
|
modrm.encoded = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned AssembledSize() const {
|
||||||
|
if (byte1.opcode_map_select == XOPCODE_NO_IMMBYTE) {
|
||||||
|
return 5;
|
||||||
|
} else {
|
||||||
|
return 6;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename TCall>
|
||||||
|
void ForeachByte(TCall&& cb) {
|
||||||
|
cb(imm_8F);
|
||||||
|
cb(byte1.encoded);
|
||||||
|
cb(byte2.encoded);
|
||||||
|
cb(opcode.encoded);
|
||||||
|
cb(modrm.encoded);
|
||||||
|
if (AssembledSize() == 6) {
|
||||||
|
cb(imm8);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
#pragma pack(pop)
|
||||||
|
|
||||||
|
// Fills in an xop_t for the register-only four-operand XOP form
// (dest = op(src1, src2, src3), with src3 carried in the immediate byte).
// All xmmidx_* arguments are XMM register indices — presumably 0-15; TODO
// confirm no caller passes larger values.
static void xop_set_fouroperand_form(xop_t& xop, unsigned xmmidx_dest,
                                     unsigned xmmidx_src1, unsigned xmmidx_src2,
                                     unsigned xmmidx_src3, xopcode_e opcode,
                                     bool has_immbyte = true) {
  xop.opcode.encoded = opcode;
  // 0xE8 = B/X/R all set (inverted extension bits => registers 0-7) with an
  // initial map value that is immediately overwritten below.
  xop.byte1.encoded = 0xe8;
  if (has_immbyte) {
    xop.byte1.opcode_map_select = XOPCODE_HAS_IMMBYTE;
  } else {
    xop.byte1.opcode_map_select = XOPCODE_NO_IMMBYTE;
  }
  // src3's register index lives in the high nibble of the immediate byte.
  xop.imm8 = xmmidx_src3 << 4;

  // ModRM.r/m holds src2's low three bits; bit 3 of dest and src2 goes into
  // the bit-inverted REX-style extension bits (for access to xmm8-xmm15).
  xop.modrm.rm = xmmidx_src2 & 0b111;
  xop.byte1.inv_1bit_ext_modrm_reg_field = (xmmidx_dest >> 3) ^ 1;
  xop.byte1.inv_1bit_ext_modrm_or_sib = (xmmidx_src2 >> 3) ^ 1;
  // vvvv is the ones complement of src1's index (bitfield truncates to 4
  // bits).
  xop.byte2.vvvv = ~xmmidx_src1;
  // mod = 0b11: register-direct addressing.
  xop.modrm.encoded |= 0xC0;
  // dest's low three bits land in the ModRM.reg position (the low bits of
  // the combined 5-bit "mod" field — see modrm_byte_t).
  xop.modrm.mod |= xmmidx_dest & 0b111;
}
|
||||||
|
|
||||||
|
// Condition codes placed in the immediate byte of the VPCOM* compare family.
enum class xopcompare_e : uint32_t {
  LT = 0b000,
  LTE = 0b001,
  GT = 0b010,
  GTE = 0b011,
  EQ = 0b100,
  NEQ = 0b101,
  // The two remaining encodings are sparsely documented; they appear to
  // yield constant all-false / all-true results.
  FALSEY = 0b110,
  TRUTHEY = 0b111
};
|
||||||
|
|
||||||
|
// Instruction builders: each function returns a fully encoded xop_t for one
// XOP instruction, ready to be written out via xop_t::ForeachByte.
namespace operations {
// Four-operand form: dest = op(src1, src2, src3), src3 passed through the
// immediate byte's high nibble (opcode map 8).
#define SIMPLE_FOUROPERAND(funcname, opcode)                                  \
  static xop_t funcname(unsigned destidx, unsigned src1idx, unsigned src2idx, \
                        unsigned src3idx) {                                   \
    xop_t result{};                                                           \
    xop_set_fouroperand_form(result, destidx, src1idx, src2idx, src3idx,      \
                             opcode, true);                                   \
    return result;                                                            \
  }

SIMPLE_FOUROPERAND(vpcmov, xop_VPCMOV)

SIMPLE_FOUROPERAND(vpperm, xop_VPPERM)

// Compare form: three registers plus a condition code (xopcompare_e) in the
// immediate byte.
#define COMPAREFUNC(name, opcode)                                    \
  static xop_t name(unsigned dst, unsigned src1, unsigned src2,      \
                    xopcompare_e imm8) {                             \
    xop_t xop;                                                       \
    xop_set_fouroperand_form(xop, dst, src1, src2, 0, opcode, true); \
    xop.imm8 = static_cast<uint8_t>(static_cast<uint32_t>(imm8));    \
    return xop;                                                      \
  }

COMPAREFUNC(vpcomb, xop_VPCOMB)
COMPAREFUNC(vpcomub, xop_VPCOMUB)
COMPAREFUNC(vpcomw, xop_VPCOMW)
COMPAREFUNC(vpcomuw, xop_VPCOMUW)
COMPAREFUNC(vpcomd, xop_VPCOMD)
COMPAREFUNC(vpcomud, xop_VPCOMUD)
COMPAREFUNC(vpcomq, xop_VPCOMQ)
COMPAREFUNC(vpcomuq, xop_VPCOMUQ)

// Three-operand form (variable shifts/rotates): per-element counts come from
// src2; no immediate byte (opcode map 9).
#define SIMPLE_THREEOPERAND(funcname, opcode)                              \
  static xop_t funcname(unsigned destidx, unsigned src1idx,                \
                        unsigned src2idx) {                                \
    xop_t result{};                                                        \
    xop_set_fouroperand_form(result, destidx, src1idx, src2idx, 0, opcode, \
                             false);                                       \
    return result;                                                         \
  }

SIMPLE_THREEOPERAND(vprotb, xop_VPROTB)
SIMPLE_THREEOPERAND(vprotw, xop_VPROTW)
SIMPLE_THREEOPERAND(vprotd, xop_VPROTD)
SIMPLE_THREEOPERAND(vprotq, xop_VPROTQ)

SIMPLE_THREEOPERAND(vpshab, xop_VPSHAB)
SIMPLE_THREEOPERAND(vpshaw, xop_VPSHAW)
SIMPLE_THREEOPERAND(vpshad, xop_VPSHAD)
SIMPLE_THREEOPERAND(vpshaq, xop_VPSHAQ)

SIMPLE_THREEOPERAND(vpshlb, xop_VPSHLB)
SIMPLE_THREEOPERAND(vpshlw, xop_VPSHLW)
SIMPLE_THREEOPERAND(vpshld, xop_VPSHLD)
SIMPLE_THREEOPERAND(vpshlq, xop_VPSHLQ)

#undef SIMPLE_THREEOPERAND

#undef SIMPLE_FOUROPERAND

#undef COMPAREFUNC
}  // namespace operations
|
||||||
|
|
||||||
|
} // namespace amdfx
|
||||||
|
} // namespace x64
|
||||||
|
} // namespace backend
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace xe
|
||||||
|
|
||||||
|
#endif // XENIA_CPU_BACKEND_X64_X64_AMDFX_EXTENSIONS_H_
|
|
@ -143,6 +143,12 @@ X64Emitter::X64Emitter(X64Backend* backend, XbyakAllocator* allocator)
|
||||||
feature_flags_ |= kX64EmitTBM;
|
feature_flags_ |= kX64EmitTBM;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (amd_flags & (1U << 11)) {
|
||||||
|
if ((cvars::x64_extension_mask & kX64EmitXOP) == kX64EmitXOP) {
|
||||||
|
feature_flags_ |= kX64EmitXOP;
|
||||||
|
XELOGCPU("Cpu support XOP!\n\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
if (cpu_.has(Xbyak::util::Cpu::tAMD)) {
|
if (cpu_.has(Xbyak::util::Cpu::tAMD)) {
|
||||||
bool is_zennish = cpu_.displayFamily >= 0x17;
|
bool is_zennish = cpu_.displayFamily >= 0x17;
|
||||||
/*
|
/*
|
||||||
|
@ -1024,8 +1030,13 @@ static const vec128_t xmm_consts[] = {
|
||||||
/*
|
/*
|
||||||
XMMF16PackLCPI6
|
XMMF16PackLCPI6
|
||||||
*/
|
*/
|
||||||
vec128i(0x8000)
|
vec128i(0x8000),
|
||||||
|
/* XMMXOPByteShiftMask,*/
|
||||||
|
vec128b(7),
|
||||||
|
/*XMMXOPWordShiftMask*/
|
||||||
|
vec128s(15),
|
||||||
|
/*XMMXOPDwordShiftMask*/
|
||||||
|
vec128i(31)
|
||||||
};
|
};
|
||||||
|
|
||||||
void* X64Emitter::FindByteConstantOffset(unsigned bytevalue) {
|
void* X64Emitter::FindByteConstantOffset(unsigned bytevalue) {
|
||||||
|
|
|
@ -23,7 +23,7 @@
|
||||||
// NOTE: must be included last as it expects windows.h to already be included.
|
// NOTE: must be included last as it expects windows.h to already be included.
|
||||||
#include "third_party/xbyak/xbyak/xbyak.h"
|
#include "third_party/xbyak/xbyak/xbyak.h"
|
||||||
#include "third_party/xbyak/xbyak/xbyak_util.h"
|
#include "third_party/xbyak/xbyak/xbyak_util.h"
|
||||||
|
#include "x64_amdfx_extensions.h"
|
||||||
namespace xe {
|
namespace xe {
|
||||||
namespace cpu {
|
namespace cpu {
|
||||||
class Processor;
|
class Processor;
|
||||||
|
@ -167,8 +167,14 @@ enum XmmConst {
|
||||||
XMMF16PackLCPI3,
|
XMMF16PackLCPI3,
|
||||||
XMMF16PackLCPI4,
|
XMMF16PackLCPI4,
|
||||||
XMMF16PackLCPI5,
|
XMMF16PackLCPI5,
|
||||||
XMMF16PackLCPI6
|
XMMF16PackLCPI6,
|
||||||
|
XMMXOPByteShiftMask,
|
||||||
|
XMMXOPWordShiftMask,
|
||||||
|
XMMXOPDwordShiftMask,
|
||||||
|
|
||||||
};
|
};
|
||||||
|
using amdfx::xopcompare_e;
|
||||||
|
using Xbyak::Xmm;
|
||||||
// X64Backend specific Instr->runtime_flags
|
// X64Backend specific Instr->runtime_flags
|
||||||
enum : uint32_t {
|
enum : uint32_t {
|
||||||
INSTR_X64_FLAGS_ELIMINATED =
|
INSTR_X64_FLAGS_ELIMINATED =
|
||||||
|
@ -351,6 +357,60 @@ class X64Emitter : public Xbyak::CodeGenerator {
|
||||||
|
|
||||||
void EmitProfilerEpilogue();
|
void EmitProfilerEpilogue();
|
||||||
|
|
||||||
|
void EmitXOP(amdfx::xop_t xoperation) {
|
||||||
|
xoperation.ForeachByte([this](uint8_t b) { this->db(b); });
|
||||||
|
}
|
||||||
|
|
||||||
|
void vpcmov(Xmm dest, Xmm src1, Xmm src2, Xmm selector) {
|
||||||
|
auto xop_bytes = amdfx::operations::vpcmov(
|
||||||
|
dest.getIdx(), src1.getIdx(), src2.getIdx(), selector.getIdx());
|
||||||
|
EmitXOP(xop_bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
void vpperm(Xmm dest, Xmm src1, Xmm src2, Xmm selector) {
|
||||||
|
auto xop_bytes = amdfx::operations::vpperm(
|
||||||
|
dest.getIdx(), src1.getIdx(), src2.getIdx(), selector.getIdx());
|
||||||
|
EmitXOP(xop_bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
#define DEFINECOMPARE(name) \
|
||||||
|
void name(Xmm dest, Xmm src1, Xmm src2, xopcompare_e compareop) { \
|
||||||
|
auto xop_bytes = amdfx::operations::name(dest.getIdx(), src1.getIdx(), \
|
||||||
|
src2.getIdx(), compareop); \
|
||||||
|
EmitXOP(xop_bytes); \
|
||||||
|
}
|
||||||
|
DEFINECOMPARE(vpcomb);
|
||||||
|
DEFINECOMPARE(vpcomub);
|
||||||
|
DEFINECOMPARE(vpcomw);
|
||||||
|
DEFINECOMPARE(vpcomuw);
|
||||||
|
DEFINECOMPARE(vpcomd);
|
||||||
|
DEFINECOMPARE(vpcomud);
|
||||||
|
DEFINECOMPARE(vpcomq);
|
||||||
|
DEFINECOMPARE(vpcomuq);
|
||||||
|
#undef DEFINECOMPARE
|
||||||
|
|
||||||
|
#define DEFINESHIFTER(name) \
|
||||||
|
void name(Xmm dest, Xmm src1, Xmm src2) { \
|
||||||
|
auto xop_bytes = \
|
||||||
|
amdfx::operations::name(dest.getIdx(), src1.getIdx(), src2.getIdx()); \
|
||||||
|
EmitXOP(xop_bytes); \
|
||||||
|
}
|
||||||
|
|
||||||
|
DEFINESHIFTER(vprotb)
|
||||||
|
DEFINESHIFTER(vprotw)
|
||||||
|
DEFINESHIFTER(vprotd)
|
||||||
|
DEFINESHIFTER(vprotq)
|
||||||
|
|
||||||
|
DEFINESHIFTER(vpshab)
|
||||||
|
DEFINESHIFTER(vpshaw)
|
||||||
|
DEFINESHIFTER(vpshad)
|
||||||
|
DEFINESHIFTER(vpshaq)
|
||||||
|
|
||||||
|
DEFINESHIFTER(vpshlb)
|
||||||
|
DEFINESHIFTER(vpshlw)
|
||||||
|
DEFINESHIFTER(vpshld)
|
||||||
|
DEFINESHIFTER(vpshlq)
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void* Emplace(const EmitFunctionInfo& func_info,
|
void* Emplace(const EmitFunctionInfo& func_info,
|
||||||
GuestFunction* function = nullptr);
|
GuestFunction* function = nullptr);
|
||||||
|
|
|
@ -19,6 +19,16 @@
|
||||||
#include "xenia/base/cvar.h"
|
#include "xenia/base/cvar.h"
|
||||||
#include "xenia/cpu/backend/x64/x64_stack_layout.h"
|
#include "xenia/cpu/backend/x64/x64_stack_layout.h"
|
||||||
|
|
||||||
|
DEFINE_bool(xop_rotates, false, "rotate via xop", "X64");
|
||||||
|
|
||||||
|
DEFINE_bool(xop_left_shifts, false, "shl via xop", "X64");
|
||||||
|
|
||||||
|
DEFINE_bool(xop_right_shifts, false, "shr via xop", "X64");
|
||||||
|
|
||||||
|
DEFINE_bool(xop_arithmetic_right_shifts, false, "sar via xop", "X64");
|
||||||
|
|
||||||
|
DEFINE_bool(xop_compares, true, "compare via xop", "X64");
|
||||||
|
|
||||||
namespace xe {
|
namespace xe {
|
||||||
namespace cpu {
|
namespace cpu {
|
||||||
namespace backend {
|
namespace backend {
|
||||||
|
@ -143,6 +153,7 @@ struct VECTOR_DENORMFLUSH
|
||||||
e.vandps(e.xmm0, i.src1,
|
e.vandps(e.xmm0, i.src1,
|
||||||
e.GetXmmConstPtr(XMMSingleDenormalMask)); // 0.25 P0123
|
e.GetXmmConstPtr(XMMSingleDenormalMask)); // 0.25 P0123
|
||||||
e.vcmpneqps(e.xmm2, e.xmm0, e.xmm1); // 0.5 P01
|
e.vcmpneqps(e.xmm2, e.xmm0, e.xmm1); // 0.5 P01
|
||||||
|
// todo: xop vpcmov here
|
||||||
e.vandps(e.xmm1, i.src1,
|
e.vandps(e.xmm1, i.src1,
|
||||||
e.GetXmmConstPtr(XMMSignMaskF32)); // 0.5 P0123 take signs, zeros
|
e.GetXmmConstPtr(XMMSignMaskF32)); // 0.5 P0123 take signs, zeros
|
||||||
// must keep their signs
|
// must keep their signs
|
||||||
|
@ -406,26 +417,44 @@ struct VECTOR_COMPARE_SGE_V128
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
EmitAssociativeBinaryXmmOp(
|
EmitAssociativeBinaryXmmOp(
|
||||||
e, i, [&i](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
e, i, [&i](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
||||||
switch (i.instr->flags) {
|
if (cvars::xop_compares && e.IsFeatureEnabled(kX64EmitXOP)) {
|
||||||
case INT8_TYPE:
|
switch (i.instr->flags) {
|
||||||
e.vpcmpeqb(e.xmm0, src1, src2);
|
case INT8_TYPE:
|
||||||
e.vpcmpgtb(dest, src1, src2);
|
e.vpcomb(dest, src1, src2, xopcompare_e::GTE);
|
||||||
e.vpor(dest, e.xmm0);
|
break;
|
||||||
break;
|
case INT16_TYPE:
|
||||||
case INT16_TYPE:
|
e.vpcomw(dest, src1, src2, xopcompare_e::GTE);
|
||||||
e.vpcmpeqw(e.xmm0, src1, src2);
|
break;
|
||||||
e.vpcmpgtw(dest, src1, src2);
|
case INT32_TYPE:
|
||||||
e.vpor(dest, e.xmm0);
|
e.vpcomd(dest, src1, src2, xopcompare_e::GTE);
|
||||||
break;
|
break;
|
||||||
case INT32_TYPE:
|
case FLOAT32_TYPE:
|
||||||
e.vpcmpeqd(e.xmm0, src1, src2);
|
e.ChangeMxcsrMode(MXCSRMode::Vmx);
|
||||||
e.vpcmpgtd(dest, src1, src2);
|
e.vcmpgeps(dest, src1, src2);
|
||||||
e.vpor(dest, e.xmm0);
|
break;
|
||||||
break;
|
}
|
||||||
case FLOAT32_TYPE:
|
} else {
|
||||||
e.ChangeMxcsrMode(MXCSRMode::Vmx);
|
switch (i.instr->flags) {
|
||||||
e.vcmpgeps(dest, src1, src2);
|
case INT8_TYPE:
|
||||||
break;
|
e.vpcmpeqb(e.xmm0, src1, src2);
|
||||||
|
e.vpcmpgtb(dest, src1, src2);
|
||||||
|
e.vpor(dest, e.xmm0);
|
||||||
|
break;
|
||||||
|
case INT16_TYPE:
|
||||||
|
e.vpcmpeqw(e.xmm0, src1, src2);
|
||||||
|
e.vpcmpgtw(dest, src1, src2);
|
||||||
|
e.vpor(dest, e.xmm0);
|
||||||
|
break;
|
||||||
|
case INT32_TYPE:
|
||||||
|
e.vpcmpeqd(e.xmm0, src1, src2);
|
||||||
|
e.vpcmpgtd(dest, src1, src2);
|
||||||
|
e.vpor(dest, e.xmm0);
|
||||||
|
break;
|
||||||
|
case FLOAT32_TYPE:
|
||||||
|
e.ChangeMxcsrMode(MXCSRMode::Vmx);
|
||||||
|
e.vcmpgeps(dest, src1, src2);
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -600,6 +629,7 @@ struct VECTOR_ADD
|
||||||
// overflowed (only need to check one input)
|
// overflowed (only need to check one input)
|
||||||
// if (src1 > res) then overflowed
|
// if (src1 > res) then overflowed
|
||||||
// http://locklessinc.com/articles/sat_arithmetic/
|
// http://locklessinc.com/articles/sat_arithmetic/
|
||||||
|
// chrispy: todo - add xop stuff here
|
||||||
e.vpxor(e.xmm2, src1, e.GetXmmConstPtr(XMMSignMaskI32));
|
e.vpxor(e.xmm2, src1, e.GetXmmConstPtr(XMMSignMaskI32));
|
||||||
e.vpxor(e.xmm0, e.xmm1, e.GetXmmConstPtr(XMMSignMaskI32));
|
e.vpxor(e.xmm0, e.xmm1, e.GetXmmConstPtr(XMMSignMaskI32));
|
||||||
e.vpcmpgtd(e.xmm0, e.xmm2, e.xmm0);
|
e.vpcmpgtd(e.xmm0, e.xmm2, e.xmm0);
|
||||||
|
@ -755,23 +785,52 @@ static __m128i EmulateVectorShl(void*, __m128i src1, __m128i src2) {
|
||||||
// Store result and return it.
|
// Store result and return it.
|
||||||
return _mm_load_si128(reinterpret_cast<__m128i*>(value));
|
return _mm_load_si128(reinterpret_cast<__m128i*>(value));
|
||||||
}
|
}
|
||||||
|
static XmmConst GetShiftmaskForType(unsigned typ) {
|
||||||
|
if (typ == INT8_TYPE) {
|
||||||
|
return XMMXOPByteShiftMask;
|
||||||
|
} else if (typ == INT16_TYPE) {
|
||||||
|
return XMMXOPWordShiftMask;
|
||||||
|
} else {
|
||||||
|
return XMMXOPDwordShiftMask;
|
||||||
|
}
|
||||||
|
}
|
||||||
struct VECTOR_SHL_V128
|
struct VECTOR_SHL_V128
|
||||||
: Sequence<VECTOR_SHL_V128, I<OPCODE_VECTOR_SHL, V128Op, V128Op, V128Op>> {
|
: Sequence<VECTOR_SHL_V128, I<OPCODE_VECTOR_SHL, V128Op, V128Op, V128Op>> {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
switch (i.instr->flags) {
|
if (cvars::xop_left_shifts && e.IsFeatureEnabled(kX64EmitXOP)) {
|
||||||
case INT8_TYPE:
|
Xmm src1 = GetInputRegOrConstant(e, i.src1, e.xmm0);
|
||||||
EmitInt8(e, i);
|
Xmm src2 = GetInputRegOrConstant(e, i.src2, e.xmm1);
|
||||||
break;
|
|
||||||
case INT16_TYPE:
|
e.vpand(e.xmm2, src2,
|
||||||
EmitInt16(e, i);
|
e.GetXmmConstPtr(GetShiftmaskForType(i.instr->flags)));
|
||||||
break;
|
|
||||||
case INT32_TYPE:
|
switch (i.instr->flags) {
|
||||||
EmitInt32(e, i);
|
case INT8_TYPE:
|
||||||
break;
|
e.vpshlb(i.dest, src1, e.xmm2);
|
||||||
default:
|
break;
|
||||||
assert_always();
|
case INT16_TYPE:
|
||||||
break;
|
e.vpshlw(i.dest, src1, e.xmm2);
|
||||||
|
break;
|
||||||
|
case INT32_TYPE:
|
||||||
|
e.vpshld(i.dest, src1, e.xmm2);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
switch (i.instr->flags) {
|
||||||
|
case INT8_TYPE:
|
||||||
|
EmitInt8(e, i);
|
||||||
|
break;
|
||||||
|
case INT16_TYPE:
|
||||||
|
EmitInt16(e, i);
|
||||||
|
break;
|
||||||
|
case INT32_TYPE:
|
||||||
|
EmitInt32(e, i);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert_always();
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1041,19 +1100,45 @@ static __m128i EmulateVectorShr(void*, __m128i src1, __m128i src2) {
|
||||||
struct VECTOR_SHR_V128
|
struct VECTOR_SHR_V128
|
||||||
: Sequence<VECTOR_SHR_V128, I<OPCODE_VECTOR_SHR, V128Op, V128Op, V128Op>> {
|
: Sequence<VECTOR_SHR_V128, I<OPCODE_VECTOR_SHR, V128Op, V128Op, V128Op>> {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
switch (i.instr->flags) {
|
if (cvars::xop_right_shifts && e.IsFeatureEnabled(kX64EmitXOP)) {
|
||||||
case INT8_TYPE:
|
Xmm src1 = GetInputRegOrConstant(e, i.src1, e.xmm0);
|
||||||
EmitInt8(e, i);
|
Xmm src2 = GetInputRegOrConstant(e, i.src2, e.xmm1);
|
||||||
break;
|
|
||||||
case INT16_TYPE:
|
e.vpand(e.xmm2, src2,
|
||||||
EmitInt16(e, i);
|
e.GetXmmConstPtr(GetShiftmaskForType(i.instr->flags)));
|
||||||
break;
|
|
||||||
case INT32_TYPE:
|
e.vpcmpeqb(e.xmm3, e.xmm3);
|
||||||
EmitInt32(e, i);
|
|
||||||
break;
|
switch (i.instr->flags) {
|
||||||
default:
|
case INT8_TYPE:
|
||||||
assert_always();
|
e.vpsignb(e.xmm2, e.xmm3);
|
||||||
break;
|
e.vpshlb(i.dest, src1, e.xmm2);
|
||||||
|
break;
|
||||||
|
case INT16_TYPE:
|
||||||
|
e.vpsignw(e.xmm2, e.xmm3);
|
||||||
|
e.vpshlw(i.dest, src1, e.xmm2);
|
||||||
|
break;
|
||||||
|
case INT32_TYPE:
|
||||||
|
e.vpsignd(e.xmm2, e.xmm3);
|
||||||
|
e.vpshld(i.dest, src1, e.xmm2);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
switch (i.instr->flags) {
|
||||||
|
case INT8_TYPE:
|
||||||
|
EmitInt8(e, i);
|
||||||
|
break;
|
||||||
|
case INT16_TYPE:
|
||||||
|
EmitInt16(e, i);
|
||||||
|
break;
|
||||||
|
case INT32_TYPE:
|
||||||
|
EmitInt32(e, i);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert_always();
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1224,19 +1309,45 @@ EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SHR, VECTOR_SHR_V128);
|
||||||
struct VECTOR_SHA_V128
|
struct VECTOR_SHA_V128
|
||||||
: Sequence<VECTOR_SHA_V128, I<OPCODE_VECTOR_SHA, V128Op, V128Op, V128Op>> {
|
: Sequence<VECTOR_SHA_V128, I<OPCODE_VECTOR_SHA, V128Op, V128Op, V128Op>> {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
switch (i.instr->flags) {
|
if (cvars::xop_arithmetic_right_shifts && e.IsFeatureEnabled(kX64EmitXOP)) {
|
||||||
case INT8_TYPE:
|
Xmm src1 = GetInputRegOrConstant(e, i.src1, e.xmm0);
|
||||||
EmitInt8(e, i);
|
Xmm src2 = GetInputRegOrConstant(e, i.src2, e.xmm1);
|
||||||
break;
|
|
||||||
case INT16_TYPE:
|
e.vpand(e.xmm2, src2,
|
||||||
EmitInt16(e, i);
|
e.GetXmmConstPtr(GetShiftmaskForType(i.instr->flags)));
|
||||||
break;
|
|
||||||
case INT32_TYPE:
|
e.vpcmpeqb(e.xmm3, e.xmm3);
|
||||||
EmitInt32(e, i);
|
|
||||||
break;
|
switch (i.instr->flags) {
|
||||||
default:
|
case INT8_TYPE:
|
||||||
assert_always();
|
e.vpsignb(e.xmm2, e.xmm3);
|
||||||
break;
|
e.vpshab(i.dest, src1, e.xmm2);
|
||||||
|
break;
|
||||||
|
case INT16_TYPE:
|
||||||
|
e.vpsignw(e.xmm2, e.xmm3);
|
||||||
|
e.vpshaw(i.dest, src1, e.xmm2);
|
||||||
|
break;
|
||||||
|
case INT32_TYPE:
|
||||||
|
e.vpsignd(e.xmm2, e.xmm3);
|
||||||
|
e.vpshad(i.dest, src1, e.xmm2);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
switch (i.instr->flags) {
|
||||||
|
case INT8_TYPE:
|
||||||
|
EmitInt8(e, i);
|
||||||
|
break;
|
||||||
|
case INT16_TYPE:
|
||||||
|
EmitInt16(e, i);
|
||||||
|
break;
|
||||||
|
case INT32_TYPE:
|
||||||
|
EmitInt32(e, i);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert_always();
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1412,55 +1523,29 @@ struct VECTOR_ROTATE_LEFT_V128
|
||||||
: Sequence<VECTOR_ROTATE_LEFT_V128,
|
: Sequence<VECTOR_ROTATE_LEFT_V128,
|
||||||
I<OPCODE_VECTOR_ROTATE_LEFT, V128Op, V128Op, V128Op>> {
|
I<OPCODE_VECTOR_ROTATE_LEFT, V128Op, V128Op, V128Op>> {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
switch (i.instr->flags) {
|
if (cvars::xop_rotates && e.IsFeatureEnabled(kX64EmitXOP)) {
|
||||||
case INT8_TYPE:
|
Xmm src1 = GetInputRegOrConstant(e, i.src1, e.xmm0);
|
||||||
// TODO(benvanik): native version (with shift magic).
|
Xmm src2 = GetInputRegOrConstant(e, i.src2, e.xmm1);
|
||||||
if (i.src2.is_constant) {
|
|
||||||
e.lea(e.GetNativeParam(1), e.StashConstantXmm(1, i.src2.constant()));
|
e.vpand(e.xmm2, src2,
|
||||||
} else {
|
e.GetXmmConstPtr(GetShiftmaskForType(i.instr->flags)));
|
||||||
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
|
|
||||||
}
|
switch (i.instr->flags) {
|
||||||
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
|
case INT8_TYPE:
|
||||||
e.CallNativeSafe(
|
e.vprotb(i.dest, src1, e.xmm2);
|
||||||
reinterpret_cast<void*>(EmulateVectorRotateLeft<uint8_t>));
|
break;
|
||||||
e.vmovaps(i.dest, e.xmm0);
|
case INT16_TYPE:
|
||||||
break;
|
e.vprotw(i.dest, src1, e.xmm2);
|
||||||
case INT16_TYPE:
|
break;
|
||||||
// TODO(benvanik): native version (with shift magic).
|
case INT32_TYPE:
|
||||||
if (i.src2.is_constant) {
|
e.vprotd(i.dest, src1, e.xmm2);
|
||||||
e.lea(e.GetNativeParam(1), e.StashConstantXmm(1, i.src2.constant()));
|
break;
|
||||||
} else {
|
}
|
||||||
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
|
|
||||||
}
|
} else {
|
||||||
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
|
switch (i.instr->flags) {
|
||||||
e.CallNativeSafe(
|
case INT8_TYPE:
|
||||||
reinterpret_cast<void*>(EmulateVectorRotateLeft<uint16_t>));
|
// TODO(benvanik): native version (with shift magic).
|
||||||
e.vmovaps(i.dest, e.xmm0);
|
|
||||||
break;
|
|
||||||
case INT32_TYPE: {
|
|
||||||
if (e.IsFeatureEnabled(kX64EmitAVX512Ortho)) {
|
|
||||||
e.vprolvd(i.dest, i.src1, i.src2);
|
|
||||||
} else if (e.IsFeatureEnabled(kX64EmitAVX2)) {
|
|
||||||
Xmm temp = i.dest;
|
|
||||||
if (i.dest == i.src1 || i.dest == i.src2) {
|
|
||||||
temp = e.xmm2;
|
|
||||||
}
|
|
||||||
// Shift left (to get high bits):
|
|
||||||
if (i.src2.is_constant) {
|
|
||||||
e.LoadConstantXmm(temp, i.src2.constant());
|
|
||||||
e.vpand(e.xmm0, temp, e.GetXmmConstPtr(XMMShiftMaskPS));
|
|
||||||
} else {
|
|
||||||
e.vpand(e.xmm0, i.src2, e.GetXmmConstPtr(XMMShiftMaskPS));
|
|
||||||
}
|
|
||||||
e.vpsllvd(e.xmm1, i.src1, e.xmm0);
|
|
||||||
// Shift right (to get low bits):
|
|
||||||
e.vmovaps(temp, e.GetXmmConstPtr(XMMPI32));
|
|
||||||
e.vpsubd(temp, e.xmm0);
|
|
||||||
e.vpsrlvd(i.dest, i.src1, temp);
|
|
||||||
// Merge:
|
|
||||||
e.vpor(i.dest, e.xmm1);
|
|
||||||
} else {
|
|
||||||
// TODO(benvanik): non-AVX2 native version.
|
|
||||||
if (i.src2.is_constant) {
|
if (i.src2.is_constant) {
|
||||||
e.lea(e.GetNativeParam(1),
|
e.lea(e.GetNativeParam(1),
|
||||||
e.StashConstantXmm(1, i.src2.constant()));
|
e.StashConstantXmm(1, i.src2.constant()));
|
||||||
|
@ -1469,14 +1554,63 @@ struct VECTOR_ROTATE_LEFT_V128
|
||||||
}
|
}
|
||||||
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
|
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
|
||||||
e.CallNativeSafe(
|
e.CallNativeSafe(
|
||||||
reinterpret_cast<void*>(EmulateVectorRotateLeft<uint32_t>));
|
reinterpret_cast<void*>(EmulateVectorRotateLeft<uint8_t>));
|
||||||
e.vmovaps(i.dest, e.xmm0);
|
e.vmovaps(i.dest, e.xmm0);
|
||||||
|
break;
|
||||||
|
case INT16_TYPE:
|
||||||
|
// TODO(benvanik): native version (with shift magic).
|
||||||
|
if (i.src2.is_constant) {
|
||||||
|
e.lea(e.GetNativeParam(1),
|
||||||
|
e.StashConstantXmm(1, i.src2.constant()));
|
||||||
|
} else {
|
||||||
|
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
|
||||||
|
}
|
||||||
|
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
|
||||||
|
e.CallNativeSafe(
|
||||||
|
reinterpret_cast<void*>(EmulateVectorRotateLeft<uint16_t>));
|
||||||
|
e.vmovaps(i.dest, e.xmm0);
|
||||||
|
break;
|
||||||
|
case INT32_TYPE: {
|
||||||
|
if (e.IsFeatureEnabled(kX64EmitAVX512Ortho)) {
|
||||||
|
e.vprolvd(i.dest, i.src1, i.src2);
|
||||||
|
} else if (e.IsFeatureEnabled(kX64EmitAVX2)) {
|
||||||
|
Xmm temp = i.dest;
|
||||||
|
if (i.dest == i.src1 || i.dest == i.src2) {
|
||||||
|
temp = e.xmm2;
|
||||||
|
}
|
||||||
|
// Shift left (to get high bits):
|
||||||
|
if (i.src2.is_constant) {
|
||||||
|
e.LoadConstantXmm(temp, i.src2.constant());
|
||||||
|
e.vpand(e.xmm0, temp, e.GetXmmConstPtr(XMMShiftMaskPS));
|
||||||
|
} else {
|
||||||
|
e.vpand(e.xmm0, i.src2, e.GetXmmConstPtr(XMMShiftMaskPS));
|
||||||
|
}
|
||||||
|
e.vpsllvd(e.xmm1, i.src1, e.xmm0);
|
||||||
|
// Shift right (to get low bits):
|
||||||
|
e.vmovaps(temp, e.GetXmmConstPtr(XMMPI32));
|
||||||
|
e.vpsubd(temp, e.xmm0);
|
||||||
|
e.vpsrlvd(i.dest, i.src1, temp);
|
||||||
|
// Merge:
|
||||||
|
e.vpor(i.dest, e.xmm1);
|
||||||
|
} else {
|
||||||
|
// TODO(benvanik): non-AVX2 native version.
|
||||||
|
if (i.src2.is_constant) {
|
||||||
|
e.lea(e.GetNativeParam(1),
|
||||||
|
e.StashConstantXmm(1, i.src2.constant()));
|
||||||
|
} else {
|
||||||
|
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
|
||||||
|
}
|
||||||
|
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
|
||||||
|
e.CallNativeSafe(
|
||||||
|
reinterpret_cast<void*>(EmulateVectorRotateLeft<uint32_t>));
|
||||||
|
e.vmovaps(i.dest, e.xmm0);
|
||||||
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
break;
|
default:
|
||||||
|
assert_always();
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
default:
|
|
||||||
assert_always();
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -50,10 +50,10 @@ DEFINE_bool(no_round_to_single, false,
|
||||||
"Not for users, breaks games. Skip rounding double values to "
|
"Not for users, breaks games. Skip rounding double values to "
|
||||||
"single precision and back",
|
"single precision and back",
|
||||||
"CPU");
|
"CPU");
|
||||||
DEFINE_bool(
|
DEFINE_bool(inline_loadclock, false,
|
||||||
inline_loadclock, false,
|
"Directly read cached guest clock without calling the LoadClock "
|
||||||
"Directly read cached guest clock without calling the LoadClock method (it gets repeatedly updated by calls from other threads)",
|
"method (it gets repeatedly updated by calls from other threads)",
|
||||||
"CPU");
|
"CPU");
|
||||||
namespace xe {
|
namespace xe {
|
||||||
namespace cpu {
|
namespace cpu {
|
||||||
namespace backend {
|
namespace backend {
|
||||||
|
@ -549,7 +549,7 @@ struct MAX_F64 : Sequence<MAX_F64, I<OPCODE_MAX, F64Op, F64Op, F64Op>> {
|
||||||
struct MAX_V128 : Sequence<MAX_V128, I<OPCODE_MAX, V128Op, V128Op, V128Op>> {
|
struct MAX_V128 : Sequence<MAX_V128, I<OPCODE_MAX, V128Op, V128Op, V128Op>> {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
e.ChangeMxcsrMode(MXCSRMode::Vmx);
|
e.ChangeMxcsrMode(MXCSRMode::Vmx);
|
||||||
//if 0 and -0, return 0! opposite of minfp
|
// if 0 and -0, return 0! opposite of minfp
|
||||||
auto src1 = GetInputRegOrConstant(e, i.src1, e.xmm0);
|
auto src1 = GetInputRegOrConstant(e, i.src1, e.xmm0);
|
||||||
auto src2 = GetInputRegOrConstant(e, i.src2, e.xmm1);
|
auto src2 = GetInputRegOrConstant(e, i.src2, e.xmm1);
|
||||||
e.vmaxps(e.xmm2, src1, src2);
|
e.vmaxps(e.xmm2, src1, src2);
|
||||||
|
@ -781,11 +781,15 @@ struct SELECT_V128_V128
|
||||||
} else if (mayblend == PermittedBlend::Ps) {
|
} else if (mayblend == PermittedBlend::Ps) {
|
||||||
e.vblendvps(i.dest, src2, src3, src1);
|
e.vblendvps(i.dest, src2, src3, src1);
|
||||||
} else {
|
} else {
|
||||||
//ideally we would have an xop path here...
|
if (e.IsFeatureEnabled(kX64EmitXOP)) {
|
||||||
// src1 ? src2 : src3;
|
e.vpcmov(i.dest, src3, src2, src1);
|
||||||
e.vpandn(e.xmm3, src1, src2);
|
} else {
|
||||||
e.vpand(i.dest, src1, src3);
|
// src1 ? src2 : src3;
|
||||||
e.vpor(i.dest, i.dest, e.xmm3);
|
|
||||||
|
e.vpandn(e.xmm3, src1, src2);
|
||||||
|
e.vpand(i.dest, src1, src3);
|
||||||
|
e.vpor(i.dest, i.dest, e.xmm3);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -84,7 +84,7 @@ bool SimplificationPass::Run(HIRBuilder* builder, bool& result) {
|
||||||
iter_result |= EliminateConversions(builder);
|
iter_result |= EliminateConversions(builder);
|
||||||
iter_result |= SimplifyAssignments(builder);
|
iter_result |= SimplifyAssignments(builder);
|
||||||
iter_result |= SimplifyBasicArith(builder);
|
iter_result |= SimplifyBasicArith(builder);
|
||||||
|
iter_result |= SimplifyVectorOps(builder);
|
||||||
result |= iter_result;
|
result |= iter_result;
|
||||||
} while (iter_result);
|
} while (iter_result);
|
||||||
return true;
|
return true;
|
||||||
|
@ -1393,6 +1393,65 @@ bool SimplificationPass::SimplifyBasicArith(hir::HIRBuilder* builder) {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool CouldEverProduceDenormal(hir::Instr* i) {
|
||||||
|
if (!i) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
Opcode denflushed_opcode = i->GetOpcodeNum();
|
||||||
|
|
||||||
|
if (denflushed_opcode == OPCODE_VECTOR_DENORMFLUSH) {
|
||||||
|
return false;
|
||||||
|
} else if (denflushed_opcode == OPCODE_UNPACK) {
|
||||||
|
// todo: more unpack operations likely cannot produce denormals
|
||||||
|
if (i->flags == PACK_TYPE_FLOAT16_4 || i->flags == PACK_TYPE_FLOAT16_2) {
|
||||||
|
return false; // xenos half float format does not support denormals
|
||||||
|
}
|
||||||
|
} else if (denflushed_opcode == OPCODE_VECTOR_CONVERT_I2F) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true; // todo: recurse, check values for min/max, abs, and others
|
||||||
|
}
|
||||||
|
|
||||||
|
bool SimplificationPass::SimplifyVectorOps(hir::Instr* i,
|
||||||
|
hir::HIRBuilder* builder) {
|
||||||
|
Opcode opc = i->GetOpcodeNum();
|
||||||
|
/*
|
||||||
|
if the input to an unconditional denormal flush is an output of an
|
||||||
|
unconditional denormal flush, it is a pointless instruction and should be
|
||||||
|
elimed
|
||||||
|
*/
|
||||||
|
if (opc == OPCODE_VECTOR_DENORMFLUSH) {
|
||||||
|
hir::Instr* denflushed_def = i->src1.value->GetDefSkipAssigns();
|
||||||
|
|
||||||
|
if (denflushed_def) {
|
||||||
|
if (!CouldEverProduceDenormal(denflushed_def)) {
|
||||||
|
i->opcode = &OPCODE_ASSIGN_info;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
bool SimplificationPass::SimplifyVectorOps(hir::HIRBuilder* builder) {
|
||||||
|
bool result = false;
|
||||||
|
auto block = builder->first_block();
|
||||||
|
while (block) {
|
||||||
|
auto i = block->instr_head;
|
||||||
|
while (i) {
|
||||||
|
bool looks_vectory = false;
|
||||||
|
|
||||||
|
i->VisitValueOperands([&looks_vectory](Value* val, uint32_t idx) {
|
||||||
|
if (val->type == VEC128_TYPE) {
|
||||||
|
looks_vectory = true;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
result |= SimplifyVectorOps(i, builder);
|
||||||
|
i = i->next;
|
||||||
|
}
|
||||||
|
block = block->next;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
/*
|
/*
|
||||||
todo: add load-store simplification pass
|
todo: add load-store simplification pass
|
||||||
|
|
||||||
|
|
|
@ -35,6 +35,9 @@ class SimplificationPass : public ConditionalGroupSubpass {
|
||||||
|
|
||||||
// handles simple multiplication/addition rules
|
// handles simple multiplication/addition rules
|
||||||
bool SimplifyBasicArith(hir::HIRBuilder* builder);
|
bool SimplifyBasicArith(hir::HIRBuilder* builder);
|
||||||
|
|
||||||
|
bool SimplifyVectorOps(hir::HIRBuilder* builder);
|
||||||
|
bool SimplifyVectorOps(hir::Instr* i, hir::HIRBuilder* builder);
|
||||||
bool SimplifyBasicArith(hir::Instr* i, hir::HIRBuilder* builder);
|
bool SimplifyBasicArith(hir::Instr* i, hir::HIRBuilder* builder);
|
||||||
bool SimplifyAddWithSHL(hir::Instr* i, hir::HIRBuilder* builder);
|
bool SimplifyAddWithSHL(hir::Instr* i, hir::HIRBuilder* builder);
|
||||||
bool SimplifyAddToSelf(hir::Instr* i, hir::HIRBuilder* builder);
|
bool SimplifyAddToSelf(hir::Instr* i, hir::HIRBuilder* builder);
|
||||||
|
|
|
@ -31,10 +31,11 @@ struct SourceMapEntry {
|
||||||
uint32_t hir_offset; // Block ordinal (16b) | Instr ordinal (16b)
|
uint32_t hir_offset; // Block ordinal (16b) | Instr ordinal (16b)
|
||||||
uint32_t code_offset; // Offset from emitted code start.
|
uint32_t code_offset; // Offset from emitted code start.
|
||||||
};
|
};
|
||||||
|
enum class SaveRestoreType : uint8_t { NONE, GPR, VMX, FPR };
|
||||||
|
|
||||||
class Function : public Symbol {
|
class Function : public Symbol {
|
||||||
public:
|
public:
|
||||||
enum class Behavior {
|
enum class Behavior : uint8_t {
|
||||||
kDefault = 0,
|
kDefault = 0,
|
||||||
kProlog,
|
kProlog,
|
||||||
kEpilog,
|
kEpilog,
|
||||||
|
@ -53,6 +54,20 @@ class Function : public Symbol {
|
||||||
void set_behavior(Behavior value) { behavior_ = value; }
|
void set_behavior(Behavior value) { behavior_ = value; }
|
||||||
bool is_guest() const { return behavior_ != Behavior::kBuiltin; }
|
bool is_guest() const { return behavior_ != Behavior::kBuiltin; }
|
||||||
|
|
||||||
|
void SetSaverest(SaveRestoreType type, bool is_rest, uint8_t index) {
|
||||||
|
saverest_type_ = type;
|
||||||
|
is_restore_ = is_rest;
|
||||||
|
saverest_index_ = index;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool IsSaverest() const { return saverest_type_ != SaveRestoreType::NONE; }
|
||||||
|
|
||||||
|
SaveRestoreType SaverestType() const { return saverest_type_; }
|
||||||
|
unsigned SaverestIndex() const { return saverest_index_; }
|
||||||
|
|
||||||
|
bool IsSave() const { return IsSaverest() && is_restore_ == 0; }
|
||||||
|
bool IsRestore() const { return IsSaverest() && is_restore_; }
|
||||||
|
|
||||||
bool ContainsAddress(uint32_t address) const {
|
bool ContainsAddress(uint32_t address) const {
|
||||||
if (!address_ || !end_address_) {
|
if (!address_ || !end_address_) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -71,7 +86,11 @@ class Function : public Symbol {
|
||||||
Function(Module* module, uint32_t address);
|
Function(Module* module, uint32_t address);
|
||||||
|
|
||||||
uint32_t end_address_ = 0;
|
uint32_t end_address_ = 0;
|
||||||
|
|
||||||
Behavior behavior_ = Behavior::kDefault;
|
Behavior behavior_ = Behavior::kDefault;
|
||||||
|
SaveRestoreType saverest_type_ = SaveRestoreType::NONE;
|
||||||
|
uint8_t is_restore_ = 0;
|
||||||
|
uint8_t saverest_index_ = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
class BuiltinFunction : public Function {
|
class BuiltinFunction : public Function {
|
||||||
|
|
|
@ -1023,13 +1023,6 @@ Value* HIRBuilder::Truncate(Value* value, TypeName target_type) {
|
||||||
|
|
||||||
Value* HIRBuilder::Convert(Value* value, TypeName target_type,
|
Value* HIRBuilder::Convert(Value* value, TypeName target_type,
|
||||||
RoundMode round_mode) {
|
RoundMode round_mode) {
|
||||||
if (value->type == target_type) {
|
|
||||||
return value;
|
|
||||||
} else if (value->IsConstant()) {
|
|
||||||
Value* dest = CloneValue(value);
|
|
||||||
dest->Convert(target_type, round_mode);
|
|
||||||
return dest;
|
|
||||||
}
|
|
||||||
|
|
||||||
Instr* i =
|
Instr* i =
|
||||||
AppendInstr(OPCODE_CONVERT_info, round_mode, AllocValue(target_type));
|
AppendInstr(OPCODE_CONVERT_info, round_mode, AllocValue(target_type));
|
||||||
|
@ -1041,11 +1034,6 @@ Value* HIRBuilder::Convert(Value* value, TypeName target_type,
|
||||||
Value* HIRBuilder::Round(Value* value, RoundMode round_mode) {
|
Value* HIRBuilder::Round(Value* value, RoundMode round_mode) {
|
||||||
ASSERT_FLOAT_OR_VECTOR_TYPE(value);
|
ASSERT_FLOAT_OR_VECTOR_TYPE(value);
|
||||||
|
|
||||||
if (value->IsConstant()) {
|
|
||||||
Value* dest = CloneValue(value);
|
|
||||||
dest->Round(round_mode);
|
|
||||||
return dest;
|
|
||||||
}
|
|
||||||
|
|
||||||
Instr* i =
|
Instr* i =
|
||||||
AppendInstr(OPCODE_ROUND_info, round_mode, AllocValue(value->type));
|
AppendInstr(OPCODE_ROUND_info, round_mode, AllocValue(value->type));
|
||||||
|
@ -1295,7 +1283,7 @@ void HIRBuilder::SetNJM(Value* value) {
|
||||||
Value* HIRBuilder::Max(Value* value1, Value* value2) {
|
Value* HIRBuilder::Max(Value* value1, Value* value2) {
|
||||||
ASSERT_TYPES_EQUAL(value1, value2);
|
ASSERT_TYPES_EQUAL(value1, value2);
|
||||||
|
|
||||||
if (value1->type != VEC128_TYPE && value1->IsConstant() &&
|
if (IsScalarIntegralType( value1->type) && value1->IsConstant() &&
|
||||||
value2->IsConstant()) {
|
value2->IsConstant()) {
|
||||||
return value1->Compare(OPCODE_COMPARE_SLT, value2) ? value2 : value1;
|
return value1->Compare(OPCODE_COMPARE_SLT, value2) ? value2 : value1;
|
||||||
}
|
}
|
||||||
|
@ -1323,7 +1311,7 @@ Value* HIRBuilder::VectorMax(Value* value1, Value* value2, TypeName part_type,
|
||||||
Value* HIRBuilder::Min(Value* value1, Value* value2) {
|
Value* HIRBuilder::Min(Value* value1, Value* value2) {
|
||||||
ASSERT_TYPES_EQUAL(value1, value2);
|
ASSERT_TYPES_EQUAL(value1, value2);
|
||||||
|
|
||||||
if (value1->type != VEC128_TYPE && value1->IsConstant() &&
|
if (IsScalarIntegralType(value1->type) && value1->IsConstant() &&
|
||||||
value2->IsConstant()) {
|
value2->IsConstant()) {
|
||||||
return value1->Compare(OPCODE_COMPARE_SLT, value2) ? value1 : value2;
|
return value1->Compare(OPCODE_COMPARE_SLT, value2) ? value1 : value2;
|
||||||
}
|
}
|
||||||
|
@ -1351,8 +1339,9 @@ Value* HIRBuilder::VectorMin(Value* value1, Value* value2, TypeName part_type,
|
||||||
Value* HIRBuilder::Select(Value* cond, Value* value1, Value* value2) {
|
Value* HIRBuilder::Select(Value* cond, Value* value1, Value* value2) {
|
||||||
assert_true(cond->type == INT8_TYPE || cond->type == VEC128_TYPE); // for now
|
assert_true(cond->type == INT8_TYPE || cond->type == VEC128_TYPE); // for now
|
||||||
ASSERT_TYPES_EQUAL(value1, value2);
|
ASSERT_TYPES_EQUAL(value1, value2);
|
||||||
|
// chrispy: this was being done with V128, which was breaking stuff obviously
|
||||||
if (cond->IsConstant()) {
|
// because that should be an element by element select
|
||||||
|
if (cond->IsConstant() && IsScalarIntegralType(cond->type)) {
|
||||||
return cond->IsConstantTrue() ? value1 : value2;
|
return cond->IsConstantTrue() ? value1 : value2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1518,7 +1507,8 @@ Value* HIRBuilder::Add(Value* value1, Value* value2,
|
||||||
ASSERT_TYPES_EQUAL(value1, value2);
|
ASSERT_TYPES_EQUAL(value1, value2);
|
||||||
|
|
||||||
// TODO(benvanik): optimize when flags set.
|
// TODO(benvanik): optimize when flags set.
|
||||||
if (!arithmetic_flags) {
|
|
||||||
|
if (!arithmetic_flags && IsScalarIntegralType(value1->type)) {
|
||||||
if (value1->IsConstantZero()) {
|
if (value1->IsConstantZero()) {
|
||||||
return value2;
|
return value2;
|
||||||
} else if (value2->IsConstantZero()) {
|
} else if (value2->IsConstantZero()) {
|
||||||
|
|
|
@ -442,7 +442,18 @@ int InstrEmit_fabsx(PPCHIRBuilder& f, const InstrData& i) {
|
||||||
// frD <- abs(frB)
|
// frD <- abs(frB)
|
||||||
Value* v = f.Abs(f.LoadFPR(i.X.RB));
|
Value* v = f.Abs(f.LoadFPR(i.X.RB));
|
||||||
f.StoreFPR(i.X.RT, v);
|
f.StoreFPR(i.X.RT, v);
|
||||||
f.UpdateFPSCR(v, i.X.Rc);
|
/*
|
||||||
|
The contents of frB with bit 0 cleared are placed into frD.
|
||||||
|
Note that the fabs instruction treats NaNs just like any other kind of value. That is, the sign
|
||||||
|
bit of a NaN may be altered by fabs. This instruction does not alter the FPSCR.
|
||||||
|
Other registers altered:
|
||||||
|
• Condition Register (CR1 field):
|
||||||
|
Affected: FX, FEX, VX, OX (if Rc = 1)
|
||||||
|
*/
|
||||||
|
// f.UpdateFPSCR(v, i.X.Rc);
|
||||||
|
if (i.X.Rc) {
|
||||||
|
// todo
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -458,7 +469,10 @@ int InstrEmit_fnabsx(PPCHIRBuilder& f, const InstrData& i) {
|
||||||
// frD <- !abs(frB)
|
// frD <- !abs(frB)
|
||||||
Value* v = f.Neg(f.Abs(f.LoadFPR(i.X.RB)));
|
Value* v = f.Neg(f.Abs(f.LoadFPR(i.X.RB)));
|
||||||
f.StoreFPR(i.X.RT, v);
|
f.StoreFPR(i.X.RT, v);
|
||||||
f.UpdateFPSCR(v, i.X.Rc);
|
//f.UpdateFPSCR(v, i.X.Rc);
|
||||||
|
if (i.X.Rc) {
|
||||||
|
//todo
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -466,7 +480,10 @@ int InstrEmit_fnegx(PPCHIRBuilder& f, const InstrData& i) {
|
||||||
// frD <- ¬ frB[0] || frB[1-63]
|
// frD <- ¬ frB[0] || frB[1-63]
|
||||||
Value* v = f.Neg(f.LoadFPR(i.X.RB));
|
Value* v = f.Neg(f.LoadFPR(i.X.RB));
|
||||||
f.StoreFPR(i.X.RT, v);
|
f.StoreFPR(i.X.RT, v);
|
||||||
f.UpdateFPSCR(v, i.X.Rc);
|
//f.UpdateFPSCR(v, i.X.Rc);
|
||||||
|
if (i.X.Rc) {
|
||||||
|
//todo
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1598,6 +1598,8 @@ bool XexModule::FindSaveRest() {
|
||||||
// TODO(benvanik): set flags fn->flags |= FunctionSymbol::kFlagSaveGprLr;
|
// TODO(benvanik): set flags fn->flags |= FunctionSymbol::kFlagSaveGprLr;
|
||||||
function->set_behavior(Function::Behavior::kProlog);
|
function->set_behavior(Function::Behavior::kProlog);
|
||||||
function->set_status(Symbol::Status::kDeclared);
|
function->set_status(Symbol::Status::kDeclared);
|
||||||
|
function->SetSaverest(cpu::SaveRestoreType::GPR, false, n);
|
||||||
|
|
||||||
address += 4;
|
address += 4;
|
||||||
}
|
}
|
||||||
address = gplr_start + 20 * 4;
|
address = gplr_start + 20 * 4;
|
||||||
|
@ -1612,6 +1614,7 @@ bool XexModule::FindSaveRest() {
|
||||||
// TODO(benvanik): set flags fn->flags |= FunctionSymbol::kFlagRestGprLr;
|
// TODO(benvanik): set flags fn->flags |= FunctionSymbol::kFlagRestGprLr;
|
||||||
function->set_behavior(Function::Behavior::kEpilogReturn);
|
function->set_behavior(Function::Behavior::kEpilogReturn);
|
||||||
function->set_status(Symbol::Status::kDeclared);
|
function->set_status(Symbol::Status::kDeclared);
|
||||||
|
function->SetSaverest(cpu::SaveRestoreType::GPR, true, n);
|
||||||
address += 4;
|
address += 4;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1628,6 +1631,8 @@ bool XexModule::FindSaveRest() {
|
||||||
// TODO(benvanik): set flags fn->flags |= FunctionSymbol::kFlagSaveFpr;
|
// TODO(benvanik): set flags fn->flags |= FunctionSymbol::kFlagSaveFpr;
|
||||||
function->set_behavior(Function::Behavior::kProlog);
|
function->set_behavior(Function::Behavior::kProlog);
|
||||||
function->set_status(Symbol::Status::kDeclared);
|
function->set_status(Symbol::Status::kDeclared);
|
||||||
|
|
||||||
|
function->SetSaverest(cpu::SaveRestoreType::FPR, false, n);
|
||||||
address += 4;
|
address += 4;
|
||||||
}
|
}
|
||||||
address = fpr_start + (18 * 4) + (1 * 4);
|
address = fpr_start + (18 * 4) + (1 * 4);
|
||||||
|
@ -1642,6 +1647,7 @@ bool XexModule::FindSaveRest() {
|
||||||
// TODO(benvanik): set flags fn->flags |= FunctionSymbol::kFlagRestFpr;
|
// TODO(benvanik): set flags fn->flags |= FunctionSymbol::kFlagRestFpr;
|
||||||
function->set_behavior(Function::Behavior::kEpilog);
|
function->set_behavior(Function::Behavior::kEpilog);
|
||||||
function->set_status(Symbol::Status::kDeclared);
|
function->set_status(Symbol::Status::kDeclared);
|
||||||
|
function->SetSaverest(cpu::SaveRestoreType::FPR, true, n);
|
||||||
address += 4;
|
address += 4;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1662,6 +1668,7 @@ bool XexModule::FindSaveRest() {
|
||||||
// TODO(benvanik): set flags fn->flags |= FunctionSymbol::kFlagSaveVmx;
|
// TODO(benvanik): set flags fn->flags |= FunctionSymbol::kFlagSaveVmx;
|
||||||
function->set_behavior(Function::Behavior::kProlog);
|
function->set_behavior(Function::Behavior::kProlog);
|
||||||
function->set_status(Symbol::Status::kDeclared);
|
function->set_status(Symbol::Status::kDeclared);
|
||||||
|
function->SetSaverest(cpu::SaveRestoreType::VMX, false, n);
|
||||||
address += 2 * 4;
|
address += 2 * 4;
|
||||||
}
|
}
|
||||||
address += 4;
|
address += 4;
|
||||||
|
@ -1675,6 +1682,7 @@ bool XexModule::FindSaveRest() {
|
||||||
// TODO(benvanik): set flags fn->flags |= FunctionSymbol::kFlagSaveVmx;
|
// TODO(benvanik): set flags fn->flags |= FunctionSymbol::kFlagSaveVmx;
|
||||||
function->set_behavior(Function::Behavior::kProlog);
|
function->set_behavior(Function::Behavior::kProlog);
|
||||||
function->set_status(Symbol::Status::kDeclared);
|
function->set_status(Symbol::Status::kDeclared);
|
||||||
|
function->SetSaverest(cpu::SaveRestoreType::VMX, false, n);
|
||||||
address += 2 * 4;
|
address += 2 * 4;
|
||||||
}
|
}
|
||||||
address = vmx_start + (18 * 2 * 4) + (1 * 4) + (64 * 2 * 4) + (1 * 4);
|
address = vmx_start + (18 * 2 * 4) + (1 * 4) + (64 * 2 * 4) + (1 * 4);
|
||||||
|
@ -1688,6 +1696,7 @@ bool XexModule::FindSaveRest() {
|
||||||
// TODO(benvanik): set flags fn->flags |= FunctionSymbol::kFlagRestVmx;
|
// TODO(benvanik): set flags fn->flags |= FunctionSymbol::kFlagRestVmx;
|
||||||
function->set_behavior(Function::Behavior::kEpilog);
|
function->set_behavior(Function::Behavior::kEpilog);
|
||||||
function->set_status(Symbol::Status::kDeclared);
|
function->set_status(Symbol::Status::kDeclared);
|
||||||
|
function->SetSaverest(cpu::SaveRestoreType::VMX, true, n);
|
||||||
address += 2 * 4;
|
address += 2 * 4;
|
||||||
}
|
}
|
||||||
address += 4;
|
address += 4;
|
||||||
|
@ -1701,6 +1710,7 @@ bool XexModule::FindSaveRest() {
|
||||||
// TODO(benvanik): set flags fn->flags |= FunctionSymbol::kFlagRestVmx;
|
// TODO(benvanik): set flags fn->flags |= FunctionSymbol::kFlagRestVmx;
|
||||||
function->set_behavior(Function::Behavior::kEpilog);
|
function->set_behavior(Function::Behavior::kEpilog);
|
||||||
function->set_status(Symbol::Status::kDeclared);
|
function->set_status(Symbol::Status::kDeclared);
|
||||||
|
function->SetSaverest(cpu::SaveRestoreType::VMX, true, n);
|
||||||
address += 2 * 4;
|
address += 2 * 4;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue