Disable most XOP code by default. Either the manual is wrong about the shifts or we are assembling them incorrectly; will return to this later and fix it.
Comparisons and selects done via XOP are fine, though.
This commit is contained in:
parent
b26c6ee1b8
commit
b5ef3453c7
|
@ -1030,8 +1030,13 @@ static const vec128_t xmm_consts[] = {
|
|||
/*
|
||||
XMMF16PackLCPI6
|
||||
*/
|
||||
vec128i(0x8000)
|
||||
|
||||
vec128i(0x8000),
|
||||
/* XMMXOPByteShiftMask,*/
|
||||
vec128b(7),
|
||||
/*XMMXOPWordShiftMask*/
|
||||
vec128s(15),
|
||||
/*XMMXOPDwordShiftMask*/
|
||||
vec128i(31)
|
||||
};
|
||||
|
||||
void* X64Emitter::FindByteConstantOffset(unsigned bytevalue) {
|
||||
|
|
|
@ -167,7 +167,11 @@ enum XmmConst {
|
|||
XMMF16PackLCPI3,
|
||||
XMMF16PackLCPI4,
|
||||
XMMF16PackLCPI5,
|
||||
XMMF16PackLCPI6
|
||||
XMMF16PackLCPI6,
|
||||
XMMXOPByteShiftMask,
|
||||
XMMXOPWordShiftMask,
|
||||
XMMXOPDwordShiftMask,
|
||||
|
||||
};
|
||||
using amdfx::xopcompare_e;
|
||||
using Xbyak::Xmm;
|
||||
|
@ -383,7 +387,30 @@ class X64Emitter : public Xbyak::CodeGenerator {
|
|||
DEFINECOMPARE(vpcomud);
|
||||
DEFINECOMPARE(vpcomq);
|
||||
DEFINECOMPARE(vpcomuq);
|
||||
#undef DEFINECOMPARE
|
||||
#undef DEFINECOMPARE
|
||||
|
||||
#define DEFINESHIFTER(name) \
|
||||
void name(Xmm dest, Xmm src1, Xmm src2) { \
|
||||
auto xop_bytes = \
|
||||
amdfx::operations::name(dest.getIdx(), src1.getIdx(), src2.getIdx()); \
|
||||
EmitXOP(xop_bytes); \
|
||||
}
|
||||
|
||||
DEFINESHIFTER(vprotb)
|
||||
DEFINESHIFTER(vprotw)
|
||||
DEFINESHIFTER(vprotd)
|
||||
DEFINESHIFTER(vprotq)
|
||||
|
||||
DEFINESHIFTER(vpshab)
|
||||
DEFINESHIFTER(vpshaw)
|
||||
DEFINESHIFTER(vpshad)
|
||||
DEFINESHIFTER(vpshaq)
|
||||
|
||||
DEFINESHIFTER(vpshlb)
|
||||
DEFINESHIFTER(vpshlw)
|
||||
DEFINESHIFTER(vpshld)
|
||||
DEFINESHIFTER(vpshlq)
|
||||
|
||||
protected:
|
||||
void* Emplace(const EmitFunctionInfo& func_info,
|
||||
GuestFunction* function = nullptr);
|
||||
|
|
|
@ -19,6 +19,16 @@
|
|||
#include "xenia/base/cvar.h"
|
||||
#include "xenia/cpu/backend/x64/x64_stack_layout.h"
|
||||
|
||||
// Switches that select the XOP code path for individual vector operations.
// Each one is combined with the kX64EmitXOP feature check at its use site.
// The rotate and shift paths default to off; only the compare path defaults
// to on.

// When set, vector rotate-left is emitted with vprotb/vprotw/vprotd.
DEFINE_bool(xop_rotates, false, "rotate via xop", "X64");
|
||||
|
||||
// When set, vector shift-left is emitted with vpshlb/vpshlw/vpshld.
DEFINE_bool(xop_left_shifts, false, "shl via xop", "X64");
|
||||
|
||||
// When set, vector logical shift-right is emitted with vpshl* using negated
// shift counts.
DEFINE_bool(xop_right_shifts, false, "shr via xop", "X64");
|
||||
|
||||
// When set, vector arithmetic shift-right is emitted with vpsha* using negated
// shift counts.
DEFINE_bool(xop_arithmetic_right_shifts, false, "sar via xop", "X64");
|
||||
|
||||
// When set, vector compares are emitted with the XOP vpcom* instructions.
DEFINE_bool(xop_compares, true, "compare via xop", "X64");
|
||||
|
||||
namespace xe {
|
||||
namespace cpu {
|
||||
namespace backend {
|
||||
|
@ -407,7 +417,7 @@ struct VECTOR_COMPARE_SGE_V128
|
|||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
EmitAssociativeBinaryXmmOp(
|
||||
e, i, [&i](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
||||
if (e.IsFeatureEnabled(kX64EmitXOP)) {
|
||||
if (cvars::xop_compares && e.IsFeatureEnabled(kX64EmitXOP)) {
|
||||
switch (i.instr->flags) {
|
||||
case INT8_TYPE:
|
||||
e.vpcomb(dest, src1, src2, xopcompare_e::GTE);
|
||||
|
@ -775,10 +785,38 @@ static __m128i EmulateVectorShl(void*, __m128i src1, __m128i src2) {
|
|||
// Store result and return it.
|
||||
return _mm_load_si128(reinterpret_cast<__m128i*>(value));
|
||||
}
|
||||
|
||||
// Picks the XMM constant that the XOP shift/rotate sequences AND with the
// per-lane shift counts before issuing the instruction.
//   typ: element type tag, compared against INT8_TYPE and INT16_TYPE.
// Returns the byte mask for INT8_TYPE, the word mask for INT16_TYPE, and the
// dword mask for every other value.
static XmmConst GetShiftmaskForType(unsigned typ) {
  switch (typ) {
    case INT8_TYPE:
      return XMMXOPByteShiftMask;
    case INT16_TYPE:
      return XMMXOPWordShiftMask;
    default:
      return XMMXOPDwordShiftMask;
  }
}
|
||||
struct VECTOR_SHL_V128
|
||||
: Sequence<VECTOR_SHL_V128, I<OPCODE_VECTOR_SHL, V128Op, V128Op, V128Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
if (cvars::xop_left_shifts && e.IsFeatureEnabled(kX64EmitXOP)) {
|
||||
Xmm src1 = GetInputRegOrConstant(e, i.src1, e.xmm0);
|
||||
Xmm src2 = GetInputRegOrConstant(e, i.src2, e.xmm1);
|
||||
|
||||
e.vpand(e.xmm2, src2,
|
||||
e.GetXmmConstPtr(GetShiftmaskForType(i.instr->flags)));
|
||||
|
||||
switch (i.instr->flags) {
|
||||
case INT8_TYPE:
|
||||
e.vpshlb(i.dest, src1, e.xmm2);
|
||||
break;
|
||||
case INT16_TYPE:
|
||||
e.vpshlw(i.dest, src1, e.xmm2);
|
||||
break;
|
||||
case INT32_TYPE:
|
||||
e.vpshld(i.dest, src1, e.xmm2);
|
||||
break;
|
||||
}
|
||||
|
||||
} else {
|
||||
switch (i.instr->flags) {
|
||||
case INT8_TYPE:
|
||||
EmitInt8(e, i);
|
||||
|
@ -794,6 +832,7 @@ struct VECTOR_SHL_V128
|
|||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void EmitInt8(X64Emitter& e, const EmitArgType& i) {
|
||||
// TODO(benvanik): native version (with shift magic).
|
||||
|
@ -1061,6 +1100,31 @@ static __m128i EmulateVectorShr(void*, __m128i src1, __m128i src2) {
|
|||
struct VECTOR_SHR_V128
|
||||
: Sequence<VECTOR_SHR_V128, I<OPCODE_VECTOR_SHR, V128Op, V128Op, V128Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
if (cvars::xop_right_shifts && e.IsFeatureEnabled(kX64EmitXOP)) {
|
||||
Xmm src1 = GetInputRegOrConstant(e, i.src1, e.xmm0);
|
||||
Xmm src2 = GetInputRegOrConstant(e, i.src2, e.xmm1);
|
||||
|
||||
e.vpand(e.xmm2, src2,
|
||||
e.GetXmmConstPtr(GetShiftmaskForType(i.instr->flags)));
|
||||
|
||||
e.vpcmpeqb(e.xmm3, e.xmm3);
|
||||
|
||||
switch (i.instr->flags) {
|
||||
case INT8_TYPE:
|
||||
e.vpsignb(e.xmm2, e.xmm3);
|
||||
e.vpshlb(i.dest, src1, e.xmm2);
|
||||
break;
|
||||
case INT16_TYPE:
|
||||
e.vpsignw(e.xmm2, e.xmm3);
|
||||
e.vpshlw(i.dest, src1, e.xmm2);
|
||||
break;
|
||||
case INT32_TYPE:
|
||||
e.vpsignd(e.xmm2, e.xmm3);
|
||||
e.vpshld(i.dest, src1, e.xmm2);
|
||||
break;
|
||||
}
|
||||
|
||||
} else {
|
||||
switch (i.instr->flags) {
|
||||
case INT8_TYPE:
|
||||
EmitInt8(e, i);
|
||||
|
@ -1076,6 +1140,7 @@ struct VECTOR_SHR_V128
|
|||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void EmitInt8(X64Emitter& e, const EmitArgType& i) {
|
||||
// TODO(benvanik): native version (with shift magic).
|
||||
|
@ -1244,6 +1309,31 @@ EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SHR, VECTOR_SHR_V128);
|
|||
struct VECTOR_SHA_V128
|
||||
: Sequence<VECTOR_SHA_V128, I<OPCODE_VECTOR_SHA, V128Op, V128Op, V128Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
if (cvars::xop_arithmetic_right_shifts && e.IsFeatureEnabled(kX64EmitXOP)) {
|
||||
Xmm src1 = GetInputRegOrConstant(e, i.src1, e.xmm0);
|
||||
Xmm src2 = GetInputRegOrConstant(e, i.src2, e.xmm1);
|
||||
|
||||
e.vpand(e.xmm2, src2,
|
||||
e.GetXmmConstPtr(GetShiftmaskForType(i.instr->flags)));
|
||||
|
||||
e.vpcmpeqb(e.xmm3, e.xmm3);
|
||||
|
||||
switch (i.instr->flags) {
|
||||
case INT8_TYPE:
|
||||
e.vpsignb(e.xmm2, e.xmm3);
|
||||
e.vpshab(i.dest, src1, e.xmm2);
|
||||
break;
|
||||
case INT16_TYPE:
|
||||
e.vpsignw(e.xmm2, e.xmm3);
|
||||
e.vpshaw(i.dest, src1, e.xmm2);
|
||||
break;
|
||||
case INT32_TYPE:
|
||||
e.vpsignd(e.xmm2, e.xmm3);
|
||||
e.vpshad(i.dest, src1, e.xmm2);
|
||||
break;
|
||||
}
|
||||
|
||||
} else {
|
||||
switch (i.instr->flags) {
|
||||
case INT8_TYPE:
|
||||
EmitInt8(e, i);
|
||||
|
@ -1259,6 +1349,7 @@ struct VECTOR_SHA_V128
|
|||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void EmitInt8(X64Emitter& e, const EmitArgType& i) {
|
||||
// TODO(benvanik): native version (with shift magic).
|
||||
|
@ -1432,11 +1523,32 @@ struct VECTOR_ROTATE_LEFT_V128
|
|||
: Sequence<VECTOR_ROTATE_LEFT_V128,
|
||||
I<OPCODE_VECTOR_ROTATE_LEFT, V128Op, V128Op, V128Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
if (cvars::xop_rotates && e.IsFeatureEnabled(kX64EmitXOP)) {
|
||||
Xmm src1 = GetInputRegOrConstant(e, i.src1, e.xmm0);
|
||||
Xmm src2 = GetInputRegOrConstant(e, i.src2, e.xmm1);
|
||||
|
||||
e.vpand(e.xmm2, src2,
|
||||
e.GetXmmConstPtr(GetShiftmaskForType(i.instr->flags)));
|
||||
|
||||
switch (i.instr->flags) {
|
||||
case INT8_TYPE:
|
||||
e.vprotb(i.dest, src1, e.xmm2);
|
||||
break;
|
||||
case INT16_TYPE:
|
||||
e.vprotw(i.dest, src1, e.xmm2);
|
||||
break;
|
||||
case INT32_TYPE:
|
||||
e.vprotd(i.dest, src1, e.xmm2);
|
||||
break;
|
||||
}
|
||||
|
||||
} else {
|
||||
switch (i.instr->flags) {
|
||||
case INT8_TYPE:
|
||||
// TODO(benvanik): native version (with shift magic).
|
||||
if (i.src2.is_constant) {
|
||||
e.lea(e.GetNativeParam(1), e.StashConstantXmm(1, i.src2.constant()));
|
||||
e.lea(e.GetNativeParam(1),
|
||||
e.StashConstantXmm(1, i.src2.constant()));
|
||||
} else {
|
||||
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
|
||||
}
|
||||
|
@ -1448,7 +1560,8 @@ struct VECTOR_ROTATE_LEFT_V128
|
|||
case INT16_TYPE:
|
||||
// TODO(benvanik): native version (with shift magic).
|
||||
if (i.src2.is_constant) {
|
||||
e.lea(e.GetNativeParam(1), e.StashConstantXmm(1, i.src2.constant()));
|
||||
e.lea(e.GetNativeParam(1),
|
||||
e.StashConstantXmm(1, i.src2.constant()));
|
||||
} else {
|
||||
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
|
||||
}
|
||||
|
@ -1499,6 +1612,7 @@ struct VECTOR_ROTATE_LEFT_V128
|
|||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
EMITTER_OPCODE_TABLE(OPCODE_VECTOR_ROTATE_LEFT, VECTOR_ROTATE_LEFT_V128);
|
||||
|
||||
|
|
|
@ -452,7 +452,7 @@ Affected: FX, FEX, VX, OX (if Rc = 1)
|
|||
*/
|
||||
// f.UpdateFPSCR(v, i.X.Rc);
|
||||
if (i.X.Rc) {
|
||||
|
||||
// todo
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -469,7 +469,10 @@ int InstrEmit_fnabsx(PPCHIRBuilder& f, const InstrData& i) {
|
|||
// frD <- !abs(frB)
|
||||
Value* v = f.Neg(f.Abs(f.LoadFPR(i.X.RB)));
|
||||
f.StoreFPR(i.X.RT, v);
|
||||
f.UpdateFPSCR(v, i.X.Rc);
|
||||
//f.UpdateFPSCR(v, i.X.Rc);
|
||||
if (i.X.Rc) {
|
||||
//todo
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -477,7 +480,10 @@ int InstrEmit_fnegx(PPCHIRBuilder& f, const InstrData& i) {
|
|||
// frD <- ¬ frB[0] || frB[1-63]
|
||||
Value* v = f.Neg(f.LoadFPR(i.X.RB));
|
||||
f.StoreFPR(i.X.RT, v);
|
||||
f.UpdateFPSCR(v, i.X.Rc);
|
||||
//f.UpdateFPSCR(v, i.X.Rc);
|
||||
if (i.X.Rc) {
|
||||
//todo
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue