Fix some more constant folding
fabsx does NOT set FPSCR. It also turns out that our vector unsigned compare instructions are a bit weird?
This commit is contained in:
parent
0ebc109d4d
commit
b26c6ee1b8
|
@ -143,6 +143,12 @@ X64Emitter::X64Emitter(X64Backend* backend, XbyakAllocator* allocator)
|
|||
feature_flags_ |= kX64EmitTBM;
|
||||
}
|
||||
}
|
||||
if (amd_flags & (1U << 11)) {
|
||||
if ((cvars::x64_extension_mask & kX64EmitXOP) == kX64EmitXOP) {
|
||||
feature_flags_ |= kX64EmitXOP;
|
||||
XELOGCPU("Cpu support XOP!\n\n");
|
||||
}
|
||||
}
|
||||
if (cpu_.has(Xbyak::util::Cpu::tAMD)) {
|
||||
bool is_zennish = cpu_.displayFamily >= 0x17;
|
||||
/*
|
||||
|
|
|
@ -143,6 +143,7 @@ struct VECTOR_DENORMFLUSH
|
|||
e.vandps(e.xmm0, i.src1,
|
||||
e.GetXmmConstPtr(XMMSingleDenormalMask)); // 0.25 P0123
|
||||
e.vcmpneqps(e.xmm2, e.xmm0, e.xmm1); // 0.5 P01
|
||||
// todo: xop vpcmov here
|
||||
e.vandps(e.xmm1, i.src1,
|
||||
e.GetXmmConstPtr(XMMSignMaskF32)); // 0.5 P0123 take signs, zeros
|
||||
// must keep their signs
|
||||
|
@ -457,68 +458,52 @@ struct VECTOR_COMPARE_UGT_V128
|
|||
: Sequence<VECTOR_COMPARE_UGT_V128,
|
||||
I<OPCODE_VECTOR_COMPARE_UGT, V128Op, V128Op, V128Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
if (i.instr->flags != FLOAT32_TYPE && e.IsFeatureEnabled(kX64EmitXOP)) {
|
||||
Xmm src1 = GetInputRegOrConstant(e, i.src1, e.xmm0);
|
||||
Xmm src2 = GetInputRegOrConstant(e, i.src2, e.xmm1);
|
||||
switch (i.instr->flags) {
|
||||
case INT8_TYPE:
|
||||
e.vpcomub(i.dest, src1, src2, xopcompare_e::GT);
|
||||
break;
|
||||
case INT16_TYPE:
|
||||
e.vpcomuw(i.dest, src1, src2, xopcompare_e::GT);
|
||||
break;
|
||||
case INT32_TYPE:
|
||||
e.vpcomud(i.dest, src1, src2, xopcompare_e::GT);
|
||||
break;
|
||||
}
|
||||
Xbyak::Address sign_addr = e.ptr[e.rax]; // dummy
|
||||
switch (i.instr->flags) {
|
||||
case INT8_TYPE:
|
||||
sign_addr = e.GetXmmConstPtr(XMMSignMaskI8);
|
||||
break;
|
||||
case INT16_TYPE:
|
||||
sign_addr = e.GetXmmConstPtr(XMMSignMaskI16);
|
||||
break;
|
||||
case INT32_TYPE:
|
||||
sign_addr = e.GetXmmConstPtr(XMMSignMaskI32);
|
||||
break;
|
||||
case FLOAT32_TYPE:
|
||||
e.ChangeMxcsrMode(MXCSRMode::Vmx);
|
||||
sign_addr = e.GetXmmConstPtr(XMMSignMaskF32);
|
||||
break;
|
||||
default:
|
||||
assert_always();
|
||||
break;
|
||||
}
|
||||
if (i.src1.is_constant) {
|
||||
// TODO(benvanik): make this constant.
|
||||
e.LoadConstantXmm(e.xmm0, i.src1.constant());
|
||||
e.vpxor(e.xmm0, sign_addr);
|
||||
} else {
|
||||
Xbyak::Address sign_addr = e.ptr[e.rax]; // dummy
|
||||
switch (i.instr->flags) {
|
||||
case INT8_TYPE:
|
||||
sign_addr = e.GetXmmConstPtr(XMMSignMaskI8);
|
||||
break;
|
||||
case INT16_TYPE:
|
||||
sign_addr = e.GetXmmConstPtr(XMMSignMaskI16);
|
||||
break;
|
||||
case INT32_TYPE:
|
||||
sign_addr = e.GetXmmConstPtr(XMMSignMaskI32);
|
||||
break;
|
||||
case FLOAT32_TYPE:
|
||||
e.ChangeMxcsrMode(MXCSRMode::Vmx);
|
||||
sign_addr = e.GetXmmConstPtr(XMMSignMaskF32);
|
||||
break;
|
||||
default:
|
||||
assert_always();
|
||||
break;
|
||||
}
|
||||
if (i.src1.is_constant) {
|
||||
// TODO(benvanik): make this constant.
|
||||
e.LoadConstantXmm(e.xmm0, i.src1.constant());
|
||||
e.vpxor(e.xmm0, sign_addr);
|
||||
} else {
|
||||
e.vpxor(e.xmm0, i.src1, sign_addr);
|
||||
}
|
||||
if (i.src2.is_constant) {
|
||||
// TODO(benvanik): make this constant.
|
||||
e.LoadConstantXmm(e.xmm1, i.src2.constant());
|
||||
e.vpxor(e.xmm1, sign_addr);
|
||||
} else {
|
||||
e.vpxor(e.xmm1, i.src2, sign_addr);
|
||||
}
|
||||
switch (i.instr->flags) {
|
||||
case INT8_TYPE:
|
||||
e.vpcmpgtb(i.dest, e.xmm0, e.xmm1);
|
||||
break;
|
||||
case INT16_TYPE:
|
||||
e.vpcmpgtw(i.dest, e.xmm0, e.xmm1);
|
||||
break;
|
||||
case INT32_TYPE:
|
||||
e.vpcmpgtd(i.dest, e.xmm0, e.xmm1);
|
||||
break;
|
||||
case FLOAT32_TYPE:
|
||||
e.vcmpgtps(i.dest, e.xmm0, e.xmm1);
|
||||
break;
|
||||
}
|
||||
e.vpxor(e.xmm0, i.src1, sign_addr);
|
||||
}
|
||||
if (i.src2.is_constant) {
|
||||
// TODO(benvanik): make this constant.
|
||||
e.LoadConstantXmm(e.xmm1, i.src2.constant());
|
||||
e.vpxor(e.xmm1, sign_addr);
|
||||
} else {
|
||||
e.vpxor(e.xmm1, i.src2, sign_addr);
|
||||
}
|
||||
switch (i.instr->flags) {
|
||||
case INT8_TYPE:
|
||||
e.vpcmpgtb(i.dest, e.xmm0, e.xmm1);
|
||||
break;
|
||||
case INT16_TYPE:
|
||||
e.vpcmpgtw(i.dest, e.xmm0, e.xmm1);
|
||||
break;
|
||||
case INT32_TYPE:
|
||||
e.vpcmpgtd(i.dest, e.xmm0, e.xmm1);
|
||||
break;
|
||||
case FLOAT32_TYPE:
|
||||
e.vcmpgtps(i.dest, e.xmm0, e.xmm1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -634,6 +619,7 @@ struct VECTOR_ADD
|
|||
// overflowed (only need to check one input)
|
||||
// if (src1 > res) then overflowed
|
||||
// http://locklessinc.com/articles/sat_arithmetic/
|
||||
// chrispy: todo - add xop stuff here
|
||||
e.vpxor(e.xmm2, src1, e.GetXmmConstPtr(XMMSignMaskI32));
|
||||
e.vpxor(e.xmm0, e.xmm1, e.GetXmmConstPtr(XMMSignMaskI32));
|
||||
e.vpcmpgtd(e.xmm0, e.xmm2, e.xmm0);
|
||||
|
|
|
@ -781,11 +781,12 @@ struct SELECT_V128_V128
|
|||
} else if (mayblend == PermittedBlend::Ps) {
|
||||
e.vblendvps(i.dest, src2, src3, src1);
|
||||
} else {
|
||||
if (e.IsFeatureEnabled(kX64EmitXOP)) {
|
||||
if (1 && e.IsFeatureEnabled(kX64EmitXOP)) {
|
||||
XELOGCPU("Doing vpcmov!!");
|
||||
e.vpcmov(i.dest, src2, src3, src1);
|
||||
e.vpcmov(i.dest, src3, src2, src1);
|
||||
} else {
|
||||
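// bitwise select with src1 as the mask: (src1 & src3) | (~src1 & src2),
// i.e. set mask bits take src3, clear mask bits take src2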
|
||||
|
||||
e.vpandn(e.xmm3, src1, src2);
|
||||
e.vpand(i.dest, src1, src3);
|
||||
e.vpor(i.dest, i.dest, e.xmm3);
|
||||
|
|
|
@ -1023,13 +1023,6 @@ Value* HIRBuilder::Truncate(Value* value, TypeName target_type) {
|
|||
|
||||
Value* HIRBuilder::Convert(Value* value, TypeName target_type,
|
||||
RoundMode round_mode) {
|
||||
if (value->type == target_type) {
|
||||
return value;
|
||||
} else if (value->IsConstant()) {
|
||||
Value* dest = CloneValue(value);
|
||||
dest->Convert(target_type, round_mode);
|
||||
return dest;
|
||||
}
|
||||
|
||||
Instr* i =
|
||||
AppendInstr(OPCODE_CONVERT_info, round_mode, AllocValue(target_type));
|
||||
|
@ -1041,11 +1034,6 @@ Value* HIRBuilder::Convert(Value* value, TypeName target_type,
|
|||
Value* HIRBuilder::Round(Value* value, RoundMode round_mode) {
|
||||
ASSERT_FLOAT_OR_VECTOR_TYPE(value);
|
||||
|
||||
if (value->IsConstant()) {
|
||||
Value* dest = CloneValue(value);
|
||||
dest->Round(round_mode);
|
||||
return dest;
|
||||
}
|
||||
|
||||
Instr* i =
|
||||
AppendInstr(OPCODE_ROUND_info, round_mode, AllocValue(value->type));
|
||||
|
@ -1295,7 +1283,7 @@ void HIRBuilder::SetNJM(Value* value) {
|
|||
Value* HIRBuilder::Max(Value* value1, Value* value2) {
|
||||
ASSERT_TYPES_EQUAL(value1, value2);
|
||||
|
||||
if (value1->type != VEC128_TYPE && value1->IsConstant() &&
|
||||
if (IsScalarIntegralType( value1->type) && value1->IsConstant() &&
|
||||
value2->IsConstant()) {
|
||||
return value1->Compare(OPCODE_COMPARE_SLT, value2) ? value2 : value1;
|
||||
}
|
||||
|
@ -1323,7 +1311,7 @@ Value* HIRBuilder::VectorMax(Value* value1, Value* value2, TypeName part_type,
|
|||
Value* HIRBuilder::Min(Value* value1, Value* value2) {
|
||||
ASSERT_TYPES_EQUAL(value1, value2);
|
||||
|
||||
if (value1->type != VEC128_TYPE && value1->IsConstant() &&
|
||||
if (IsScalarIntegralType(value1->type) && value1->IsConstant() &&
|
||||
value2->IsConstant()) {
|
||||
return value1->Compare(OPCODE_COMPARE_SLT, value2) ? value1 : value2;
|
||||
}
|
||||
|
@ -1351,8 +1339,9 @@ Value* HIRBuilder::VectorMin(Value* value1, Value* value2, TypeName part_type,
|
|||
Value* HIRBuilder::Select(Value* cond, Value* value1, Value* value2) {
|
||||
assert_true(cond->type == INT8_TYPE || cond->type == VEC128_TYPE); // for now
|
||||
ASSERT_TYPES_EQUAL(value1, value2);
|
||||
|
||||
if (cond->IsConstant()) {
|
||||
// chrispy: this was being done with V128, which was breaking stuff obviously
|
||||
// because that should be an element by element select
|
||||
if (cond->IsConstant() && IsScalarIntegralType(cond->type)) {
|
||||
return cond->IsConstantTrue() ? value1 : value2;
|
||||
}
|
||||
|
||||
|
@ -1518,7 +1507,8 @@ Value* HIRBuilder::Add(Value* value1, Value* value2,
|
|||
ASSERT_TYPES_EQUAL(value1, value2);
|
||||
|
||||
// TODO(benvanik): optimize when flags set.
|
||||
if (!arithmetic_flags) {
|
||||
|
||||
if (!arithmetic_flags && IsScalarIntegralType(value1->type)) {
|
||||
if (value1->IsConstantZero()) {
|
||||
return value2;
|
||||
} else if (value2->IsConstantZero()) {
|
||||
|
|
|
@ -442,7 +442,18 @@ int InstrEmit_fabsx(PPCHIRBuilder& f, const InstrData& i) {
|
|||
// frD <- abs(frB)
|
||||
Value* v = f.Abs(f.LoadFPR(i.X.RB));
|
||||
f.StoreFPR(i.X.RT, v);
|
||||
f.UpdateFPSCR(v, i.X.Rc);
|
||||
/*
|
||||
The contents of frB with bit 0 cleared are placed into frD.
|
||||
Note that the fabs instruction treats NaNs just like any other kind of value. That is, the sign
|
||||
bit of a NaN may be altered by fabs. This instruction does not alter the FPSCR.
|
||||
Other registers altered:
|
||||
• Condition Register (CR1 field):
|
||||
Affected: FX, FEX, VX, OX (if Rc = 1)
|
||||
*/
|
||||
// f.UpdateFPSCR(v, i.X.Rc);
|
||||
if (i.X.Rc) {
|
||||
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue