Merge pull request #453 from DrChat/instr_fixes

Fix vcmpbfp having incorrect behavior when the value is equal to the bounds
This commit is contained in:
Ben Vanik 2015-11-07 12:28:44 -08:00
commit 563552f62f
2 changed files with 42 additions and 6 deletions

View File

@ -597,20 +597,20 @@ int InstrEmit_vcmpbfp_(PPCHIRBuilder& f, InstrData& i, uint32_t vd, uint32_t va,
// if vA or vB are NaN, the 2 high-order bits are set (0xC0000000) // if vA or vB are NaN, the 2 high-order bits are set (0xC0000000)
Value* va_value = f.LoadVR(va); Value* va_value = f.LoadVR(va);
Value* vb_value = f.LoadVR(vb); Value* vb_value = f.LoadVR(vb);
Value* ge = f.VectorCompareSGE(va_value, vb_value, FLOAT32_TYPE); Value* gt = f.VectorCompareSGT(va_value, vb_value, FLOAT32_TYPE);
Value* le = Value* lt =
f.Not(f.VectorCompareSGT(va_value, f.Neg(vb_value), FLOAT32_TYPE)); f.Not(f.VectorCompareSGE(va_value, f.Neg(vb_value), FLOAT32_TYPE));
Value* v = Value* v =
f.Or(f.And(ge, f.LoadConstantVec128(vec128i(0x80000000, 0x80000000, f.Or(f.And(gt, f.LoadConstantVec128(vec128i(0x80000000, 0x80000000,
0x80000000, 0x80000000))), 0x80000000, 0x80000000))),
f.And(le, f.LoadConstantVec128(vec128i(0x40000000, 0x40000000, f.And(lt, f.LoadConstantVec128(vec128i(0x40000000, 0x40000000,
0x40000000, 0x40000000)))); 0x40000000, 0x40000000))));
f.StoreVR(vd, v); f.StoreVR(vd, v);
if (rc) { if (rc) {
// CR0:4 = 0; CR0:5 = VT == 0; CR0:6 = CR0:7 = 0; // CR0:4 = 0; CR0:5 = VT == 0; CR0:6 = CR0:7 = 0;
// If all of the elements are within bounds, CR6[2] is set // If all of the elements are within bounds, CR6[2] is set
// FIXME: Does not affect CR6[0], but the following function does. // FIXME: Does not affect CR6[0], but the following function does.
f.UpdateCR6(f.Or(ge, le)); f.UpdateCR6(f.Or(gt, lt));
} }
return 0; return 0;
} }

View File

@ -30,3 +30,39 @@ test_vcmpxxfp_3:
#_ REGISTER_OUT v4 [3f800000, 3f800000, 3f800000, 3f800000] #_ REGISTER_OUT v4 [3f800000, 3f800000, 3f800000, 3f800000]
#_ REGISTER_OUT v5 [3f800001, 3f800001, 3f800001, 3f800001] #_ REGISTER_OUT v5 [3f800001, 3f800001, 3f800001, 3f800001]
#_ REGISTER_OUT r3 0x00000020 #_ REGISTER_OUT r3 0x00000020
test_vcmpbfp_1:
# Upper-bound equality case: every element of vA (v4) is exactly equal to
# the bound vB (v5). A value equal to the bound counts as in bounds, so no
# out-of-bounds bit may be set in any element of v3.
# Expected r3 = 0x20: the CR6 field read back by mfocrf, with the
# "all elements within bounds" bit set.
# v4 = v5 = [5.0, 5.0, 5.0, 5.0]
#_ REGISTER_IN v4 [40A00000, 40A00000, 40A00000, 40A00000]
#_ REGISTER_IN v5 [40A00000, 40A00000, 40A00000, 40A00000]
vcmpbfp. v3, v4, v5
mfocrf r3, 2 # cr6
blr
#_ REGISTER_OUT v4 [40A00000, 40A00000, 40A00000, 40A00000]
#_ REGISTER_OUT v5 [40A00000, 40A00000, 40A00000, 40A00000]
#_ REGISTER_OUT v3 [00000000, 00000000, 00000000, 00000000]
#_ REGISTER_OUT r3 0x00000020
test_vcmpbfp_2:
# Lower-bound equality case: every element of vA (v4) is exactly equal to
# the negated bound -vB (v5 holds +5.0). A value equal to the lower bound
# counts as in bounds, so no out-of-bounds bit may be set in v3.
# Expected r3 = 0x20: the CR6 field read back by mfocrf, with the
# "all elements within bounds" bit set.
# v4 = [-5.0, -5.0, -5.0, -5.0]
#_ REGISTER_IN v4 [C0A00000, C0A00000, C0A00000, C0A00000]
#_ REGISTER_IN v5 [40A00000, 40A00000, 40A00000, 40A00000]
vcmpbfp. v3, v4, v5
mfocrf r3, 2 # cr6
blr
#_ REGISTER_OUT v4 [C0A00000, C0A00000, C0A00000, C0A00000]
#_ REGISTER_OUT v5 [40A00000, 40A00000, 40A00000, 40A00000]
#_ REGISTER_OUT v3 [00000000, 00000000, 00000000, 00000000]
#_ REGISTER_OUT r3 0x00000020
test_vcmpbfp_3:
# Mixed case against a bound of 5.0 in every element of v5:
#   element 0:  7.0 >  5.0 -> above the upper bound, v3 gets 0x80000000
#   element 1: -7.0 < -5.0 -> below the lower bound, v3 gets 0x40000000
#   elements 2-3: 5.0 equals the bound -> in bounds, v3 gets 0
# Not every element is within bounds, so the CR6 field read back by mfocrf
# is expected to be clear (r3 = 0).
# v4 = [7.0, -7.0, 5.0, 5.0]
#_ REGISTER_IN v4 [40E00000, C0E00000, 40A00000, 40A00000]
#_ REGISTER_IN v5 [40A00000, 40A00000, 40A00000, 40A00000]
vcmpbfp. v3, v4, v5
mfocrf r3, 2 # cr6
blr
#_ REGISTER_OUT v4 [40E00000, C0E00000, 40A00000, 40A00000]
#_ REGISTER_OUT v5 [40A00000, 40A00000, 40A00000, 40A00000]
#_ REGISTER_OUT v3 [80000000, 40000000, 00000000, 00000000]
#_ REGISTER_OUT r3 0x00000000