From 9c2cf49755a9df0e56f6c7277a4e3109dc668e5f Mon Sep 17 00:00:00 2001
From: Ben Vanik
Date: Fri, 29 Aug 2014 21:49:26 -0700
Subject: [PATCH] Tweaking some instructions.

---
 src/alloy/backend/x64/x64_sequences.cc     |  6 ++--
 src/alloy/frontend/ppc/ppc_emit_altivec.cc | 40 +++++++++++++---------
 2 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/src/alloy/backend/x64/x64_sequences.cc b/src/alloy/backend/x64/x64_sequences.cc
index f992b9949..c11016f03 100644
--- a/src/alloy/backend/x64/x64_sequences.cc
+++ b/src/alloy/backend/x64/x64_sequences.cc
@@ -3681,7 +3681,7 @@ EMITTER(POW2_F32, MATCH(I<OPCODE_POW2, F32<>, F32<>>)) {
   static __m128 EmulatePow2(void*, __m128 src) {
     float src_value;
     _mm_store_ss(&src_value, src);
-    float result = std::pow(2.0f, src_value);
+    float result = std::exp2(src_value);
     return _mm_load_ss(&result);
   }
   static void Emit(X64Emitter& e, const EmitArgType& i) {
@@ -3695,7 +3695,7 @@ EMITTER(POW2_F64, MATCH(I<OPCODE_POW2, F64<>, F64<>>)) {
   static __m128d EmulatePow2(void*, __m128d src) {
     double src_value;
     _mm_store_sd(&src_value, src);
-    double result = std::pow(2, src_value);
+    double result = std::exp2(src_value);
     return _mm_load_sd(&result);
   }
   static void Emit(X64Emitter& e, const EmitArgType& i) {
@@ -3710,7 +3710,7 @@ EMITTER(POW2_V128, MATCH(I<OPCODE_POW2, V128<>, V128<>>)) {
     alignas(16) float values[4];
     _mm_store_ps(values, src);
     for (size_t i = 0; i < 4; ++i) {
-      values[i] = std::pow(2.0f, values[i]);
+      values[i] = std::exp2(values[i]);
     }
     return _mm_load_ps(values);
   }
diff --git a/src/alloy/frontend/ppc/ppc_emit_altivec.cc b/src/alloy/frontend/ppc/ppc_emit_altivec.cc
index 479c94f79..cfbb58ecd 100644
--- a/src/alloy/frontend/ppc/ppc_emit_altivec.cc
+++ b/src/alloy/frontend/ppc/ppc_emit_altivec.cc
@@ -489,9 +489,9 @@ XEEMITTER(vavguw, 0x10000482, VX)(PPCHIRBuilder& f, InstrData& i) {
 int InstrEmit_vcfsx_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb,
                      uint32_t uimm) {
   // (VD) <- float(VB as signed) / 2^uimm
-  uimm = uimm ? (2 << (uimm - 1)) : 1;
+  float fuimm = std::exp2(uimm);
   Value* v = f.Div(f.VectorConvertI2F(f.LoadVR(vb)),
-                   f.Splat(f.LoadConstant((float)uimm), VEC128_TYPE));
+                   f.Splat(f.LoadConstant(fuimm), VEC128_TYPE));
   f.StoreVR(vd, v);
   return 0;
 }
@@ -506,9 +506,9 @@ XEEMITTER(vcsxwfp128, VX128_3(6, 688), VX128_3)(PPCHIRBuilder& f,
 int InstrEmit_vcfux_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb,
                      uint32_t uimm) {
   // (VD) <- float(VB as unsigned) / 2^uimm
-  uimm = uimm ? (2 << (uimm - 1)) : 1;
+  float fuimm = std::exp2(uimm);
   Value* v = f.Div(f.VectorConvertI2F(f.LoadVR(vb), ARITHMETIC_UNSIGNED),
-                   f.Splat(f.LoadConstant((float)uimm), VEC128_TYPE));
+                   f.Splat(f.LoadConstant(fuimm), VEC128_TYPE));
   f.StoreVR(vd, v);
   return 0;
 }
@@ -523,9 +523,8 @@ XEEMITTER(vcuxwfp128, VX128_3(6, 752), VX128_3)(PPCHIRBuilder& f,
 int InstrEmit_vctsxs_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb,
                       uint32_t uimm) {
   // (VD) <- int_sat(VB as signed * 2^uimm)
-  uimm = uimm ? (2 << (uimm - 1)) : 1;
-  Value* v =
-      f.Mul(f.LoadVR(vb), f.Splat(f.LoadConstant((float)uimm), VEC128_TYPE));
+  float fuimm = std::exp2(uimm);
+  Value* v = f.Mul(f.LoadVR(vb), f.Splat(f.LoadConstant(fuimm), VEC128_TYPE));
   v = f.VectorConvertF2I(v, ARITHMETIC_SATURATE);
   f.StoreVR(vd, v);
   return 0;
@@ -541,9 +540,8 @@ XEEMITTER(vcfpsxws128, VX128_3(6, 560), VX128_3)(PPCHIRBuilder& f,
 int InstrEmit_vctuxs_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb,
                       uint32_t uimm) {
   // (VD) <- int_sat(VB as unsigned * 2^uimm)
-  uimm = uimm ? (2 << (uimm - 1)) : 1;
-  Value* v =
-      f.Mul(f.LoadVR(vb), f.Splat(f.LoadConstant((float)uimm), VEC128_TYPE));
+  float fuimm = std::exp2(uimm);
+  Value* v = f.Mul(f.LoadVR(vb), f.Splat(f.LoadConstant(fuimm), VEC128_TYPE));
   v = f.VectorConvertF2I(v, ARITHMETIC_UNSIGNED | ARITHMETIC_SATURATE);
   f.StoreVR(vd, v);
   return 0;
@@ -556,6 +554,8 @@ XEEMITTER(vcfpuxws128, VX128_3(6, 624), VX128_3)(PPCHIRBuilder& f,
   return InstrEmit_vctuxs_(f, VX128_3_VD128, VX128_3_VB128, VX128_3_IMM);
 }
 
+// vcmpbfp128 VT, VA, VB    VT.u0 = ((VA.x < VB.x) << 31) | ((VA.x > -VB.x) << 30); ...; VT.u0 = ((VA.x > VB.x) << 31) | ((VA.x < -VB.x) << 30);
+// vcmpbfp128. VT, VA, VB   VT.u0 = ((VA.x < VB.x) << 31) | ((VA.x > -VB.x) << 30); ...; VT.u0 = ((VA.x > VB.x) << 31) | ((VA.x < -VB.x) << 30); CR0:4 = 0; CR0:5 = VT == 0; CR0:6 = CR0:7 = 0;
 int InstrEmit_vcmpbfp_(PPCHIRBuilder& f, InstrData& i, uint32_t vd,
                        uint32_t va, uint32_t vb, uint32_t rc) {
   XEINSTRNOTIMPLEMENTED();
@@ -1151,7 +1151,7 @@ XEEMITTER(vpermwi128, VX128_P(6, 528), VX128_P)(PPCHIRBuilder& f,
 int InstrEmit_vrefp_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb) {
   // (VD) <- 1/(VB)
-  vec128_t one = {{{1, 1, 1, 1}}};
+  vec128_t one = vec128f(1.0f);
   Value* v = f.Div(f.LoadConstant(one), f.LoadVR(vb));
   f.StoreVR(vd, v);
   return 0;
 }
@@ -1310,8 +1310,8 @@ XEEMITTER(vrsqrtefp128, VX128_3(6, 1648), VX128_3)(PPCHIRBuilder& f,
 
 int InstrEmit_vsel_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb,
                     uint32_t vc) {
-  Value* a = f.LoadVR(va);
-  Value* v = f.Xor(f.And(f.Xor(a, f.LoadVR(vb)), f.LoadVR(vc)), a);
+  Value* c = f.LoadVR(vc);
+  Value* v = f.Or(f.And(f.LoadVR(va), f.Not(c)), f.And(f.LoadVR(vb), c));
   f.StoreVR(vd, v);
   return 0;
 }
@@ -1323,8 +1323,11 @@ XEEMITTER(vsel128, VX128(5, 848), VX128)(PPCHIRBuilder& f, InstrData& i) {
 }
 
 XEEMITTER(vsl, 0x100001C4, VX)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  Value* v = f.Shl(f.LoadVR(i.VX.VA),
+                   f.And(f.Extract(f.LoadVR(i.VX.VB), 15, INT8_TYPE),
+                         f.LoadConstant(int8_t(0x7F))));
+  f.StoreVR(i.VX.VD, v);
+  return 0;
 }
 
 XEEMITTER(vslb, 0x10000104, VX)(PPCHIRBuilder& f, InstrData& i) {
@@ -1499,8 +1502,11 @@ XEEMITTER(vspltisw128, VX128_3(6, 1904), VX128_3)(PPCHIRBuilder& f,
 }
 
 XEEMITTER(vsr, 0x100002C4, VX)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
+  Value* v = f.Shr(f.LoadVR(i.VX.VA),
+                   f.And(f.Extract(f.LoadVR(i.VX.VB), 15, INT8_TYPE),
+                         f.LoadConstant(int8_t(0x7F))));
+  f.StoreVR(i.VX.VD, v);
+  return 0;
 }
 
 XEEMITTER(vsrab, 0x10000304, VX)(PPCHIRBuilder& f, InstrData& i) {
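
Note (reviewer sketch, not part of the patch): the rewrites above rest on two
small equivalences. For the 5-bit UIMM field of vcfsx/vcfux/vctsxs/vctuxs,
std::exp2(uimm) yields the same value as the old
uimm ? (2 << (uimm - 1)) : 1 construction, and it also sidesteps the signed
overflow the shift form would hit at uimm == 31 (2 << 30 exceeds INT_MAX).
For vsel, the new (a & ~c) | (b & c) form is bit-for-bit identical to the old
a ^ ((a ^ b) & c) select idiom. The standalone C++ check below exercises both;
it assumes nothing from the tree beyond the standard library.

// Standalone sanity check (reviewer sketch, not part of the patch).
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  // 2^uimm over the full 5-bit immediate domain 0..31. The shift uses an
  // unsigned literal; the patch's original signed form (2 << 30) would
  // overflow at uimm == 31. All of these powers are exact in float.
  for (uint32_t uimm = 0; uimm < 32; ++uimm) {
    uint32_t old_pow = uimm ? (2u << (uimm - 1)) : 1u;
    assert(static_cast<float>(old_pow) ==
           std::exp2(static_cast<float>(uimm)));
  }

  // vsel: both the old XOR idiom and the new OR/AND/NOT form select bits
  // of b where c is 1 and bits of a where c is 0.
  const uint64_t a = 0x0123456789ABCDEFull;
  const uint64_t b = 0xFEDCBA9876543210ull;
  const uint64_t masks[] = {0ull, ~0ull, 0xF0F0F0F0F0F0F0F0ull};
  for (uint64_t c : masks) {
    assert((a ^ ((a ^ b) & c)) == ((a & ~c) | (b & c)));
  }

  std::puts("ok");
  return 0;
}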