diff --git a/src/alloy/backend/ivm/ivm_intcode.cc b/src/alloy/backend/ivm/ivm_intcode.cc
index 000908151..30edbee25 100644
--- a/src/alloy/backend/ivm/ivm_intcode.cc
+++ b/src/alloy/backend/ivm/ivm_intcode.cc
@@ -1071,6 +1071,19 @@ uint32_t IntCode_VECTOR_CONVERT_I2F(IntCodeState& ics, const IntCode* i) {
   dest.f4[3] = (float)(int32_t)src1.i4[3];
   return IA_NEXT;
 }
+uint32_t IntCode_VECTOR_CONVERT_I2F_U(IntCodeState& ics, const IntCode* i) {
+  const vec128_t& src1 = ics.rf[i->src1_reg].v128;
+  vec128_t& dest = ics.rf[i->dest_reg].v128;
+  dest.f4[0] = (float)(uint32_t)src1.i4[0];
+  dest.f4[1] = (float)(uint32_t)src1.i4[1];
+  dest.f4[2] = (float)(uint32_t)src1.i4[2];
+  dest.f4[3] = (float)(uint32_t)src1.i4[3];
+  return IA_NEXT;
+}
 int Translate_VECTOR_CONVERT_I2F(TranslationContext& ctx, Instr* i) {
-  return DispatchToC(ctx, i, IntCode_VECTOR_CONVERT_I2F);
+  if (i->flags & ARITHMETIC_UNSIGNED) {
+    return DispatchToC(ctx, i, IntCode_VECTOR_CONVERT_I2F_U);
+  } else {
+    return DispatchToC(ctx, i, IntCode_VECTOR_CONVERT_I2F);
+  }
 }
diff --git a/src/alloy/frontend/ppc/ppc_emit_altivec.cc b/src/alloy/frontend/ppc/ppc_emit_altivec.cc
index 7ad3cf6cd..01e4080cd 100644
--- a/src/alloy/frontend/ppc/ppc_emit_altivec.cc
+++ b/src/alloy/frontend/ppc/ppc_emit_altivec.cc
@@ -504,42 +504,58 @@ XEEMITTER(vavguw, 0x10000482, VX )(PPCHIRBuilder& f, InstrData& i) {
   return 1;
 }
 
-XEEMITTER(vcfsx, 0x1000034A, VX )(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
-}
-
-XEEMITTER(vcsxwfp128, VX128_3(6, 688), VX128_3)(PPCHIRBuilder& f, InstrData& i) {
-  // (VD) <- float(VB) / 2^uimm
-  uint32_t uimm = VX128_3_IMM;
+int InstrEmit_vcfsx_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb, uint32_t uimm) {
+  // (VD) <- float(VB as signed) / 2^uimm
   uimm = uimm ? (2 << (uimm - 1)) : 1;
   Value* v = f.Div(
-      f.VectorConvertI2F(f.LoadVR(VX128_3_VB128)),
+      f.VectorConvertI2F(f.LoadVR(vb)),
       f.Splat(f.LoadConstant((float)uimm), VEC128_TYPE));
-  f.StoreVR(VX128_3_VD128, v);
+  f.StoreVR(vd, v);
   return 0;
 }
+XEEMITTER(vcfsx, 0x1000034A, VX )(PPCHIRBuilder& f, InstrData& i) {
+  return InstrEmit_vcfsx_(f, i.VX.VD, i.VX.VB, i.VX.VA);
+}
+XEEMITTER(vcsxwfp128, VX128_3(6, 688), VX128_3)(PPCHIRBuilder& f, InstrData& i) {
+  return InstrEmit_vcfsx_(f, VX128_3_VD128, VX128_3_VB128, VX128_3_IMM);
+}
+
+int InstrEmit_vcfux_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb, uint32_t uimm) {
+  // (VD) <- float(VB as unsigned) / 2^uimm
+  uimm = uimm ? (2 << (uimm - 1)) : 1;
+  Value* v = f.Div(
+      f.VectorConvertI2F(f.LoadVR(vb), ARITHMETIC_UNSIGNED),
+      f.Splat(f.LoadConstant((float)uimm), VEC128_TYPE));
+  f.StoreVR(vd, v);
+  return 0;
+}
+XEEMITTER(vcfux, 0x1000030A, VX )(PPCHIRBuilder& f, InstrData& i) {
+  return InstrEmit_vcfux_(f, i.VX.VD, i.VX.VB, i.VX.VA);
+}
+XEEMITTER(vcuxwfp128, VX128_3(6, 752), VX128_3)(PPCHIRBuilder& f, InstrData& i) {
+  return InstrEmit_vcfux_(f, VX128_3_VD128, VX128_3_VB128, VX128_3_IMM);
+}
 
 XEEMITTER(vcfpsxws128, VX128_3(6, 560), VX128_3)(PPCHIRBuilder& f, InstrData& i) {
   XEINSTRNOTIMPLEMENTED();
   return 1;
 }
 
-XEEMITTER(vcfux, 0x1000030A, VX )(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
-}
-
-XEEMITTER(vcuxwfp128, VX128_3(6, 752), VX128_3)(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
-}
-
 XEEMITTER(vcfpuxws128, VX128_3(6, 624), VX128_3)(PPCHIRBuilder& f, InstrData& i) {
   XEINSTRNOTIMPLEMENTED();
   return 1;
 }
 
+XEEMITTER(vctsxs, 0x100003CA, VX )(PPCHIRBuilder& f, InstrData& i) {
+  XEINSTRNOTIMPLEMENTED();
+  return 1;
+}
+
+XEEMITTER(vctuxs, 0x1000038A, VX )(PPCHIRBuilder& f, InstrData& i) {
+  XEINSTRNOTIMPLEMENTED();
+  return 1;
+}
+
 int InstrEmit_vcmpbfp_(PPCHIRBuilder& f, InstrData& i, uint32_t vd, uint32_t va, uint32_t vb, uint32_t rc) {
   XEINSTRNOTIMPLEMENTED();
   return 1;
@@ -693,16 +709,6 @@ XEEMITTER(vcmpgtuw, 0x10000286, VXR )(PPCHIRBuilder& f, InstrData& i) {
   return InstrEmit_vcmpxxi_(f, i, vcmpxxi_gt_unsigned, 4, i.VXR.VD, i.VXR.VA, i.VXR.VB, i.VXR.Rc);
 }
 
-XEEMITTER(vctsxs, 0x100003CA, VX )(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
-}
-
-XEEMITTER(vctuxs, 0x1000038A, VX )(PPCHIRBuilder& f, InstrData& i) {
-  XEINSTRNOTIMPLEMENTED();
-  return 1;
-}
-
 int InstrEmit_vexptefp_(PPCHIRBuilder& f, uint32_t vd, uint32_t vb) {
   // (VD) <- pow2(VB)
   Value* v = f.Pow2(f.LoadVR(vb));
diff --git a/src/alloy/frontend/ppc/ppc_emit_alu.cc b/src/alloy/frontend/ppc/ppc_emit_alu.cc
index ed9fbdf38..7c7378f6a 100644
--- a/src/alloy/frontend/ppc/ppc_emit_alu.cc
+++ b/src/alloy/frontend/ppc/ppc_emit_alu.cc
@@ -1115,57 +1115,46 @@ XEEMITTER(srwx, 0x7C000430, X )(PPCHIRBuilder& f, InstrData& i) {
   return 0;
 }
 
-// XEEMITTER(sradx, 0x7C000634, X )(PPCHIRBuilder& f, InstrData& i) {
-//   // n <- rB[58-63]
-//   // r <- ROTL[64](rS, 64 - n)
-//   // if rB[57] = 0 then m ← MASK(n, 63)
-//   //              else m ← (64)0
-//   // S ← rS[0]
-//   // rA <- (r & m) | (((64)S) & ¬ m)
-//   // XER[CA] <- S & ((r & ¬ m) ¦ 0)
+XEEMITTER(sradx, 0x7C000634, X )(PPCHIRBuilder& f, InstrData& i) {
+  // n <- rB[58-63]
+  // r <- ROTL[64](rS, 64 - n)
+  // if rB[57] = 0 then m ← MASK(n, 63)
+  //              else m ← (64)0
+  // S ← rS[0]
+  // rA <- (r & m) | (((64)S) & ¬ m)
+  // XER[CA] <- S & ((r & ¬ m) ¦ 0)
 
-//   // if n == 0: rA <- rS, XER[CA] = 0
-//   // if n >= 64: rA <- 64 sign bits of rS, XER[CA] = sign bit of rS
+  // if n == 0: rA <- rS, XER[CA] = 0
+  // if n >= 64: rA <- 64 sign bits of rS, XER[CA] = sign bit of rS
 
-//   GpVar v(c.newGpVar());
-//   c.mov(v, f.LoadGPR(i.X.RT));
-//   GpVar sh(c.newGpVar());
-//   c.mov(sh, f.LoadGPR(i.X.RB));
-//   c.and_(sh, imm(0x7F));
+  Value* v = f.LoadGPR(i.X.RT);
+  Value* sh = f.And(f.Truncate(f.LoadGPR(i.X.RB), INT8_TYPE),
+                    f.LoadConstant((int8_t)0x7F));
 
-//   // CA is set if any bits are shifted out of the right and if the result
-//   // is negative. Start tracking that here.
-//   GpVar ca(c.newGpVar());
-//   c.mov(ca, imm(0xFFFFFFFFFFFFFFFF));
-//   GpVar ca_sh(c.newGpVar());
-//   c.mov(ca_sh, imm(63));
-//   c.sub(ca_sh, sh);
-//   c.shl(ca, ca_sh);
-//   c.shr(ca, ca_sh);
-//   c.and_(ca, v);
-//   c.cmp(ca, imm(0));
-//   c.xor_(ca, ca);
-//   c.setnz(ca.r8());
+  // CA is set if any bits are shifted out of the right and if the result
+  // is negative. Start tracking that here.
+  // TODO(benvanik): dynamically generate mask better than this.
+  Value* ca_sh = f.Sub(f.LoadConstant((int8_t)63), sh);
+  Value* ca =
+      f.Shr(f.Shl(f.LoadConstant(0xFFFFFFFFFFFFFFFFull), ca_sh), ca_sh);
+  ca = f.CompareNE(f.And(ca, v), f.LoadZero(INT64_TYPE));
 
-//   // Shift right.
-//   c.sar(v, sh);
+  // Shift right.
+  v = f.Sha(v, sh);
 
-//   // CA is set to 1 if the low-order 32 bits of (RS) contain a negative number
-//   // and any 1-bits are shifted out of position 63; otherwise CA is set to 0.
-//   // We already have ca set to indicate the pos 63 bit, now just and in sign.
-//   GpVar ca_2(c.newGpVar());
-//   c.mov(ca_2, v);
-//   c.shr(ca_2, imm(63));
-//   c.and_(ca, ca_2);
+  // CA is set to 1 if the low-order 32 bits of (RS) contain a negative number
+  // and any 1-bits are shifted out of position 63; otherwise CA is set to 0.
+  // We already have ca set to indicate the pos 63 bit, now just and in sign.
+  ca = f.And(ca, f.Shr(v, 63));
 
-//   f.StoreGPR(i.X.RA, v);
-//   e.update_xer_with_carry(ca);
+  f.StoreCA(ca);
+  f.StoreGPR(i.X.RA, v);
 
-//   if (i.X.Rc) {
-//     f.UpdateCR(0, v);
-//   }
-//   return 0;
-// }
+  if (i.X.Rc) {
+    f.UpdateCR(0, v);
+  }
+  return 0;
+}
 
 XEEMITTER(sradix, 0x7C000674, XS )(PPCHIRBuilder& f, InstrData& i) {
   // n <- sh[5] || sh[0-4]
@@ -1189,7 +1178,7 @@ XEEMITTER(sradix, 0x7C000674, XS )(PPCHIRBuilder& f, InstrData& i) {
   f.StoreCA(ca);
 
   v = f.Sha(v, sh);
-  if (i.X.Rc) {
+  if (i.XS.Rc) {
     f.UpdateCR(0, v);
   }
   f.StoreGPR(i.XS.RA, v);
@@ -1325,7 +1314,7 @@ void RegisterEmitCategoryALU() {
   XEREGISTERINSTR(slwx, 0x7C000030);
   XEREGISTERINSTR(srdx, 0x7C000436);
   XEREGISTERINSTR(srwx, 0x7C000430);
-  // XEREGISTERINSTR(sradx, 0x7C000634);
+  XEREGISTERINSTR(sradx, 0x7C000634);
   XEREGISTERINSTR(sradix, 0x7C000674);
   XEREGISTERINSTR(srawx, 0x7C000630);
   XEREGISTERINSTR(srawix, 0x7C000670);
diff --git a/src/alloy/hir/hir_builder.cc b/src/alloy/hir/hir_builder.cc
index c08f96a72..198e6502c 100644
--- a/src/alloy/hir/hir_builder.cc
+++ b/src/alloy/hir/hir_builder.cc
@@ -733,11 +733,11 @@ Value* HIRBuilder::Round(Value* value, RoundMode round_mode) {
   return i->dest;
 }
 
-Value* HIRBuilder::VectorConvertI2F(Value* value) {
+Value* HIRBuilder::VectorConvertI2F(Value* value, uint32_t arithmetic_flags) {
   ASSERT_VECTOR_TYPE(value);
 
   Instr* i = AppendInstr(
-      OPCODE_VECTOR_CONVERT_I2F_info, 0,
+      OPCODE_VECTOR_CONVERT_I2F_info, arithmetic_flags,
       AllocValue(value->type));
   i->set_src1(value);
   i->src2.value = i->src3.value = NULL;
diff --git a/src/alloy/hir/hir_builder.h b/src/alloy/hir/hir_builder.h
index 78f156dfa..c4dcd86bc 100644
--- a/src/alloy/hir/hir_builder.h
+++ b/src/alloy/hir/hir_builder.h
@@ -98,7 +98,7 @@ public:
   // TODO(benvanik): make this cleaner -- not happy with it.
   // It'd be nice if Convert() supported this, however then we'd need a
   // VEC128_INT32_TYPE or something.
-  Value* VectorConvertI2F(Value* value);
+  Value* VectorConvertI2F(Value* value, uint32_t arithmetic_flags = 0);
   Value* VectorConvertF2I(Value* value, RoundMode round_mode = ROUND_TO_ZERO);
 
   Value* LoadZero(TypeName type);
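A quick standalone check of the semantics the new emitters rely on. This is illustrative C++ outside the patch, not part of any xenia source file; it only verifies that the scale expression uimm ? (2 << (uimm - 1)) : 1 shared by InstrEmit_vcfsx_ and InstrEmit_vcfux_ equals 2^uimm over the 5-bit UIMM range, and shows why the ARITHMETIC_UNSIGNED path (IntCode_VECTOR_CONVERT_I2F_U) cannot reuse the signed conversion.

// Standalone sanity check (illustrative only, not part of the patch).
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // UIMM is a 5-bit field, so 0..31.
  for (uint32_t uimm = 0; uimm < 32; ++uimm) {
    uint32_t scale = uimm ? (2u << (uimm - 1)) : 1u;  // expression from the patch
    assert(scale == (1u << uimm));                    // equals 2^uimm
  }
  // The same 32-bit lane bits convert very differently by signedness,
  // which is why vcfux needs the unsigned intcode handler.
  uint32_t lane = 0xFFFFFFFFu;
  printf("signed:   %f\n", (float)(int32_t)lane);   // -1.000000
  printf("unsigned: %f\n", (float)(uint32_t)lane);  // 4294967296.000000
  return 0;
}

Compiled and run, the loop assertion passes silently and the two printed conversions are -1.000000 and 4294967296.000000.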