diff --git a/src/alloy/backend/ivm/ivm_intcode.cc b/src/alloy/backend/ivm/ivm_intcode.cc
index b0aef6bdd..29682a2a9 100644
--- a/src/alloy/backend/ivm/ivm_intcode.cc
+++ b/src/alloy/backend/ivm/ivm_intcode.cc
@@ -1311,6 +1311,43 @@ int Translate_LOAD(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, fns[i->dest->type]);
 }
 
+uint32_t IntCode_LOAD_VECTOR_LEFT_V128(IntCodeState& ics, const IntCode* i) {
+  const uint32_t address = ics.rf[i->src1_reg].u32;
+  const size_t eb = address & 0xF;
+  const size_t size = 16 - eb;
+  const uint8_t* p = ics.membase + address;
+  vec128_t& dest = ics.rf[i->dest_reg].v128;
+  // Bytes from EA through the end of the aligned 16b block fill the
+  // leftmost bytes of the vector; the remaining bytes are zeroed.
+  dest.low = dest.high = 0;
+  for (size_t n = 0; n < size; n++) {
+    dest.b16[15 - n] = p[n];
+  }
+  return IA_NEXT;
+}
+int Translate_LOAD_VECTOR_LEFT(TranslationContext& ctx, Instr* i) {
+  return DispatchToC(ctx, i, IntCode_LOAD_VECTOR_LEFT_V128);
+}
+
+uint32_t IntCode_LOAD_VECTOR_RIGHT_V128(IntCodeState& ics, const IntCode* i) {
+  const uint32_t address = ics.rf[i->src1_reg].u32;
+  const size_t eb = address & 0xF;
+  const size_t size = eb;
+  const uint8_t* p = ics.membase + (address & ~0xF);
+  vec128_t& dest = ics.rf[i->dest_reg].v128;
+  // Bytes from the start of the aligned 16b block up to (but not including)
+  // EA fill the rightmost bytes of the vector; remaining bytes are zeroed.
+  // Note that if the address is already 16b aligned no bytes are loaded.
+  dest.low = dest.high = 0;
+  for (size_t n = 0; n < size; n++) {
+    dest.b16[n] = p[size - 1 - n];
+  }
+  return IA_NEXT;
+}
+int Translate_LOAD_VECTOR_RIGHT(TranslationContext& ctx, Instr* i) {
+  return DispatchToC(ctx, i, IntCode_LOAD_VECTOR_RIGHT_V128);
+}
+
 uint32_t IntCode_LOAD_ACQUIRE_I8(IntCodeState& ics, const IntCode* i) {
   uint32_t address = ics.rf[i->src1_reg].u32;
   xe_atomic_exchange_32(address, ics.reserve_address);
@@ -1534,6 +1571,44 @@ int Translate_STORE_RELEASE(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, fns[i->src2.value->type]);
 }
 
+uint32_t IntCode_STORE_VECTOR_LEFT_V128(IntCodeState& ics, const IntCode* i) {
+  const uint32_t address = ics.rf[i->src1_reg].u32;
+  const size_t eb = address & 0xF;
+  const size_t size = 16 - eb;
+  uint8_t* p = ics.membase + address;
+  const vec128_t& src = ics.rf[i->src2_reg].v128;
+  // The leftmost vector bytes are stored from EA through the end of the
+  // aligned 16b block; if EA is already 16b aligned all 16 bytes are stored.
+  for (size_t n = 0; n < size; n++) {
+    p[n] = src.b16[15 - n];
+  }
+  return IA_NEXT;
+}
+int Translate_STORE_VECTOR_LEFT(TranslationContext& ctx, Instr* i) {
+  return DispatchToC(ctx, i, IntCode_STORE_VECTOR_LEFT_V128);
+}
+
+uint32_t IntCode_STORE_VECTOR_RIGHT_V128(IntCodeState& ics, const IntCode* i) {
+  const uint32_t address = ics.rf[i->src1_reg].u32;
+  const size_t eb = address & 0xF;
+  const size_t size = eb;
+  uint8_t* p = ics.membase + (address & ~0xF);
+  const vec128_t& src = ics.rf[i->src2_reg].v128;
+  // The rightmost vector bytes are stored from the start of the aligned 16b
+  // block up to EA; if the address is already 16b aligned no bytes are stored.
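+  // Illustrative example: with eb = 3 the three rightmost vector bytes land
+  // at p[0..2], with the rightmost byte at p[2]. The usual pairing is stvlx
+  // at EA plus stvrx at EA + 16, which together store one full unaligned
+  // 16b vector (see the Cell SIMD docs referenced in ppc_emit_altivec.cc).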
+  for (size_t n = 0; n < size; n++) {
+    p[size - 1 - n] = src.b16[n];
+  }
+  return IA_NEXT;
+}
+int Translate_STORE_VECTOR_RIGHT(TranslationContext& ctx, Instr* i) {
+  return DispatchToC(ctx, i, IntCode_STORE_VECTOR_RIGHT_V128);
+}
+
 uint32_t IntCode_PREFETCH(IntCodeState& ics, const IntCode* i) {
   return IA_NEXT;
 }
@@ -3072,8 +3147,12 @@ static const TranslateFn dispatch_table[] = {
 
   Translate_LOAD,
   Translate_LOAD_ACQUIRE,
+  Translate_LOAD_VECTOR_LEFT,
+  Translate_LOAD_VECTOR_RIGHT,
   Translate_STORE,
   Translate_STORE_RELEASE,
+  Translate_STORE_VECTOR_LEFT,
+  Translate_STORE_VECTOR_RIGHT,
   Translate_PREFETCH,
 
   TranslateInvalid, //Translate_MAX,
diff --git a/src/alloy/frontend/ppc/ppc_emit_altivec.cc b/src/alloy/frontend/ppc/ppc_emit_altivec.cc
index 77218928b..3982030c3 100644
--- a/src/alloy/frontend/ppc/ppc_emit_altivec.cc
+++ b/src/alloy/frontend/ppc/ppc_emit_altivec.cc
@@ -140,12 +140,7 @@ XEEMITTER(lvewx128, VX128_1(4, 131), VX128_1)(PPCFunctionBuilder& f, Inst
 }
 
 int InstrEmit_lvsl_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t ra, uint32_t rb) {
-  Value* ea;
-  if (ra) {
-    ea = f.Add(f.LoadGPR(ra), f.LoadGPR(rb));
-  } else {
-    ea = f.LoadGPR(rb);
-  }
+  Value* ea = ra ? f.Add(f.LoadGPR(ra), f.LoadGPR(rb)) : f.LoadGPR(rb);
   Value* sh = f.Truncate(f.And(ea, f.LoadConstant((int64_t)0xF)), INT8_TYPE);
   Value* v = f.LoadVectorShl(sh);
   f.StoreVR(vd, v);
@@ -159,12 +154,7 @@ XEEMITTER(lvsl128, VX128_1(4, 3), VX128_1)(PPCFunctionBuilder& f, Inst
 }
 
 int InstrEmit_lvsr_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t ra, uint32_t rb) {
-  Value* ea;
-  if (ra) {
-    ea = f.Add(f.LoadGPR(ra), f.LoadGPR(rb));
-  } else {
-    ea = f.LoadGPR(rb);
-  }
+  Value* ea = ra ? f.Add(f.LoadGPR(ra), f.LoadGPR(rb)) : f.LoadGPR(rb);
   Value* sh = f.Truncate(f.And(ea, f.LoadConstant((int64_t)0xF)), INT8_TYPE);
   Value* v = f.LoadVectorShr(sh);
   f.StoreVR(vd, v);
@@ -234,185 +224,83 @@ XEEMITTER(stvxl128, VX128_1(4, 963), VX128_1)(PPCFunctionBuilder& f, Inst
   return InstrEmit_stvx128(f, i);
 }
 
-// // The lvlx/lvrx/etc instructions are in Cell docs only:
-// // https://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/C40E4C6133B31EE8872570B500791108/$file/vector_simd_pem_v_2.07c_26Oct2006_cell.pdf
-// int InstrEmit_lvlx_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t ra, uint32_t rb) {
-// GpVar ea(c.newGpVar());
-// c.mov(ea, e.gpr_value(rb));
-// if (ra) {
-// c.add(ea, e.gpr_value(ra));
-// }
-// GpVar sh(c.newGpVar());
-// c.mov(sh, ea);
-// c.and_(sh, imm(0xF));
-// XmmVar v = e.ReadMemoryXmm(i.address, ea, 4);
-// // If fully aligned skip complex work.
-// Label done(c.newLabel());
-// c.test(sh, sh);
-// c.jz(done);
-// {
-// // Shift left by the number of bytes offset and fill with zeros.
-// // We reuse the lvsl table here, as it does that for us.
-// GpVar gt(c.newGpVar()); -// c.xor_(gt, gt); -// c.pinsrb(v, gt.r8(), imm(15)); -// c.shl(sh, imm(4)); // table offset = (16b * sh) -// c.mov(gt, imm((sysint_t)__shift_table_left)); -// c.pshufb(v, xmmword_ptr(gt, sh)); -// } -// c.bind(done); -// c.shufps(v, v, imm(SHUFPS_SWAP_DWORDS)); -// f.StoreVR(vd, v); -// e.TraceVR(vd); -// return 0; -// } -// XEEMITTER(lvlx, 0x7C00040E, X )(PPCFunctionBuilder& f, InstrData& i) { -// return InstrEmit_lvlx_(f, i, i.X.RT, i.X.RA, i.X.RB); -// } -// XEEMITTER(lvlx128, VX128_1(4, 1027), VX128_1)(PPCFunctionBuilder& f, InstrData& i) { -// return InstrEmit_lvlx_(f, i, VX128_1_VD128, i.VX128_1.RA, i.VX128_1.RB); -// } -// XEEMITTER(lvlxl, 0x7C00060E, X )(PPCFunctionBuilder& f, InstrData& i) { -// return InstrEmit_lvlx(f, i); -// } -// XEEMITTER(lvlxl128, VX128_1(4, 1539), VX128_1)(PPCFunctionBuilder& f, InstrData& i) { -// return InstrEmit_lvlx128(f, i); -// } +// The lvlx/lvrx/etc instructions are in Cell docs only: +// https://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/C40E4C6133B31EE8872570B500791108/$file/vector_simd_pem_v_2.07c_26Oct2006_cell.pdf +int InstrEmit_lvlx_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t ra, uint32_t rb) { + Value* ea = ra ? f.Add(f.LoadGPR(ra), f.LoadGPR(rb)) : f.LoadGPR(rb); + Value* v = f.LoadVectorLeft(ea, VEC128_TYPE); + f.StoreVR(vd, v); + return 0; +} +XEEMITTER(lvlx, 0x7C00040E, X )(PPCFunctionBuilder& f, InstrData& i) { + return InstrEmit_lvlx_(f, i, i.X.RT, i.X.RA, i.X.RB); +} +XEEMITTER(lvlx128, VX128_1(4, 1027), VX128_1)(PPCFunctionBuilder& f, InstrData& i) { + return InstrEmit_lvlx_(f, i, VX128_1_VD128, i.VX128_1.RA, i.VX128_1.RB); +} +XEEMITTER(lvlxl, 0x7C00060E, X )(PPCFunctionBuilder& f, InstrData& i) { + return InstrEmit_lvlx(f, i); +} +XEEMITTER(lvlxl128, VX128_1(4, 1539), VX128_1)(PPCFunctionBuilder& f, InstrData& i) { + return InstrEmit_lvlx128(f, i); +} -// int InstrEmit_lvrx_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t ra, uint32_t rb) { -// GpVar ea(c.newGpVar()); -// c.mov(ea, e.gpr_value(rb)); -// if (ra) { -// c.add(ea, e.gpr_value(ra)); -// } -// GpVar sh(c.newGpVar()); -// c.mov(sh, ea); -// c.and_(sh, imm(0xF)); -// // If fully aligned skip complex work. -// XmmVar v(c.newXmmVar()); -// c.pxor(v, v); -// Label done(c.newLabel()); -// c.test(sh, sh); -// c.jz(done); -// { -// // Shift left by the number of bytes offset and fill with zeros. -// // We reuse the lvsl table here, as it does that for us. -// c.movaps(v, e.ReadMemoryXmm(i.address, ea, 4)); -// GpVar gt(c.newGpVar()); -// c.xor_(gt, gt); -// c.pinsrb(v, gt.r8(), imm(0)); -// c.shl(sh, imm(4)); // table offset = (16b * sh) -// c.mov(gt, imm((sysint_t)__shift_table_right)); -// c.pshufb(v, xmmword_ptr(gt, sh)); -// c.shufps(v, v, imm(SHUFPS_SWAP_DWORDS)); -// } -// c.bind(done); -// f.StoreVR(vd, v); -// e.TraceVR(vd); -// return 0; -// } -// XEEMITTER(lvrx, 0x7C00044E, X )(PPCFunctionBuilder& f, InstrData& i) { -// return InstrEmit_lvrx_(f, i, i.X.RT, i.X.RA, i.X.RB); -// } -// XEEMITTER(lvrx128, VX128_1(4, 1091), VX128_1)(PPCFunctionBuilder& f, InstrData& i) { -// return InstrEmit_lvrx_(f, i, VX128_1_VD128, i.VX128_1.RA, i.VX128_1.RB); -// } -// XEEMITTER(lvrxl, 0x7C00064E, X )(PPCFunctionBuilder& f, InstrData& i) { -// return InstrEmit_lvrx(f, i); -// } -// XEEMITTER(lvrxl128, VX128_1(4, 1603), VX128_1)(PPCFunctionBuilder& f, InstrData& i) { -// return InstrEmit_lvrx128(f, i); -// } +int InstrEmit_lvrx_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t ra, uint32_t rb) { + Value* ea = ra ? 
f.Add(f.LoadGPR(ra), f.LoadGPR(rb)) : f.LoadGPR(rb); + Value* v = f.LoadVectorRight(ea, VEC128_TYPE); + f.StoreVR(vd, v); + return 0; +} +XEEMITTER(lvrx, 0x7C00044E, X )(PPCFunctionBuilder& f, InstrData& i) { + return InstrEmit_lvrx_(f, i, i.X.RT, i.X.RA, i.X.RB); +} +XEEMITTER(lvrx128, VX128_1(4, 1091), VX128_1)(PPCFunctionBuilder& f, InstrData& i) { + return InstrEmit_lvrx_(f, i, VX128_1_VD128, i.VX128_1.RA, i.VX128_1.RB); +} +XEEMITTER(lvrxl, 0x7C00064E, X )(PPCFunctionBuilder& f, InstrData& i) { + return InstrEmit_lvrx(f, i); +} +XEEMITTER(lvrxl128, VX128_1(4, 1603), VX128_1)(PPCFunctionBuilder& f, InstrData& i) { + return InstrEmit_lvrx128(f, i); +} -// // TODO(benvanik): implement for real - this is in the memcpy path. -// static void __emulated_stvlx(uint64_t addr, __m128i vd) { -// // addr here is the fully translated address. -// const uint8_t eb = addr & 0xF; -// const size_t size = 16 - eb; -// uint8_t* p = (uint8_t*)addr; -// for (size_t i = 0; i < size; i++) { -// p[i] = vd.m128i_u8[15 - i]; -// } -// } -// int InstrEmit_stvlx_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t ra, uint32_t rb) { -// GpVar ea(c.newGpVar()); -// c.mov(ea, e.gpr_value(rb)); -// if (ra) { -// c.add(ea, e.gpr_value(ra)); -// } -// ea = e.TouchMemoryAddress(i.address, ea); -// XmmVar tvd(c.newXmmVar()); -// c.movaps(tvd, f.LoadVR(vd)); -// c.shufps(tvd, tvd, imm(SHUFPS_SWAP_DWORDS)); -// c.save(tvd); -// GpVar pvd(c.newGpVar()); -// c.lea(pvd, tvd.m128()); -// X86CompilerFuncCall* call = c.call(__emulated_stvlx); -// uint32_t args[] = {kX86VarTypeGpq, kX86VarTypeGpq}; -// call->setPrototype(kX86FuncConvDefault, kX86VarTypeGpq, args, XECOUNT(args)); -// call->setArgument(0, ea); -// call->setArgument(1, pvd); -// e.TraceVR(vd); -// return 0; -// } -// XEEMITTER(stvlx, 0x7C00050E, X )(PPCFunctionBuilder& f, InstrData& i) { -// return InstrEmit_stvlx_(f, i, i.X.RT, i.X.RA, i.X.RB); -// } -// XEEMITTER(stvlx128, VX128_1(4, 1283), VX128_1)(PPCFunctionBuilder& f, InstrData& i) { -// return InstrEmit_stvlx_(f, i, VX128_1_VD128, i.VX128_1.RA, i.VX128_1.RB); -// } -// XEEMITTER(stvlxl, 0x7C00070E, X )(PPCFunctionBuilder& f, InstrData& i) { -// return InstrEmit_stvlx(f, i); -// } -// XEEMITTER(stvlxl128, VX128_1(4, 1795), VX128_1)(PPCFunctionBuilder& f, InstrData& i) { -// return InstrEmit_stvlx128(f, i); -// } +int InstrEmit_stvlx_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t ra, uint32_t rb) { + Value* ea = ra ? f.Add(f.LoadGPR(ra), f.LoadGPR(rb)) : f.LoadGPR(rb); + Value* v = f.LoadVR(vd); + f.StoreVectorLeft(ea, v); + return 0; +} +XEEMITTER(stvlx, 0x7C00050E, X )(PPCFunctionBuilder& f, InstrData& i) { + return InstrEmit_stvlx_(f, i, i.X.RT, i.X.RA, i.X.RB); +} +XEEMITTER(stvlx128, VX128_1(4, 1283), VX128_1)(PPCFunctionBuilder& f, InstrData& i) { + return InstrEmit_stvlx_(f, i, VX128_1_VD128, i.VX128_1.RA, i.VX128_1.RB); +} +XEEMITTER(stvlxl, 0x7C00070E, X )(PPCFunctionBuilder& f, InstrData& i) { + return InstrEmit_stvlx(f, i); +} +XEEMITTER(stvlxl128, VX128_1(4, 1795), VX128_1)(PPCFunctionBuilder& f, InstrData& i) { + return InstrEmit_stvlx128(f, i); +} -// // TODO(benvanik): implement for real - this is in the memcpy path. -// static void __emulated_stvrx(uint64_t addr, __m128i vd) { -// // addr here is the fully translated address. -// const uint8_t eb = addr & 0xF; -// const size_t size = eb; -// addr &= ~0xF; -// uint8_t* p = (uint8_t*)addr; -// // Note that if the input is already 16b aligned no bytes are stored. 
-// for (size_t i = 0; i < size; i++) { -// p[size - 1 - i] = vd.m128i_u8[i]; -// } -// } -// int InstrEmit_stvrx_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t ra, uint32_t rb) { -// GpVar ea(c.newGpVar()); -// c.mov(ea, e.gpr_value(rb)); -// if (ra) { -// c.add(ea, e.gpr_value(ra)); -// } -// ea = e.TouchMemoryAddress(i.address, ea); -// XmmVar tvd(c.newXmmVar()); -// c.movaps(tvd, f.LoadVR(vd)); -// c.shufps(tvd, tvd, imm(SHUFPS_SWAP_DWORDS)); -// c.save(tvd); -// GpVar pvd(c.newGpVar()); -// c.lea(pvd, tvd.m128()); -// X86CompilerFuncCall* call = c.call(__emulated_stvrx); -// uint32_t args[] = {kX86VarTypeGpq, kX86VarTypeGpq}; -// call->setPrototype(kX86FuncConvDefault, kX86VarTypeGpq, args, XECOUNT(args)); -// call->setArgument(0, ea); -// call->setArgument(1, pvd); -// e.TraceVR(vd); -// return 0; -// } -// XEEMITTER(stvrx, 0x7C00054E, X )(PPCFunctionBuilder& f, InstrData& i) { -// return InstrEmit_stvrx_(f, i, i.X.RT, i.X.RA, i.X.RB); -// } -// XEEMITTER(stvrx128, VX128_1(4, 1347), VX128_1)(PPCFunctionBuilder& f, InstrData& i) { -// return InstrEmit_stvrx_(f, i, VX128_1_VD128, i.VX128_1.RA, i.VX128_1.RB); -// } -// XEEMITTER(stvrxl, 0x7C00074E, X )(PPCFunctionBuilder& f, InstrData& i) { -// return InstrEmit_stvrx(f, i); -// } -// XEEMITTER(stvrxl128, VX128_1(4, 1859), VX128_1)(PPCFunctionBuilder& f, InstrData& i) { -// return InstrEmit_stvrx128(f, i); -// } +int InstrEmit_stvrx_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t ra, uint32_t rb) { + Value* ea = ra ? f.Add(f.LoadGPR(ra), f.LoadGPR(rb)) : f.LoadGPR(rb); + Value* v = f.LoadVR(vd); + f.StoreVectorRight(ea, v); + return 0; +} +XEEMITTER(stvrx, 0x7C00054E, X )(PPCFunctionBuilder& f, InstrData& i) { + return InstrEmit_stvrx_(f, i, i.X.RT, i.X.RA, i.X.RB); +} +XEEMITTER(stvrx128, VX128_1(4, 1347), VX128_1)(PPCFunctionBuilder& f, InstrData& i) { + return InstrEmit_stvrx_(f, i, VX128_1_VD128, i.VX128_1.RA, i.VX128_1.RB); +} +XEEMITTER(stvrxl, 0x7C00074E, X )(PPCFunctionBuilder& f, InstrData& i) { + return InstrEmit_stvrx(f, i); +} +XEEMITTER(stvrxl128, VX128_1(4, 1859), VX128_1)(PPCFunctionBuilder& f, InstrData& i) { + return InstrEmit_stvrx128(f, i); +} XEEMITTER(mfvscr, 0x10000604, VX )(PPCFunctionBuilder& f, InstrData& i) { XEINSTRNOTIMPLEMENTED(); @@ -1896,22 +1784,22 @@ void RegisterEmitCategoryAltivec() { XEREGISTERINSTR(stvx128, VX128_1(4, 451)); XEREGISTERINSTR(stvxl, 0x7C0003CE); XEREGISTERINSTR(stvxl128, VX128_1(4, 963)); - // XEREGISTERINSTR(lvlx, 0x7C00040E); - // XEREGISTERINSTR(lvlx128, VX128_1(4, 1027)); - // XEREGISTERINSTR(lvlxl, 0x7C00060E); - // XEREGISTERINSTR(lvlxl128, VX128_1(4, 1539)); - // XEREGISTERINSTR(lvrx, 0x7C00044E); - // XEREGISTERINSTR(lvrx128, VX128_1(4, 1091)); - // XEREGISTERINSTR(lvrxl, 0x7C00064E); - // XEREGISTERINSTR(lvrxl128, VX128_1(4, 1603)); - // XEREGISTERINSTR(stvlx, 0x7C00050E); - // XEREGISTERINSTR(stvlx128, VX128_1(4, 1283)); - // XEREGISTERINSTR(stvlxl, 0x7C00070E); - // XEREGISTERINSTR(stvlxl128, VX128_1(4, 1795)); - // XEREGISTERINSTR(stvrx, 0x7C00054E); - // XEREGISTERINSTR(stvrx128, VX128_1(4, 1347)); - // XEREGISTERINSTR(stvrxl, 0x7C00074E); - // XEREGISTERINSTR(stvrxl128, VX128_1(4, 1859)); + XEREGISTERINSTR(lvlx, 0x7C00040E); + XEREGISTERINSTR(lvlx128, VX128_1(4, 1027)); + XEREGISTERINSTR(lvlxl, 0x7C00060E); + XEREGISTERINSTR(lvlxl128, VX128_1(4, 1539)); + XEREGISTERINSTR(lvrx, 0x7C00044E); + XEREGISTERINSTR(lvrx128, VX128_1(4, 1091)); + XEREGISTERINSTR(lvrxl, 0x7C00064E); + XEREGISTERINSTR(lvrxl128, VX128_1(4, 1603)); + XEREGISTERINSTR(stvlx, 
0x7C00050E); + XEREGISTERINSTR(stvlx128, VX128_1(4, 1283)); + XEREGISTERINSTR(stvlxl, 0x7C00070E); + XEREGISTERINSTR(stvlxl128, VX128_1(4, 1795)); + XEREGISTERINSTR(stvrx, 0x7C00054E); + XEREGISTERINSTR(stvrx128, VX128_1(4, 1347)); + XEREGISTERINSTR(stvrxl, 0x7C00074E); + XEREGISTERINSTR(stvrxl128, VX128_1(4, 1859)); XEREGISTERINSTR(mfvscr, 0x10000604); XEREGISTERINSTR(mtvscr, 0x10000644); diff --git a/src/alloy/hir/function_builder.cc b/src/alloy/hir/function_builder.cc index fafe583e3..e892fcbeb 100644 --- a/src/alloy/hir/function_builder.cc +++ b/src/alloy/hir/function_builder.cc @@ -781,6 +781,28 @@ Value* FunctionBuilder::Load( return i->dest; } +Value* FunctionBuilder::LoadVectorLeft( + Value* address, TypeName type, uint32_t load_flags) { + ASSERT_ADDRESS_TYPE(address); + Instr* i = AppendInstr( + OPCODE_LOAD_VECTOR_LEFT_info, load_flags, + AllocValue(type)); + i->set_src1(address); + i->src2.value = i->src3.value = NULL; + return i->dest; +} + +Value* FunctionBuilder::LoadVectorRight( + Value* address, TypeName type, uint32_t load_flags) { + ASSERT_ADDRESS_TYPE(address); + Instr* i = AppendInstr( + OPCODE_LOAD_VECTOR_RIGHT_info, load_flags, + AllocValue(type)); + i->set_src1(address); + i->src2.value = i->src3.value = NULL; + return i->dest; +} + Value* FunctionBuilder::LoadAcquire( Value* address, TypeName type, uint32_t load_flags) { ASSERT_ADDRESS_TYPE(address); @@ -812,6 +834,26 @@ Value* FunctionBuilder::StoreRelease( return i->dest; } +void FunctionBuilder::StoreVectorLeft( + Value* address, Value* value, uint32_t store_flags) { + ASSERT_ADDRESS_TYPE(address); + ASSERT_VECTOR_TYPE(value); + Instr* i = AppendInstr(OPCODE_STORE_VECTOR_LEFT_info, store_flags); + i->set_src1(address); + i->set_src2(value); + i->src3.value = NULL; +} + +void FunctionBuilder::StoreVectorRight( + Value* address, Value* value, uint32_t store_flags) { + ASSERT_ADDRESS_TYPE(address); + ASSERT_VECTOR_TYPE(value); + Instr* i = AppendInstr(OPCODE_STORE_VECTOR_RIGHT_info, store_flags); + i->set_src1(address); + i->set_src2(value); + i->src3.value = NULL; +} + void FunctionBuilder::Prefetch( Value* address, size_t length, uint32_t prefetch_flags) { ASSERT_ADDRESS_TYPE(address); diff --git a/src/alloy/hir/function_builder.h b/src/alloy/hir/function_builder.h index 5bf8814e5..cae613f5b 100644 --- a/src/alloy/hir/function_builder.h +++ b/src/alloy/hir/function_builder.h @@ -118,8 +118,15 @@ public: Value* Load(Value* address, TypeName type, uint32_t load_flags = 0); Value* LoadAcquire(Value* address, TypeName type, uint32_t load_flags = 0); + Value* LoadVectorLeft(Value* address, TypeName type, + uint32_t load_flags = 0); + Value* LoadVectorRight(Value* address, TypeName type, + uint32_t load_flags = 0); void Store(Value* address, Value* value, uint32_t store_flags = 0); Value* StoreRelease(Value* address, Value* value, uint32_t store_flags = 0); + void StoreVectorLeft(Value* address, Value* value, uint32_t store_flags = 0); + void StoreVectorRight(Value* address, Value* value, + uint32_t store_flags = 0); void Prefetch(Value* address, size_t length, uint32_t prefetch_flags = 0); Value* Max(Value* value1, Value* value2); diff --git a/src/alloy/hir/opcodes.h b/src/alloy/hir/opcodes.h index 5c3f73083..bbb93c68b 100644 --- a/src/alloy/hir/opcodes.h +++ b/src/alloy/hir/opcodes.h @@ -100,8 +100,12 @@ enum Opcode { OPCODE_LOAD, OPCODE_LOAD_ACQUIRE, + OPCODE_LOAD_VECTOR_LEFT, + OPCODE_LOAD_VECTOR_RIGHT, OPCODE_STORE, OPCODE_STORE_RELEASE, + OPCODE_STORE_VECTOR_LEFT, + OPCODE_STORE_VECTOR_RIGHT, OPCODE_PREFETCH, 
OPCODE_MAX, diff --git a/src/alloy/hir/opcodes.inl b/src/alloy/hir/opcodes.inl index ea0df04f7..660ec25fd 100644 --- a/src/alloy/hir/opcodes.inl +++ b/src/alloy/hir/opcodes.inl @@ -194,6 +194,18 @@ DEFINE_OPCODE( OPCODE_SIG_V_V, OPCODE_FLAG_MEMORY | OPCODE_FLAG_VOLATILE); +DEFINE_OPCODE( + OPCODE_LOAD_VECTOR_LEFT, + "load_vector_left", + OPCODE_SIG_V_V, + OPCODE_FLAG_MEMORY); + +DEFINE_OPCODE( + OPCODE_LOAD_VECTOR_RIGHT, + "load_vector_right", + OPCODE_SIG_V_V, + OPCODE_FLAG_MEMORY); + DEFINE_OPCODE( OPCODE_STORE, "store", @@ -206,6 +218,18 @@ DEFINE_OPCODE( OPCODE_SIG_V_V_V, OPCODE_FLAG_MEMORY | OPCODE_FLAG_VOLATILE); +DEFINE_OPCODE( + OPCODE_STORE_VECTOR_LEFT, + "store_vector_left", + OPCODE_SIG_X_V_V, + OPCODE_FLAG_MEMORY); + +DEFINE_OPCODE( + OPCODE_STORE_VECTOR_RIGHT, + "store_vector_right", + OPCODE_SIG_X_V_V, + OPCODE_FLAG_MEMORY); + DEFINE_OPCODE( OPCODE_PREFETCH, "prefetch",