From b9df6dc7030df9aee818944c722999e89cfaac6f Mon Sep 17 00:00:00 2001
From: Ben Vanik
Date: Sun, 8 Dec 2013 20:58:24 -0800
Subject: [PATCH] load_vector_sh*

---
Notes (not part of the commit message):

* Adds two HIR opcodes, OPCODE_LOAD_VECTOR_SHL and OPCODE_LOAD_VECTOR_SHR
  (signature V_V), plus FunctionBuilder::LoadVectorShl/LoadVectorShr, which
  assert an INT8_TYPE operand and allocate a VEC128_TYPE result.
* The IVM backend implements both opcodes with 16x16 byte lookup tables
  indexed as [sh][byte lane] and registers them in dispatch_table between
  the VECTOR_CONVERT_* and LOAD_CONTEXT entries, matching the position of
  the new values in enum Opcode.
* The PPC frontend re-enables lvsl/lvsl128/lvsr/lvsr128. Each emitter
  computes sh = (ra ? ra + rb : rb) & 0xF, truncates it to INT8_TYPE and
  stores the opcode result into VR[vd].
* Review fix: IntCode_LOAD_VECTOR_SHR iterated n < 4 and therefore wrote
  only 4 of the 16 destination bytes; it now iterates n < 16 like
  IntCode_LOAD_VECTOR_SHL. The two tables are also declared const.
* Leading whitespace in this patch was reconstructed; if context does not
  match exactly, apply with whitespace-insensitive matching.

 src/alloy/backend/ivm/ivm_intcode.cc       |  64 +++++++++++
 src/alloy/frontend/ppc/ppc_emit_altivec.cc | 126 +++++++--------------
 src/alloy/hir/function_builder.cc          |  20 ++++
 src/alloy/hir/function_builder.h           |   3 +
 src/alloy/hir/opcodes.h                    |   3 +
 src/alloy/hir/opcodes.inl                  |  12 ++
 6 files changed, 142 insertions(+), 86 deletions(-)

diff --git a/src/alloy/backend/ivm/ivm_intcode.cc b/src/alloy/backend/ivm/ivm_intcode.cc
index 12ca2d7f7..b0aef6bdd 100644
--- a/src/alloy/backend/ivm/ivm_intcode.cc
+++ b/src/alloy/backend/ivm/ivm_intcode.cc
@@ -1066,6 +1066,67 @@ int Translate_VECTOR_CONVERT_I2F(TranslationContext& ctx, Instr* i) {
   return DispatchToC(ctx, i, IntCode_VECTOR_CONVERT_I2F);
 }
 
+static const uint8_t __lvsl_table[16][16] = {  // indexed [sh][byte lane]; row sh holds sh..sh+15
+  { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+  { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+  { 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17},
+  { 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18},
+  { 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19},
+  { 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20},
+  { 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21},
+  { 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22},
+  { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23},
+  { 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
+  {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25},
+  {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26},
+  {12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
+  {13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28},
+  {14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29},
+  {15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30},
+};
+static const uint8_t __lvsr_table[16][16] = {  // indexed [sh][byte lane]; row sh holds (16-sh)..(31-sh)
+  {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31},
+  {15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30},
+  {14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29},
+  {13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28},
+  {12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
+  {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26},
+  {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25},
+  { 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
+  { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23},
+  { 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22},
+  { 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21},
+  { 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20},
+  { 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19},
+  { 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18},
+  { 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17},
+  { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+};
+
+uint32_t IntCode_LOAD_VECTOR_SHL(IntCodeState& ics, const IntCode* i) {
+  int8_t sh = ics.rf[i->src1_reg].i8;  // table row; the PPC emitters mask this to 0..15
+  vec128_t& dest = ics.rf[i->dest_reg].v128;
+  for (int n = 0; n < 16; n++) {  // copy the whole 16-byte row
+    dest.b16[n] = __lvsl_table[sh][n];
+  }
+  return IA_NEXT;
+}
+int Translate_LOAD_VECTOR_SHL(TranslationContext& ctx, Instr* i) {
+  return DispatchToC(ctx, i, IntCode_LOAD_VECTOR_SHL);
+}
+
+uint32_t IntCode_LOAD_VECTOR_SHR(IntCodeState& ics, const IntCode* i) {
+  int8_t sh = ics.rf[i->src1_reg].i8;  // table row; the PPC emitters mask this to 0..15
+  vec128_t& dest = ics.rf[i->dest_reg].v128;
+  for (int n = 0; n < 16; n++) {  // all 16 byte lanes (was 4, leaving lanes 4..15 unwritten)
+    dest.b16[n] = __lvsr_table[sh][n];
+  }
+  return IA_NEXT;
+}
+int Translate_LOAD_VECTOR_SHR(TranslationContext& ctx, Instr* i) {
+  return DispatchToC(ctx, i, IntCode_LOAD_VECTOR_SHR);
+}
+
 uint32_t IntCode_LOAD_CONTEXT_I8(IntCodeState& ics, const IntCode* i) {
   ics.rf[i->dest_reg].i8 = *((int8_t*)(ics.context + ics.rf[i->src1_reg].u64));
   DPRINT("%d (%.X) = ctx i8 +%d\n", ics.rf[i->dest_reg].i8, ics.rf[i->dest_reg].u8, ics.rf[i->src1_reg].u64);
@@ -3003,6 +3064,9 @@ static const TranslateFn dispatch_table[] = {
   Translate_VECTOR_CONVERT_I2F,
   TranslateInvalid, //Translate_VECTOR_CONVERT_F2I,
 
+  Translate_LOAD_VECTOR_SHL,
+  Translate_LOAD_VECTOR_SHR,
+
   Translate_LOAD_CONTEXT,
   Translate_STORE_CONTEXT,
 
diff --git a/src/alloy/frontend/ppc/ppc_emit_altivec.cc b/src/alloy/frontend/ppc/ppc_emit_altivec.cc
index 0a470eba4..77218928b 100644
--- a/src/alloy/frontend/ppc/ppc_emit_altivec.cc
+++ b/src/alloy/frontend/ppc/ppc_emit_altivec.cc
@@ -139,89 +139,43 @@ XEEMITTER(lvewx128, VX128_1(4, 131), VX128_1)(PPCFunctionBuilder& f, Inst
   return InstrEmit_lvewx_(f, i, i.X.RT, i.X.RA, i.X.RB);
 }
 
-// static __m128i __lvsl_table[16] = {
-//   _mm_set_epi8( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15),
-//   _mm_set_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16),
-//   _mm_set_epi8( 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17),
-//   _mm_set_epi8( 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18),
-//   _mm_set_epi8( 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19),
-//   _mm_set_epi8( 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20),
-//   _mm_set_epi8( 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21),
-//   _mm_set_epi8( 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22),
-//   _mm_set_epi8( 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23),
-//   _mm_set_epi8( 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24),
-//   _mm_set_epi8(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25),
-//   _mm_set_epi8(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26),
-//   _mm_set_epi8(12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27),
-//   _mm_set_epi8(13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28),
-//   _mm_set_epi8(14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29),
-//   _mm_set_epi8(15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30),
-// };
-// int InstrEmit_lvsl_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t ra, uint32_t rb) {
-//   GpVar ea(c.newGpVar());
-//   c.mov(ea, e.gpr_value(rb));
-//   if (ra) {
-//     c.add(ea, e.gpr_value(ra));
-//   }
-//   c.and_(ea, imm(0xF));
-//   c.shl(ea, imm(4)); // table offset = (16b * sh)
-//   GpVar gt(c.newGpVar());
-//   c.mov(gt, imm((sysint_t)__lvsl_table));
-//   XmmVar v(c.newXmmVar());
-//   c.movaps(v, xmmword_ptr(gt, ea));
-//   c.shufps(v, v, imm(SHUFPS_SWAP_DWORDS));
-//   f.StoreVR(vd, v);
-//   e.TraceVR(vd);
-//   return 0;
-// }
-// XEEMITTER(lvsl, 0x7C00000C, X )(PPCFunctionBuilder& f, InstrData& i) {
-//   return InstrEmit_lvsl_(f, i, i.X.RT, i.X.RA, i.X.RB);
-// }
-// XEEMITTER(lvsl128, VX128_1(4, 3), VX128_1)(PPCFunctionBuilder& f, InstrData& i) {
-//   return InstrEmit_lvsl_(f, i, i.X.RT, i.X.RA, i.X.RB);
-// }
+int InstrEmit_lvsl_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t ra, uint32_t rb) {
+  Value* ea;
+  if (ra) {
+    ea = f.Add(f.LoadGPR(ra), f.LoadGPR(rb));
+  } else {
+    ea = f.LoadGPR(rb);
+  }
+  Value* sh = f.Truncate(f.And(ea, f.LoadConstant((int64_t)0xF)), INT8_TYPE);  // sh = low 4 bits of ea
+  Value* v = f.LoadVectorShl(sh);
+  f.StoreVR(vd, v);
+  return 0;
+}
+XEEMITTER(lvsl, 0x7C00000C, X )(PPCFunctionBuilder& f, InstrData& i) {
+  return InstrEmit_lvsl_(f, i, i.X.RT, i.X.RA, i.X.RB);
+}
+XEEMITTER(lvsl128, VX128_1(4, 3), VX128_1)(PPCFunctionBuilder& f, InstrData& i) {
+  return InstrEmit_lvsl_(f, i, i.X.RT, i.X.RA, i.X.RB);
+}
 
-// static __m128i __lvsr_table[16] = {
-//   _mm_set_epi8(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31),
-//   _mm_set_epi8(15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30),
-//   _mm_set_epi8(14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29),
-//   _mm_set_epi8(13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28),
-//   _mm_set_epi8(12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27),
-//   _mm_set_epi8(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26),
-//   _mm_set_epi8(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25),
-//   _mm_set_epi8( 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24),
-//   _mm_set_epi8( 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23),
-//   _mm_set_epi8( 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22),
-//   _mm_set_epi8( 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21),
-//   _mm_set_epi8( 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20),
-//   _mm_set_epi8( 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19),
-//   _mm_set_epi8( 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18),
-//   _mm_set_epi8( 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17),
-//   _mm_set_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16),
-// };
-// int InstrEmit_lvsr_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t ra, uint32_t rb) {
-//   GpVar ea(c.newGpVar());
-//   c.mov(ea, e.gpr_value(rb));
-//   if (ra) {
-//     c.add(ea, e.gpr_value(ra));
-//   }
-//   c.and_(ea, imm(0xF));
-//   c.shl(ea, imm(4)); // table offset = (16b * sh)
-//   GpVar gt(c.newGpVar());
-//   c.mov(gt, imm((sysint_t)__lvsr_table));
-//   XmmVar v(c.newXmmVar());
-//   c.movaps(v, xmmword_ptr(gt, ea));
-//   c.shufps(v, v, imm(SHUFPS_SWAP_DWORDS));
-//   f.StoreVR(vd, v);
-//   e.TraceVR(vd);
-//   return 0;
-// }
-// XEEMITTER(lvsr, 0x7C00004C, X )(PPCFunctionBuilder& f, InstrData& i) {
-//   return InstrEmit_lvsr_(f, i, i.X.RT, i.X.RA, i.X.RB);
-// }
-// XEEMITTER(lvsr128, VX128_1(4, 67), VX128_1)(PPCFunctionBuilder& f, InstrData& i) {
-//   return InstrEmit_lvsr_(f, i, i.X.RT, i.X.RA, i.X.RB);
-// }
+int InstrEmit_lvsr_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t ra, uint32_t rb) {
+  Value* ea;
+  if (ra) {
+    ea = f.Add(f.LoadGPR(ra), f.LoadGPR(rb));
+  } else {
+    ea = f.LoadGPR(rb);
+  }
+  Value* sh = f.Truncate(f.And(ea, f.LoadConstant((int64_t)0xF)), INT8_TYPE);  // sh = low 4 bits of ea
+  Value* v = f.LoadVectorShr(sh);
+  f.StoreVR(vd, v);
+  return 0;
+}
+XEEMITTER(lvsr, 0x7C00004C, X )(PPCFunctionBuilder& f, InstrData& i) {
+  return InstrEmit_lvsr_(f, i, i.X.RT, i.X.RA, i.X.RB);
+}
+XEEMITTER(lvsr128, VX128_1(4, 67), VX128_1)(PPCFunctionBuilder& f, InstrData& i) {
+  return InstrEmit_lvsr_(f, i, i.X.RT, i.X.RA, i.X.RB);
+}
 
 int InstrEmit_lvx_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t ra, uint32_t rb) {
   Value* ea = ra ? f.Add(f.LoadGPR(ra), f.LoadGPR(rb)) : f.LoadGPR(rb);
@@ -1926,10 +1880,10 @@ void RegisterEmitCategoryAltivec() {
   XEREGISTERINSTR(lvehx, 0x7C00004E);
   XEREGISTERINSTR(lvewx, 0x7C00008E);
   XEREGISTERINSTR(lvewx128, VX128_1(4, 131));
-  // XEREGISTERINSTR(lvsl, 0x7C00000C);
-  // XEREGISTERINSTR(lvsl128, VX128_1(4, 3));
-  // XEREGISTERINSTR(lvsr, 0x7C00004C);
-  // XEREGISTERINSTR(lvsr128, VX128_1(4, 67));
+  XEREGISTERINSTR(lvsl, 0x7C00000C);
+  XEREGISTERINSTR(lvsl128, VX128_1(4, 3));
+  XEREGISTERINSTR(lvsr, 0x7C00004C);
+  XEREGISTERINSTR(lvsr128, VX128_1(4, 67));
   XEREGISTERINSTR(lvx, 0x7C0000CE);
   XEREGISTERINSTR(lvx128, VX128_1(4, 195));
   XEREGISTERINSTR(lvxl, 0x7C0002CE);
diff --git a/src/alloy/hir/function_builder.cc b/src/alloy/hir/function_builder.cc
index 05e9500b3..fafe583e3 100644
--- a/src/alloy/hir/function_builder.cc
+++ b/src/alloy/hir/function_builder.cc
@@ -734,6 +734,26 @@ Value* FunctionBuilder::LoadConstant(const vec128_t& value) {
   return dest;
 }
 
+Value* FunctionBuilder::LoadVectorShl(Value* sh) {
+  XEASSERT(sh->type == INT8_TYPE);  // shift amount must already be truncated to 8 bits
+  Instr* i = AppendInstr(
+      OPCODE_LOAD_VECTOR_SHL_info, 0,
+      AllocValue(VEC128_TYPE));
+  i->set_src1(sh);
+  i->src2.value = i->src3.value = NULL;  // single-operand opcode
+  return i->dest;
+}
+
+Value* FunctionBuilder::LoadVectorShr(Value* sh) {
+  XEASSERT(sh->type == INT8_TYPE);  // shift amount must already be truncated to 8 bits
+  Instr* i = AppendInstr(
+      OPCODE_LOAD_VECTOR_SHR_info, 0,
+      AllocValue(VEC128_TYPE));
+  i->set_src1(sh);
+  i->src2.value = i->src3.value = NULL;  // single-operand opcode
+  return i->dest;
+}
+
 Value* FunctionBuilder::LoadContext(size_t offset, TypeName type) {
   Instr* i = AppendInstr(
       OPCODE_LOAD_CONTEXT_info, 0,
diff --git a/src/alloy/hir/function_builder.h b/src/alloy/hir/function_builder.h
index a1b160796..5bf8814e5 100644
--- a/src/alloy/hir/function_builder.h
+++ b/src/alloy/hir/function_builder.h
@@ -110,6 +110,9 @@ public:
   Value* LoadConstant(double value);
   Value* LoadConstant(const vec128_t& value);
 
+  Value* LoadVectorShl(Value* sh);
+  Value* LoadVectorShr(Value* sh);
+
   Value* LoadContext(size_t offset, TypeName type);
   void StoreContext(size_t offset, Value* value);
 
diff --git a/src/alloy/hir/opcodes.h b/src/alloy/hir/opcodes.h
index 00907d789..5c3f73083 100644
--- a/src/alloy/hir/opcodes.h
+++ b/src/alloy/hir/opcodes.h
@@ -92,6 +92,9 @@ enum Opcode {
   OPCODE_VECTOR_CONVERT_I2F,
   OPCODE_VECTOR_CONVERT_F2I,
 
+  OPCODE_LOAD_VECTOR_SHL,
+  OPCODE_LOAD_VECTOR_SHR,
+
   OPCODE_LOAD_CONTEXT,
   OPCODE_STORE_CONTEXT,
 
diff --git a/src/alloy/hir/opcodes.inl b/src/alloy/hir/opcodes.inl
index 848c8dd95..ea0df04f7 100644
--- a/src/alloy/hir/opcodes.inl
+++ b/src/alloy/hir/opcodes.inl
@@ -158,6 +158,18 @@ DEFINE_OPCODE(
     OPCODE_SIG_V_V,
     0);
 
+DEFINE_OPCODE(
+    OPCODE_LOAD_VECTOR_SHL,
+    "load_vector_shl",
+    OPCODE_SIG_V_V,
+    0);
+
+DEFINE_OPCODE(
+    OPCODE_LOAD_VECTOR_SHR,
+    "load_vector_shr",
+    OPCODE_SIG_V_V,
+    0);
+
 DEFINE_OPCODE(
     OPCODE_LOAD_CONTEXT,
     "load_context",