load_vector_sh*

This commit is contained in:
Ben Vanik 2013-12-08 20:58:24 -08:00
parent 07b5c3ad81
commit b9df6dc703
6 changed files with 142 additions and 86 deletions

View File

@ -1066,6 +1066,67 @@ int Translate_VECTOR_CONVERT_I2F(TranslationContext& ctx, Instr* i) {
return DispatchToC(ctx, i, IntCode_VECTOR_CONVERT_I2F); return DispatchToC(ctx, i, IntCode_VECTOR_CONVERT_I2F);
} }
// Byte patterns written by IntCode_LOAD_VECTOR_SHL.
// Row index is the shift amount sh (0..15); entry [sh][n] equals sh + n.
// The table is read-only lookup data, so it is declared const.
// NOTE(review): identifiers containing a double underscore are reserved for
// the implementation; renaming would also require updating the users of this
// table elsewhere in the file.
static const uint8_t __lvsl_table[16][16] = {
  { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15},
  { 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16},
  { 2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17},
  { 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18},
  { 4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19},
  { 5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20},
  { 6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21},
  { 7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22},
  { 8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23},
  { 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
  {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25},
  {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26},
  {12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
  {13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28},
  {14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29},
  {15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30},
};
// Byte patterns written by IntCode_LOAD_VECTOR_SHR.
// Row index is the shift amount sh (0..15); entry [sh][n] equals 16 - sh + n.
// The table is read-only lookup data, so it is declared const.
// NOTE(review): identifiers containing a double underscore are reserved for
// the implementation; renaming would also require updating the users of this
// table elsewhere in the file.
static const uint8_t __lvsr_table[16][16] = {
  {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31},
  {15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30},
  {14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29},
  {13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28},
  {12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
  {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26},
  {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25},
  { 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
  { 8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23},
  { 7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22},
  { 6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21},
  { 5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20},
  { 4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19},
  { 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18},
  { 2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17},
  { 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16},
};
// Interpreter handler for LOAD_VECTOR_SHL.
// Reads the 8-bit shift amount from the src1 register and fills all 16 bytes
// of the destination vector register with the matching row of __lvsl_table.
// Returns IA_NEXT.
uint32_t IntCode_LOAD_VECTOR_SHL(IntCodeState& ics, const IntCode* i) {
  // Only the low four bits select a row. Masking (on an unsigned value) keeps
  // a negative or oversized register value from indexing outside the 16-row
  // table.
  const uint8_t sh = (uint8_t)ics.rf[i->src1_reg].i8 & 0xF;
  vec128_t& dest = ics.rf[i->dest_reg].v128;
  for (int n = 0; n < 16; n++) {
    dest.b16[n] = __lvsl_table[sh][n];
  }
  return IA_NEXT;
}
// Translator entry for LOAD_VECTOR_SHL: passes IntCode_LOAD_VECTOR_SHL to
// DispatchToC as the handler for instruction i and returns DispatchToC's
// result unchanged.
int Translate_LOAD_VECTOR_SHL(TranslationContext& ctx, Instr* i) {
return DispatchToC(ctx, i, IntCode_LOAD_VECTOR_SHL);
}
// Interpreter handler for LOAD_VECTOR_SHR.
// Reads the 8-bit shift amount from the src1 register and fills all 16 bytes
// of the destination vector register with the matching row of __lvsr_table.
// Returns IA_NEXT.
uint32_t IntCode_LOAD_VECTOR_SHR(IntCodeState& ics, const IntCode* i) {
  // Only the low four bits select a row. Masking (on an unsigned value) keeps
  // a negative or oversized register value from indexing outside the 16-row
  // table.
  const uint8_t sh = (uint8_t)ics.rf[i->src1_reg].i8 & 0xF;
  vec128_t& dest = ics.rf[i->dest_reg].v128;
  // Every table row is 16 bytes wide; copy the whole row so no destination
  // byte is left holding stale data (the loop previously stopped after 4).
  for (int n = 0; n < 16; n++) {
    dest.b16[n] = __lvsr_table[sh][n];
  }
  return IA_NEXT;
}
// Translator entry for LOAD_VECTOR_SHR: passes IntCode_LOAD_VECTOR_SHR to
// DispatchToC as the handler for instruction i and returns DispatchToC's
// result unchanged.
int Translate_LOAD_VECTOR_SHR(TranslationContext& ctx, Instr* i) {
return DispatchToC(ctx, i, IntCode_LOAD_VECTOR_SHR);
}
uint32_t IntCode_LOAD_CONTEXT_I8(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_LOAD_CONTEXT_I8(IntCodeState& ics, const IntCode* i) {
ics.rf[i->dest_reg].i8 = *((int8_t*)(ics.context + ics.rf[i->src1_reg].u64)); ics.rf[i->dest_reg].i8 = *((int8_t*)(ics.context + ics.rf[i->src1_reg].u64));
DPRINT("%d (%.X) = ctx i8 +%d\n", ics.rf[i->dest_reg].i8, ics.rf[i->dest_reg].u8, ics.rf[i->src1_reg].u64); DPRINT("%d (%.X) = ctx i8 +%d\n", ics.rf[i->dest_reg].i8, ics.rf[i->dest_reg].u8, ics.rf[i->src1_reg].u64);
@ -3003,6 +3064,9 @@ static const TranslateFn dispatch_table[] = {
Translate_VECTOR_CONVERT_I2F, Translate_VECTOR_CONVERT_I2F,
TranslateInvalid, //Translate_VECTOR_CONVERT_F2I, TranslateInvalid, //Translate_VECTOR_CONVERT_F2I,
Translate_LOAD_VECTOR_SHL,
Translate_LOAD_VECTOR_SHR,
Translate_LOAD_CONTEXT, Translate_LOAD_CONTEXT,
Translate_STORE_CONTEXT, Translate_STORE_CONTEXT,

View File

@ -139,89 +139,43 @@ XEEMITTER(lvewx128, VX128_1(4, 131), VX128_1)(PPCFunctionBuilder& f, Inst
return InstrEmit_lvewx_(f, i, i.X.RT, i.X.RA, i.X.RB); return InstrEmit_lvewx_(f, i, i.X.RT, i.X.RA, i.X.RB);
} }
// static __m128i __lvsl_table[16] = { int InstrEmit_lvsl_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t ra, uint32_t rb) {
// _mm_set_epi8( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), Value* ea;
// _mm_set_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), if (ra) {
// _mm_set_epi8( 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17), ea = f.Add(f.LoadGPR(ra), f.LoadGPR(rb));
// _mm_set_epi8( 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18), } else {
// _mm_set_epi8( 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19), ea = f.LoadGPR(rb);
// _mm_set_epi8( 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20), }
// _mm_set_epi8( 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21), Value* sh = f.Truncate(f.And(ea, f.LoadConstant((int64_t)0xF)), INT8_TYPE);
// _mm_set_epi8( 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22), Value* v = f.LoadVectorShl(sh);
// _mm_set_epi8( 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23), f.StoreVR(vd, v);
// _mm_set_epi8( 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24), return 0;
// _mm_set_epi8(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25), }
// _mm_set_epi8(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), XEEMITTER(lvsl, 0x7C00000C, X )(PPCFunctionBuilder& f, InstrData& i) {
// _mm_set_epi8(12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27), return InstrEmit_lvsl_(f, i, i.X.RT, i.X.RA, i.X.RB);
// _mm_set_epi8(13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28), }
// _mm_set_epi8(14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29), XEEMITTER(lvsl128, VX128_1(4, 3), VX128_1)(PPCFunctionBuilder& f, InstrData& i) {
// _mm_set_epi8(15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30), return InstrEmit_lvsl_(f, i, i.X.RT, i.X.RA, i.X.RB);
// }; }
// int InstrEmit_lvsl_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t ra, uint32_t rb) {
// GpVar ea(c.newGpVar());
// c.mov(ea, e.gpr_value(rb));
// if (ra) {
// c.add(ea, e.gpr_value(ra));
// }
// c.and_(ea, imm(0xF));
// c.shl(ea, imm(4)); // table offset = (16b * sh)
// GpVar gt(c.newGpVar());
// c.mov(gt, imm((sysint_t)__lvsl_table));
// XmmVar v(c.newXmmVar());
// c.movaps(v, xmmword_ptr(gt, ea));
// c.shufps(v, v, imm(SHUFPS_SWAP_DWORDS));
// f.StoreVR(vd, v);
// e.TraceVR(vd);
// return 0;
// }
// XEEMITTER(lvsl, 0x7C00000C, X )(PPCFunctionBuilder& f, InstrData& i) {
// return InstrEmit_lvsl_(f, i, i.X.RT, i.X.RA, i.X.RB);
// }
// XEEMITTER(lvsl128, VX128_1(4, 3), VX128_1)(PPCFunctionBuilder& f, InstrData& i) {
// return InstrEmit_lvsl_(f, i, i.X.RT, i.X.RA, i.X.RB);
// }
// static __m128i __lvsr_table[16] = { int InstrEmit_lvsr_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t ra, uint32_t rb) {
// _mm_set_epi8(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31), Value* ea;
// _mm_set_epi8(15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30), if (ra) {
// _mm_set_epi8(14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29), ea = f.Add(f.LoadGPR(ra), f.LoadGPR(rb));
// _mm_set_epi8(13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28), } else {
// _mm_set_epi8(12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27), ea = f.LoadGPR(rb);
// _mm_set_epi8(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), }
// _mm_set_epi8(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25), Value* sh = f.Truncate(f.And(ea, f.LoadConstant((int64_t)0xF)), INT8_TYPE);
// _mm_set_epi8( 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24), Value* v = f.LoadVectorShr(sh);
// _mm_set_epi8( 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23), f.StoreVR(vd, v);
// _mm_set_epi8( 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22), return 0;
// _mm_set_epi8( 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21), }
// _mm_set_epi8( 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20), XEEMITTER(lvsr, 0x7C00004C, X )(PPCFunctionBuilder& f, InstrData& i) {
// _mm_set_epi8( 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19), return InstrEmit_lvsr_(f, i, i.X.RT, i.X.RA, i.X.RB);
// _mm_set_epi8( 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18), }
// _mm_set_epi8( 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17), XEEMITTER(lvsr128, VX128_1(4, 67), VX128_1)(PPCFunctionBuilder& f, InstrData& i) {
// _mm_set_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), return InstrEmit_lvsr_(f, i, i.X.RT, i.X.RA, i.X.RB);
// }; }
// int InstrEmit_lvsr_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t ra, uint32_t rb) {
// GpVar ea(c.newGpVar());
// c.mov(ea, e.gpr_value(rb));
// if (ra) {
// c.add(ea, e.gpr_value(ra));
// }
// c.and_(ea, imm(0xF));
// c.shl(ea, imm(4)); // table offset = (16b * sh)
// GpVar gt(c.newGpVar());
// c.mov(gt, imm((sysint_t)__lvsr_table));
// XmmVar v(c.newXmmVar());
// c.movaps(v, xmmword_ptr(gt, ea));
// c.shufps(v, v, imm(SHUFPS_SWAP_DWORDS));
// f.StoreVR(vd, v);
// e.TraceVR(vd);
// return 0;
// }
// XEEMITTER(lvsr, 0x7C00004C, X )(PPCFunctionBuilder& f, InstrData& i) {
// return InstrEmit_lvsr_(f, i, i.X.RT, i.X.RA, i.X.RB);
// }
// XEEMITTER(lvsr128, VX128_1(4, 67), VX128_1)(PPCFunctionBuilder& f, InstrData& i) {
// return InstrEmit_lvsr_(f, i, i.X.RT, i.X.RA, i.X.RB);
// }
int InstrEmit_lvx_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t ra, uint32_t rb) { int InstrEmit_lvx_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t ra, uint32_t rb) {
Value* ea = ra ? f.Add(f.LoadGPR(ra), f.LoadGPR(rb)) : f.LoadGPR(rb); Value* ea = ra ? f.Add(f.LoadGPR(ra), f.LoadGPR(rb)) : f.LoadGPR(rb);
@ -1926,10 +1880,10 @@ void RegisterEmitCategoryAltivec() {
XEREGISTERINSTR(lvehx, 0x7C00004E); XEREGISTERINSTR(lvehx, 0x7C00004E);
XEREGISTERINSTR(lvewx, 0x7C00008E); XEREGISTERINSTR(lvewx, 0x7C00008E);
XEREGISTERINSTR(lvewx128, VX128_1(4, 131)); XEREGISTERINSTR(lvewx128, VX128_1(4, 131));
// XEREGISTERINSTR(lvsl, 0x7C00000C); XEREGISTERINSTR(lvsl, 0x7C00000C);
// XEREGISTERINSTR(lvsl128, VX128_1(4, 3)); XEREGISTERINSTR(lvsl128, VX128_1(4, 3));
// XEREGISTERINSTR(lvsr, 0x7C00004C); XEREGISTERINSTR(lvsr, 0x7C00004C);
// XEREGISTERINSTR(lvsr128, VX128_1(4, 67)); XEREGISTERINSTR(lvsr128, VX128_1(4, 67));
XEREGISTERINSTR(lvx, 0x7C0000CE); XEREGISTERINSTR(lvx, 0x7C0000CE);
XEREGISTERINSTR(lvx128, VX128_1(4, 195)); XEREGISTERINSTR(lvx128, VX128_1(4, 195));
XEREGISTERINSTR(lvxl, 0x7C0002CE); XEREGISTERINSTR(lvxl, 0x7C0002CE);

View File

@ -734,6 +734,26 @@ Value* FunctionBuilder::LoadConstant(const vec128_t& value) {
return dest; return dest;
} }
// Appends a LOAD_VECTOR_SHL instruction whose single operand is `sh` and
// returns the instruction's destination value (allocated as VEC128_TYPE).
// `sh` must be of INT8_TYPE; this is asserted.
Value* FunctionBuilder::LoadVectorShl(Value* sh) {
  XEASSERT(sh->type == INT8_TYPE);

  Value* result = AllocValue(VEC128_TYPE);
  Instr* instr = AppendInstr(OPCODE_LOAD_VECTOR_SHL_info, 0, result);
  instr->set_src1(sh);

  // Only src1 is used; clear the remaining operand slots.
  instr->src3.value = NULL;
  instr->src2.value = NULL;
  return instr->dest;
}
// Appends a LOAD_VECTOR_SHR instruction whose single operand is `sh` and
// returns the instruction's destination value (allocated as VEC128_TYPE).
// `sh` must be of INT8_TYPE; this is asserted.
Value* FunctionBuilder::LoadVectorShr(Value* sh) {
  XEASSERT(sh->type == INT8_TYPE);

  Value* result = AllocValue(VEC128_TYPE);
  Instr* instr = AppendInstr(OPCODE_LOAD_VECTOR_SHR_info, 0, result);
  instr->set_src1(sh);

  // Only src1 is used; clear the remaining operand slots.
  instr->src3.value = NULL;
  instr->src2.value = NULL;
  return instr->dest;
}
Value* FunctionBuilder::LoadContext(size_t offset, TypeName type) { Value* FunctionBuilder::LoadContext(size_t offset, TypeName type) {
Instr* i = AppendInstr( Instr* i = AppendInstr(
OPCODE_LOAD_CONTEXT_info, 0, OPCODE_LOAD_CONTEXT_info, 0,

View File

@ -110,6 +110,9 @@ public:
Value* LoadConstant(double value); Value* LoadConstant(double value);
Value* LoadConstant(const vec128_t& value); Value* LoadConstant(const vec128_t& value);
Value* LoadVectorShl(Value* sh);
Value* LoadVectorShr(Value* sh);
Value* LoadContext(size_t offset, TypeName type); Value* LoadContext(size_t offset, TypeName type);
void StoreContext(size_t offset, Value* value); void StoreContext(size_t offset, Value* value);

View File

@ -92,6 +92,9 @@ enum Opcode {
OPCODE_VECTOR_CONVERT_I2F, OPCODE_VECTOR_CONVERT_I2F,
OPCODE_VECTOR_CONVERT_F2I, OPCODE_VECTOR_CONVERT_F2I,
OPCODE_LOAD_VECTOR_SHL,
OPCODE_LOAD_VECTOR_SHR,
OPCODE_LOAD_CONTEXT, OPCODE_LOAD_CONTEXT,
OPCODE_STORE_CONTEXT, OPCODE_STORE_CONTEXT,

View File

@ -158,6 +158,18 @@ DEFINE_OPCODE(
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0); 0);
// Opcode table entry for OPCODE_LOAD_VECTOR_SHL, named "load_vector_shl",
// with flags 0.
// NOTE(review): OPCODE_SIG_V_V presumably describes one value result and one
// value operand - confirm against the signature definitions.
DEFINE_OPCODE(
OPCODE_LOAD_VECTOR_SHL,
"load_vector_shl",
OPCODE_SIG_V_V,
0);
// Opcode table entry for OPCODE_LOAD_VECTOR_SHR, named "load_vector_shr",
// with flags 0.
// NOTE(review): OPCODE_SIG_V_V presumably describes one value result and one
// value operand - confirm against the signature definitions.
DEFINE_OPCODE(
OPCODE_LOAD_VECTOR_SHR,
"load_vector_shr",
OPCODE_SIG_V_V,
0);
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_LOAD_CONTEXT, OPCODE_LOAD_CONTEXT,
"load_context", "load_context",