Replacing store vector left/right opcodes by just using permutes.
This commit is contained in:
parent
a44551c418
commit
88cdb9e36c
|
@ -1340,46 +1340,6 @@ int Translate_LOAD(TranslationContext& ctx, Instr* i) {
|
|||
return DispatchToC(ctx, i, fns[i->dest->type]);
|
||||
}
|
||||
|
||||
// Interpreter handler for load_vector_left with a v128 destination.
//
// Takes the address from the src1 register, reads the bytes from that address
// up to the end of its 16-byte block (16 - (address & 0xF) bytes) and writes
// them in reverse order into the lowest-indexed bytes of the destination
// register. Every destination byte that is not loaded is cleared to zero so
// that no stale register contents leak into the result.
//
// Always returns IA_NEXT.
uint32_t IntCode_LOAD_VECTOR_LEFT_V128(IntCodeState& ics, const IntCode* i) {
  const uint32_t address = ics.rf[i->src1_reg].u32;
  const size_t eb = address & 0xF;
  const size_t size = 16 - eb;
  const uint8_t* p = ics.membase + address;
  vec128_t& dest = ics.rf[i->dest_reg].v128;
  // The loop index is not called `i` so it does not hide the IntCode argument.
  for (size_t n = 0; n < size; n++) {
    dest.b16[size - 1 - n] = p[n];
  }
  // Bytes outside the loaded range must read as zero.
  for (size_t n = size; n < 16; n++) {
    dest.b16[n] = 0;
  }
  DPRINT("[%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X] = load_vector_left v128 %.8X\n",
         dest.f4[0], dest.f4[1], dest.f4[2], dest.f4[3],
         dest.i4[0], dest.i4[1], dest.i4[2], dest.i4[3],
         address);
  DFLUSH();
  return IA_NEXT;
}
|
||||
// Translation entry for load_vector_left: forwards the instruction to the
// single C handler, IntCode_LOAD_VECTOR_LEFT_V128, and returns whatever
// DispatchToC reports.
int Translate_LOAD_VECTOR_LEFT(TranslationContext& ctx, Instr* i) {
  int status = DispatchToC(ctx, i, IntCode_LOAD_VECTOR_LEFT_V128);
  return status;
}
|
||||
|
||||
// Interpreter handler for load_vector_right with a v128 destination.
//
// Takes the address from the src1 register and reads the (address & 0xF)
// bytes that lie between the start of the enclosing 16-byte block and the
// address itself. The block base is used as the read pointer, matching
// IntCode_STORE_VECTOR_RIGHT_V128. The bytes are written in reverse order into
// the lowest-indexed bytes of the destination register and all remaining
// bytes are cleared, so an aligned address yields an all-zero vector.
//
// Always returns IA_NEXT.
uint32_t IntCode_LOAD_VECTOR_RIGHT_V128(IntCodeState& ics, const IntCode* i) {
  const uint32_t address = ics.rf[i->src1_reg].u32;
  const size_t eb = address & 0xF;
  const size_t size = eb;
  // Read from the aligned block base, not from the unaligned address.
  const uint8_t* p = ics.membase + (address & ~0xF);
  vec128_t& dest = ics.rf[i->dest_reg].v128;
  // The loop index is not called `i` so it does not hide the IntCode argument.
  for (size_t n = 0; n < size; n++) {
    dest.b16[n] = p[size - 1 - n];
  }
  // Bytes outside the loaded range must read as zero.
  for (size_t n = size; n < 16; n++) {
    dest.b16[n] = 0;
  }
  DPRINT("[%e, %e, %e, %e] [%.8X, %.8X, %.8X, %.8X] = load_vector_right v128 %.8X\n",
         dest.f4[0], dest.f4[1], dest.f4[2], dest.f4[3],
         dest.i4[0], dest.i4[1], dest.i4[2], dest.i4[3],
         address);
  DFLUSH();
  return IA_NEXT;
}
|
||||
// Translation entry for load_vector_right: forwards the instruction to the
// single C handler, IntCode_LOAD_VECTOR_RIGHT_V128, and returns whatever
// DispatchToC reports.
int Translate_LOAD_VECTOR_RIGHT(TranslationContext& ctx, Instr* i) {
  int status = DispatchToC(ctx, i, IntCode_LOAD_VECTOR_RIGHT_V128);
  return status;
}
|
||||
|
||||
uint32_t IntCode_LOAD_ACQUIRE_I8(IntCodeState& ics, const IntCode* i) {
|
||||
uint32_t address = ics.rf[i->src1_reg].u32;
|
||||
xe_atomic_exchange_32(address, ics.reserve_address);
|
||||
|
@ -1614,38 +1574,6 @@ int Translate_STORE_RELEASE(TranslationContext& ctx, Instr* i) {
|
|||
return DispatchToC(ctx, i, fns[i->src2.value->type]);
|
||||
}
|
||||
|
||||
// Interpreter handler for store_vector_left with a v128 source.
//
// Takes the address from the src1 register and writes 16 - (address & 0xF)
// bytes starting at that address, i.e. up to the end of the enclosing 16-byte
// block. The bytes come from the src2 register, walking from byte 15
// downwards.
//
// Always returns IA_NEXT.
uint32_t IntCode_STORE_VECTOR_LEFT_V128(IntCodeState& ics, const IntCode* i) {
  const uint32_t address = ics.rf[i->src1_reg].u32;
  const size_t eb = address & 0xF;
  const size_t size = 16 - eb;
  uint8_t* p = ics.membase + address;
  const vec128_t& src = ics.rf[i->src2_reg].v128;
  // Note that a 16b aligned address stores the entire vector (size == 16).
  // The loop index is not called `i` so it does not hide the IntCode argument.
  for (size_t n = 0; n < size; n++) {
    p[n] = src.b16[15 - n];
  }
  return IA_NEXT;
}
|
||||
// Translation entry for store_vector_left: forwards the instruction to the
// single C handler, IntCode_STORE_VECTOR_LEFT_V128, and returns whatever
// DispatchToC reports.
int Translate_STORE_VECTOR_LEFT(TranslationContext& ctx, Instr* i) {
  int status = DispatchToC(ctx, i, IntCode_STORE_VECTOR_LEFT_V128);
  return status;
}
|
||||
|
||||
// Interpreter handler for store_vector_right with a v128 source.
//
// Takes the address from the src1 register and writes (address & 0xF) bytes
// at the start of the enclosing 16-byte block. Source bytes 0, 1, 2, ... of
// the src2 register land at descending offsets, beginning with the byte just
// below the address.
//
// Always returns IA_NEXT.
uint32_t IntCode_STORE_VECTOR_RIGHT_V128(IntCodeState& ics, const IntCode* i) {
  const uint32_t ea = ics.rf[i->src1_reg].u32;
  const size_t count = ea & 0xF;
  uint8_t* block = ics.membase + (ea & ~0xF);
  const vec128_t& value = ics.rf[i->src2_reg].v128;
  // With a 16b aligned address count is zero and nothing is written.
  size_t from = 0;
  size_t to = count;
  while (to > 0) {
    --to;
    block[to] = value.b16[from];
    ++from;
  }
  return IA_NEXT;
}
|
||||
// Translation entry for store_vector_right: forwards the instruction to the
// single C handler, IntCode_STORE_VECTOR_RIGHT_V128, and returns whatever
// DispatchToC reports.
int Translate_STORE_VECTOR_RIGHT(TranslationContext& ctx, Instr* i) {
  int status = DispatchToC(ctx, i, IntCode_STORE_VECTOR_RIGHT_V128);
  return status;
}
|
||||
|
||||
// Interpreter handler for prefetch. It touches neither the interpreter state
// nor the instruction; it only tells the caller to continue with the next
// instruction.
uint32_t IntCode_PREFETCH(IntCodeState& ics, const IntCode* i) {
  const uint32_t next_action = IA_NEXT;
  return next_action;
}
|
||||
|
@ -3222,12 +3150,8 @@ static const TranslateFn dispatch_table[] = {
|
|||
|
||||
Translate_LOAD,
|
||||
Translate_LOAD_ACQUIRE,
|
||||
Translate_LOAD_VECTOR_LEFT,
|
||||
Translate_LOAD_VECTOR_RIGHT,
|
||||
Translate_STORE,
|
||||
Translate_STORE_RELEASE,
|
||||
Translate_STORE_VECTOR_LEFT,
|
||||
Translate_STORE_VECTOR_RIGHT,
|
||||
Translate_PREFETCH,
|
||||
|
||||
TranslateInvalid, //Translate_MAX,
|
||||
|
|
|
@ -239,7 +239,14 @@ XEEMITTER(stvxl128, VX128_1(4, 963), VX128_1)(PPCFunctionBuilder& f, Inst
|
|||
// https://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/C40E4C6133B31EE8872570B500791108/$file/vector_simd_pem_v_2.07c_26Oct2006_cell.pdf
|
||||
// Shared emitter body for the lvlx-style loads.
//
// Computes the effective address as GPR[ra] + GPR[rb] (GPR[rb] alone when ra
// is 0), loads a full vector from it, byte swaps it, and permutes it against
// a zero vector using the shift-left control derived from the low four
// address bits. The result is written to vector register vd.
//
// Always returns 0.
int InstrEmit_lvlx_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t ra, uint32_t rb) {
  Value* ea = ra ? f.Add(f.LoadGPR(ra), f.LoadGPR(rb)) : f.LoadGPR(rb);
  Value* eb = f.And(f.Truncate(ea, INT8_TYPE), f.LoadConstant((int8_t)0xF));
  // ea &= ~0xF (load takes care of this)
  // v = (new << eb)
  Value* v = f.Permute(
      f.LoadVectorShl(eb),
      f.ByteSwap(f.Load(ea, VEC128_TYPE)),
      f.LoadZero(VEC128_TYPE),
      INT8_TYPE);
  f.StoreVR(vd, v);
  return 0;
}
|
||||
|
@ -258,7 +265,14 @@ XEEMITTER(lvlxl128, VX128_1(4, 1539), VX128_1)(PPCFunctionBuilder& f, Inst
|
|||
|
||||
// Shared emitter body for the lvrx-style loads.
//
// Computes the effective address as GPR[ra] + GPR[rb] (GPR[rb] alone when ra
// is 0), loads a full vector from it, byte swaps it, and permutes a zero
// vector against it using the shift-right control for (16 - eb), where eb is
// the low four address bits. The result is written to vector register vd.
//
// Always returns 0.
int InstrEmit_lvrx_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t ra, uint32_t rb) {
  Value* ea = ra ? f.Add(f.LoadGPR(ra), f.LoadGPR(rb)) : f.LoadGPR(rb);
  Value* eb = f.And(f.Truncate(ea, INT8_TYPE), f.LoadConstant((int8_t)0xF));
  // ea &= ~0xF (load takes care of this)
  // v = (new >> (16 - eb))
  Value* v = f.Permute(
      f.LoadVectorShr(f.Sub(f.LoadConstant((int8_t)16), eb)),
      f.LoadZero(VEC128_TYPE),
      f.ByteSwap(f.Load(ea, VEC128_TYPE)),
      INT8_TYPE);
  f.StoreVR(vd, v);
  return 0;
}
|
||||
|
@ -276,9 +290,30 @@ XEEMITTER(lvrxl128, VX128_1(4, 1603), VX128_1)(PPCFunctionBuilder& f, Inst
|
|||
}
|
||||
|
||||
// Shared emitter body for the stvlx-style stores, implemented as a
// read-modify-write of one full vector.
//
// Computes the effective address as GPR[ra] + GPR[rb] (GPR[rb] alone when ra
// is 0), loads the vector currently in memory, shifts the byte-swapped value
// of vector register vd right by eb (the low four address bits), keeps the
// bytes of the old value selected by an all-ones mask shifted left by
// (16 - eb), ORs both parts together and stores the result.
//
// Always returns 0.
int InstrEmit_stvlx_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t ra, uint32_t rb) {
  // NOTE: if eb == 0 (so 16b aligned) this equals new_value
  //       we could optimize this to prevent the other load/mask, in that case.
  Value* ea = ra ? f.Add(f.LoadGPR(ra), f.LoadGPR(rb)) : f.LoadGPR(rb);
  Value* eb = f.And(f.Truncate(ea, INT8_TYPE), f.LoadConstant((int8_t)0xF));
  Value* new_value = f.ByteSwap(f.LoadVR(vd));
  // ea &= ~0xF (load takes care of this)
  Value* old_value = f.Load(ea, VEC128_TYPE);
  // v = (new >> eb) | (old & (ONE << (16 - eb)))
  Value* v = f.Permute(
      f.LoadVectorShr(eb),
      f.LoadZero(VEC128_TYPE),
      new_value,
      INT8_TYPE);
  v = f.Or(
      v,
      f.And(
          old_value,
          f.Permute(
              f.LoadVectorShl(f.Sub(f.LoadConstant((int8_t)16), eb)),
              f.Not(f.LoadZero(VEC128_TYPE)),
              f.LoadZero(VEC128_TYPE),
              INT8_TYPE)));
  // ea &= ~0xF (store takes care of this)
  f.Store(ea, v);
  return 0;
}
|
||||
XEEMITTER(stvlx, 0x7C00050E, X )(PPCFunctionBuilder& f, InstrData& i) {
|
||||
|
@ -295,9 +330,31 @@ XEEMITTER(stvlxl128, VX128_1(4, 1795), VX128_1)(PPCFunctionBuilder& f, Inst
|
|||
}
|
||||
|
||||
// Shared emitter body for the stvrx-style stores, implemented as a
// read-modify-write of one full vector.
//
// Computes the effective address as GPR[ra] + GPR[rb] (GPR[rb] alone when ra
// is 0), loads the vector currently in memory, shifts the byte-swapped value
// of vector register vd left by (16 - eb), where eb is the low four address
// bits, keeps the bytes of the old value selected by an all-ones mask shifted
// right by eb, ORs both parts together and stores the result.
//
// Always returns 0.
int InstrEmit_stvrx_(PPCFunctionBuilder& f, InstrData& i, uint32_t vd, uint32_t ra, uint32_t rb) {
  // NOTE: if eb == 0 (so 16b aligned) this equals new_value
  //       we could optimize this to prevent the other load/mask, in that case.
  Value* ea = ra ? f.Add(f.LoadGPR(ra), f.LoadGPR(rb)) : f.LoadGPR(rb);
  Value* eb = f.And(f.Truncate(ea, INT8_TYPE), f.LoadConstant((int8_t)0xF));
  Value* new_value = f.ByteSwap(f.LoadVR(vd));
  // ea &= ~0xF (load takes care of this)
  Value* old_value = f.Load(ea, VEC128_TYPE);
  // v = (new << (16 - eb)) | (old & (ONE >> eb))
  Value* v = f.Permute(
      f.LoadVectorShl(f.Sub(f.LoadConstant((int8_t)16), eb)),
      new_value,
      f.LoadZero(VEC128_TYPE),
      INT8_TYPE);
  v = f.Or(
      v,
      f.And(
          old_value,
          f.Permute(
              f.LoadVectorShr(eb),
              f.LoadZero(VEC128_TYPE),
              f.Not(f.LoadZero(VEC128_TYPE)),
              INT8_TYPE)));
  // ea &= ~0xF (store takes care of this)
  f.Store(ea, v);
  return 0;
}
|
||||
XEEMITTER(stvrx, 0x7C00054E, X )(PPCFunctionBuilder& f, InstrData& i) {
|
||||
|
|
|
@ -781,28 +781,6 @@ Value* FunctionBuilder::Load(
|
|||
return i->dest;
|
||||
}
|
||||
|
||||
// Appends a load_vector_left instruction that reads from `address` and
// returns the instruction's destination value, allocated with the requested
// `type`. `load_flags` is passed through to the appended instruction.
Value* FunctionBuilder::LoadVectorLeft(
    Value* address, TypeName type, uint32_t load_flags) {
  ASSERT_ADDRESS_TYPE(address);
  Value* result = AllocValue(type);
  Instr* instr = AppendInstr(OPCODE_LOAD_VECTOR_LEFT_info, load_flags, result);
  instr->set_src1(address);
  // Only src1 is used; the other operand slots are cleared.
  instr->src3.value = NULL;
  instr->src2.value = NULL;
  return instr->dest;
}
|
||||
|
||||
// Appends a load_vector_right instruction that reads from `address` and
// returns the instruction's destination value, allocated with the requested
// `type`. `load_flags` is passed through to the appended instruction.
Value* FunctionBuilder::LoadVectorRight(
    Value* address, TypeName type, uint32_t load_flags) {
  ASSERT_ADDRESS_TYPE(address);
  Value* result = AllocValue(type);
  Instr* instr = AppendInstr(OPCODE_LOAD_VECTOR_RIGHT_info, load_flags, result);
  instr->set_src1(address);
  // Only src1 is used; the other operand slots are cleared.
  instr->src3.value = NULL;
  instr->src2.value = NULL;
  return instr->dest;
}
|
||||
|
||||
Value* FunctionBuilder::LoadAcquire(
|
||||
Value* address, TypeName type, uint32_t load_flags) {
|
||||
ASSERT_ADDRESS_TYPE(address);
|
||||
|
@ -834,26 +812,6 @@ Value* FunctionBuilder::StoreRelease(
|
|||
return i->dest;
|
||||
}
|
||||
|
||||
// Appends a store_vector_left instruction with `address` as src1 and the
// vector `value` as src2. `store_flags` is passed through to the appended
// instruction. Nothing is returned.
void FunctionBuilder::StoreVectorLeft(
    Value* address, Value* value, uint32_t store_flags) {
  ASSERT_ADDRESS_TYPE(address);
  ASSERT_VECTOR_TYPE(value);
  Instr* instr = AppendInstr(OPCODE_STORE_VECTOR_LEFT_info, store_flags);
  instr->set_src1(address);
  instr->set_src2(value);
  // The third operand slot is unused.
  instr->src3.value = NULL;
}
|
||||
|
||||
// Appends a store_vector_right instruction with `address` as src1 and the
// vector `value` as src2. `store_flags` is passed through to the appended
// instruction. Nothing is returned.
void FunctionBuilder::StoreVectorRight(
    Value* address, Value* value, uint32_t store_flags) {
  ASSERT_ADDRESS_TYPE(address);
  ASSERT_VECTOR_TYPE(value);
  Instr* instr = AppendInstr(OPCODE_STORE_VECTOR_RIGHT_info, store_flags);
  instr->set_src1(address);
  instr->set_src2(value);
  // The third operand slot is unused.
  instr->src3.value = NULL;
}
|
||||
|
||||
void FunctionBuilder::Prefetch(
|
||||
Value* address, size_t length, uint32_t prefetch_flags) {
|
||||
ASSERT_ADDRESS_TYPE(address);
|
||||
|
|
|
@ -118,15 +118,8 @@ public:
|
|||
|
||||
Value* Load(Value* address, TypeName type, uint32_t load_flags = 0);
|
||||
Value* LoadAcquire(Value* address, TypeName type, uint32_t load_flags = 0);
|
||||
Value* LoadVectorLeft(Value* address, TypeName type,
|
||||
uint32_t load_flags = 0);
|
||||
Value* LoadVectorRight(Value* address, TypeName type,
|
||||
uint32_t load_flags = 0);
|
||||
void Store(Value* address, Value* value, uint32_t store_flags = 0);
|
||||
Value* StoreRelease(Value* address, Value* value, uint32_t store_flags = 0);
|
||||
void StoreVectorLeft(Value* address, Value* value, uint32_t store_flags = 0);
|
||||
void StoreVectorRight(Value* address, Value* value,
|
||||
uint32_t store_flags = 0);
|
||||
void Prefetch(Value* address, size_t length, uint32_t prefetch_flags = 0);
|
||||
|
||||
Value* Max(Value* value1, Value* value2);
|
||||
|
|
|
@ -105,12 +105,8 @@ enum Opcode {
|
|||
|
||||
OPCODE_LOAD,
|
||||
OPCODE_LOAD_ACQUIRE,
|
||||
OPCODE_LOAD_VECTOR_LEFT,
|
||||
OPCODE_LOAD_VECTOR_RIGHT,
|
||||
OPCODE_STORE,
|
||||
OPCODE_STORE_RELEASE,
|
||||
OPCODE_STORE_VECTOR_LEFT,
|
||||
OPCODE_STORE_VECTOR_RIGHT,
|
||||
OPCODE_PREFETCH,
|
||||
|
||||
OPCODE_MAX,
|
||||
|
|
|
@ -194,18 +194,6 @@ DEFINE_OPCODE(
|
|||
OPCODE_SIG_V_V,
|
||||
OPCODE_FLAG_MEMORY | OPCODE_FLAG_VOLATILE);
|
||||
|
||||
// Opcode table entry for load_vector_left: printed as "load_vector_left",
// uses the V_V signature and is flagged as a memory operation.
DEFINE_OPCODE(OPCODE_LOAD_VECTOR_LEFT, "load_vector_left",
              OPCODE_SIG_V_V, OPCODE_FLAG_MEMORY);
|
||||
|
||||
// Opcode table entry for load_vector_right: printed as "load_vector_right",
// uses the V_V signature and is flagged as a memory operation.
DEFINE_OPCODE(OPCODE_LOAD_VECTOR_RIGHT, "load_vector_right",
              OPCODE_SIG_V_V, OPCODE_FLAG_MEMORY);
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_STORE,
|
||||
"store",
|
||||
|
@ -218,18 +206,6 @@ DEFINE_OPCODE(
|
|||
OPCODE_SIG_V_V_V,
|
||||
OPCODE_FLAG_MEMORY | OPCODE_FLAG_VOLATILE);
|
||||
|
||||
// Opcode table entry for store_vector_left: printed as "store_vector_left",
// uses the X_V_V signature and is flagged as a memory operation.
DEFINE_OPCODE(OPCODE_STORE_VECTOR_LEFT, "store_vector_left",
              OPCODE_SIG_X_V_V, OPCODE_FLAG_MEMORY);
|
||||
|
||||
// Opcode table entry for store_vector_right: printed as "store_vector_right",
// uses the X_V_V signature and is flagged as a memory operation.
DEFINE_OPCODE(OPCODE_STORE_VECTOR_RIGHT, "store_vector_right",
              OPCODE_SIG_X_V_V, OPCODE_FLAG_MEMORY);
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_PREFETCH,
|
||||
"prefetch",
|
||||
|
|
|
@ -401,6 +401,10 @@ void Value::Not() {
|
|||
case INT64_TYPE:
|
||||
constant.i64 = ~constant.i64;
|
||||
break;
|
||||
case VEC128_TYPE:
|
||||
constant.v128.low = ~constant.v128.low;
|
||||
constant.v128.high = ~constant.v128.high;
|
||||
break;
|
||||
default:
|
||||
XEASSERTALWAYS();
|
||||
break;
|
||||
|
|
Loading…
Reference in New Issue