VectorRotateLeft for vrl*.
This commit is contained in:
parent
333fc71b44
commit
ff59f23de0
|
@ -3640,30 +3640,26 @@ int Translate_VECTOR_SHA(TranslationContext& ctx, Instr* i) {
|
|||
return DispatchToC(ctx, i, fns[i->flags]);
|
||||
}
|
||||
|
||||
// Rotates the bits of |v| left by |sh| positions, wrapping the bits that are
// shifted out of the top back into the bottom.
//
// The count is reduced modulo the bit width of T. Without that reduction a
// count of zero would shift right by the full operand width, and a negative
// or oversized count would shift by a negative/too-large amount; both are
// undefined behavior in C++.
template <typename T>
T ROTL(T v, int8_t sh) {
  const unsigned bits = static_cast<unsigned>(sizeof(T) * 8);
  // |bits| is a power of two for the fixed-width integer types, so masking
  // with (bits - 1) is the same as taking the count modulo |bits|.
  const unsigned amount = static_cast<unsigned>(sh) & (bits - 1);
  if (amount == 0) {
    return v;
  }
  return static_cast<T>((T(v) << amount) | (T(v) >> (bits - amount)));
}
|
||||
// Rotates the 8-bit value in register src1 left by the count held in the i8
// field of register src2 and stores the result in register dest.
uint32_t IntCode_ROTATE_LEFT_I8(IntCodeState& ics, const IntCode* i) {
  // Read both operands once and write dest exactly once, so the value is
  // rotated a single time even when dest_reg is the same register as src1_reg.
  const uint8_t value = ics.rf[i->src1_reg].i8;
  const uint8_t amount = ics.rf[i->src2_reg].i8;
  ics.rf[i->dest_reg].i8 = poly::rotate_left<uint8_t>(value, amount);
  return IA_NEXT;
}
|
||||
// Rotates the 16-bit value in register src1 left by the count held in the i8
// field of register src2 and stores the result in register dest.
uint32_t IntCode_ROTATE_LEFT_I16(IntCodeState& ics, const IntCode* i) {
  // Read both operands once and write dest exactly once, so the value is
  // rotated a single time even when dest_reg is the same register as src1_reg.
  const uint16_t value = ics.rf[i->src1_reg].i16;
  const uint8_t amount = ics.rf[i->src2_reg].i8;
  ics.rf[i->dest_reg].i16 = poly::rotate_left<uint16_t>(value, amount);
  return IA_NEXT;
}
|
||||
// Rotates the 32-bit value in register src1 left by the count held in the i8
// field of register src2 and stores the result in register dest.
uint32_t IntCode_ROTATE_LEFT_I32(IntCodeState& ics, const IntCode* i) {
  // Read both operands once and write dest exactly once, so the value is
  // rotated a single time even when dest_reg is the same register as src1_reg.
  const uint32_t value = ics.rf[i->src1_reg].i32;
  const uint8_t amount = ics.rf[i->src2_reg].i8;
  ics.rf[i->dest_reg].i32 = poly::rotate_left<uint32_t>(value, amount);
  return IA_NEXT;
}
|
||||
// Rotates the 64-bit value in register src1 left by the count held in the i8
// field of register src2 and stores the result in register dest.
uint32_t IntCode_ROTATE_LEFT_I64(IntCodeState& ics, const IntCode* i) {
  // Read both operands once and write dest exactly once, so the value is
  // rotated a single time even when dest_reg is the same register as src1_reg.
  const uint64_t value = ics.rf[i->src1_reg].i64;
  const uint8_t amount = ics.rf[i->src2_reg].i8;
  ics.rf[i->dest_reg].i64 = poly::rotate_left<uint64_t>(value, amount);
  return IA_NEXT;
}
|
||||
int Translate_ROTATE_LEFT(TranslationContext& ctx, Instr* i) {
|
||||
|
@ -3675,6 +3671,11 @@ int Translate_ROTATE_LEFT(TranslationContext& ctx, Instr* i) {
|
|||
return DispatchToC(ctx, i, fns[i->dest->type]);
|
||||
}
|
||||
|
||||
// Translation entry for the vector rotate-left opcode. No translation is
// provided here: the function asserts unconditionally and then returns 1.
int Translate_VECTOR_ROTATE_LEFT(TranslationContext& ctx, Instr* i) {
  assert_always();
  return 1;
}
|
||||
|
||||
uint32_t IntCode_BYTE_SWAP_I16(IntCodeState& ics, const IntCode* i) {
|
||||
ics.rf[i->dest_reg].i16 = poly::byte_swap(ics.rf[i->src1_reg].i16);
|
||||
return IA_NEXT;
|
||||
|
@ -4218,11 +4219,12 @@ static const TranslateFn dispatch_table[] = {
|
|||
Translate_SHL, Translate_VECTOR_SHL,
|
||||
Translate_SHR, Translate_VECTOR_SHR,
|
||||
Translate_SHA, Translate_VECTOR_SHA,
|
||||
Translate_ROTATE_LEFT, Translate_BYTE_SWAP,
|
||||
Translate_CNTLZ, Translate_INSERT,
|
||||
Translate_EXTRACT, Translate_SPLAT,
|
||||
Translate_PERMUTE, Translate_SWIZZLE,
|
||||
Translate_PACK, Translate_UNPACK,
|
||||
Translate_ROTATE_LEFT, Translate_VECTOR_ROTATE_LEFT,
|
||||
Translate_BYTE_SWAP, Translate_CNTLZ,
|
||||
Translate_INSERT, Translate_EXTRACT,
|
||||
Translate_SPLAT, Translate_PERMUTE,
|
||||
Translate_SWIZZLE, Translate_PACK,
|
||||
Translate_UNPACK,
|
||||
TranslateInvalid, // Translate_COMPARE_EXCHANGE,
|
||||
Translate_ATOMIC_EXCHANGE,
|
||||
TranslateInvalid, // Translate_ATOMIC_ADD,
|
||||
|
|
|
@ -550,6 +550,7 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) {
|
|||
/* XMMUnsignedDwordMax */ vec128i(0xFFFFFFFFu, 0x00000000u,
|
||||
0xFFFFFFFFu, 0x00000000u),
|
||||
/* XMM255 */ vec128f(255.0f, 255.0f, 255.0f, 255.0f),
|
||||
/* XMMPI32 */ vec128i(32, 32, 32, 32),
|
||||
/* XMMSignMaskI8 */ vec128i(0x80808080u, 0x80808080u,
|
||||
0x80808080u, 0x80808080u),
|
||||
/* XMMSignMaskI16 */ vec128i(0x80008000u, 0x80008000u,
|
||||
|
|
|
@ -60,6 +60,7 @@ enum XmmConst {
|
|||
XMMShiftByteMask,
|
||||
XMMUnsignedDwordMax,
|
||||
XMM255,
|
||||
XMMPI32,
|
||||
XMMSignMaskI8,
|
||||
XMMSignMaskI16,
|
||||
XMMSignMaskI32,
|
||||
|
|
|
@ -4475,6 +4475,76 @@ EMITTER_OPCODE_TABLE(
|
|||
ROTATE_LEFT_I64);
|
||||
|
||||
|
||||
// ============================================================================
|
||||
// OPCODE_VECTOR_ROTATE_LEFT
|
||||
// ============================================================================
|
||||
// TODO(benvanik): AVX512 has native variable rotates (vprolvd/vprolvq).
|
||||
// Emits x64 code for OPCODE_VECTOR_ROTATE_LEFT on 128-bit vectors. The lane
// width is selected by the instruction flags: 8- and 16-bit lanes call out to
// the scalar C++ helpers below, 32-bit lanes are emitted inline using
// per-lane variable shifts.
EMITTER(VECTOR_ROTATE_LEFT_V128, MATCH(I<OPCODE_VECTOR_ROTATE_LEFT, V128<>, V128<>, V128<>>)) {
  // Scalar fallback: rotates each of the 16 bytes of |src1| left by the low
  // 3 bits of the corresponding byte of |src2|.
  static __m128i EmulateVectorRotateLeftI8(__m128i src1, __m128i src2) {
    alignas(16) __m128i value;
    alignas(16) __m128i shamt;
    _mm_store_si128(&value, src1);
    _mm_store_si128(&shamt, src2);
    for (size_t i = 0; i < 16; ++i) {
      // An 8-bit lane has 8 distinct rotations, so the count is 3 bits wide
      // (0x7); masking with 0x3 would drop rotations by 4..7.
      value.m128i_u8[i] = poly::rotate_left<uint8_t>(
          value.m128i_u8[i], shamt.m128i_u8[i] & 0x7);
    }
    return _mm_load_si128(&value);
  }
  // Scalar fallback: rotates each of the 8 halfwords of |src1| left by the
  // low 4 bits of the corresponding halfword of |src2|.
  static __m128i EmulateVectorRotateLeftI16(__m128i src1, __m128i src2) {
    alignas(16) __m128i value;
    alignas(16) __m128i shamt;
    _mm_store_si128(&value, src1);
    _mm_store_si128(&shamt, src2);
    for (size_t i = 0; i < 8; ++i) {
      value.m128i_u16[i] = poly::rotate_left<uint16_t>(
          value.m128i_u16[i], shamt.m128i_u16[i] & 0xF);
    }
    return _mm_load_si128(&value);
  }
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    switch (i.instr->flags) {
      case INT8_TYPE:
        // TODO(benvanik): native version (with shift magic).
        // Stash both operands, hand their addresses to the helper in r8/r9,
        // and copy the result back out of xmm0.
        // NOTE(review): assumes CallNativeSafe maps r8/r9 onto the helper's
        // src1/src2 parameters - confirm against its calling convention.
        e.lea(e.r8, e.StashXmm(i.src1));
        e.lea(e.r9, e.StashXmm(i.src2));
        e.CallNativeSafe(reinterpret_cast<void*>(EmulateVectorRotateLeftI8));
        e.vmovaps(i.dest, e.xmm0);
        break;
      case INT16_TYPE:
        // TODO(benvanik): native version (with shift magic).
        // Same call-out sequence as the 8-bit case, with the 16-bit helper.
        e.lea(e.r8, e.StashXmm(i.src1));
        e.lea(e.r9, e.StashXmm(i.src2));
        e.CallNativeSafe(reinterpret_cast<void*>(EmulateVectorRotateLeftI16));
        e.vmovaps(i.dest, e.xmm0);
        break;
      case INT32_TYPE: {
        // rotl(x, n) == (x << n) | (x >> (32 - n)), computed per lane.
        // |temp| holds (32 - n); it must not overlap a source that is still
        // needed, so fall back to xmm2 when dest aliases either source.
        Xmm temp = i.dest;
        if (i.dest == i.src1 || i.dest == i.src2) {
          temp = e.xmm2;
        }
        // Shift left (to get high bits):
        e.vpand(e.xmm0, i.src2, e.GetXmmConstPtr(XMMShiftMaskPS));
        e.vpsllvd(e.xmm1, i.src1, e.xmm0);
        // Shift right (to get low bits):
        e.vmovaps(temp, e.GetXmmConstPtr(XMMPI32));
        e.vpsubd(temp, e.xmm0);
        // The right shift must use the complementary count in |temp|, not
        // the left-shift count in xmm0, otherwise the result is not a rotate.
        e.vpsrlvd(i.dest, i.src1, temp);
        // Merge:
        e.vpor(i.dest, e.xmm1);
        break;
      }
      default:
        // Only 8/16/32-bit lane widths are handled.
        assert_always();
        break;
    }
  }
};
|
||||
// Instantiates EMITTER_OPCODE_TABLE for OPCODE_VECTOR_ROTATE_LEFT, listing
// VECTOR_ROTATE_LEFT_V128 as its only sequence.
EMITTER_OPCODE_TABLE(
|
||||
OPCODE_VECTOR_ROTATE_LEFT,
|
||||
VECTOR_ROTATE_LEFT_V128);
|
||||
|
||||
|
||||
// ============================================================================
|
||||
// OPCODE_BYTE_SWAP
|
||||
// ============================================================================
|
||||
|
@ -5287,6 +5357,7 @@ void RegisterSequences() {
|
|||
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SHR);
|
||||
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SHA);
|
||||
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_ROTATE_LEFT);
|
||||
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_ROTATE_LEFT);
|
||||
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_BYTE_SWAP);
|
||||
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_CNTLZ);
|
||||
//REGISTER_EMITTER_OPCODE_TABLE(OPCODE_INSERT);
|
||||
|
|
|
@ -1212,22 +1212,30 @@ XEEMITTER(vrfiz128, VX128_3(6, 1008), VX128_3)(PPCHIRBuilder& f, InstrData& i) {
|
|||
}
|
||||
|
||||
// vrlb: rotates each byte lane of VA left by the count in the matching byte
// lane of VB and stores the result in VD. Returns 0 after emitting.
XEEMITTER(vrlb, 0x10000004, VX)(PPCHIRBuilder& f, InstrData& i) {
  // (VD) <- ROTL((VA), (VB)&0x7)
  Value* v = f.VectorRotateLeft(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT8_TYPE);
  f.StoreVR(i.VX.VD, v);
  return 0;
}
|
||||
|
||||
// vrlh: rotates each halfword lane of VA left by the count in the matching
// halfword lane of VB and stores the result in VD. Returns 0 after emitting.
XEEMITTER(vrlh, 0x10000044, VX)(PPCHIRBuilder& f, InstrData& i) {
  // (VD) <- ROTL((VA), (VB)&0xF)
  Value* v = f.VectorRotateLeft(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT16_TYPE);
  f.StoreVR(i.VX.VD, v);
  return 0;
}
|
||||
|
||||
// Shared body for the word-rotate emitters: loads vector registers |va| and
// |vb|, builds a 32-bit-lane vector rotate-left of the former by the latter,
// stores it to vector register |vd|, and returns 0.
int InstrEmit_vrlw_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) {
  // (VD) <- ROTL((VA), (VB)&0x1F)
  Value* source = f.LoadVR(va);
  Value* amounts = f.LoadVR(vb);
  f.StoreVR(vd, f.VectorRotateLeft(source, amounts, INT32_TYPE));
  return 0;
}
|
||||
// vrlw: word-lane rotate left, VX encoding. Forwards the VD/VA/VB register
// fields to the shared InstrEmit_vrlw_ helper and returns its result.
XEEMITTER(vrlw, 0x10000084, VX)(PPCHIRBuilder& f, InstrData& i) {
  return InstrEmit_vrlw_(f, i.VX.VD, i.VX.VA, i.VX.VB);
}
|
||||
// vrlw128: word-lane rotate left, VX128 encoding. Forwards the VX128
// register fields to the shared InstrEmit_vrlw_ helper and returns its
// result.
XEEMITTER(vrlw128, VX128(6, 80), VX128)(PPCHIRBuilder& f, InstrData& i) {
  return InstrEmit_vrlw_(f, VX128_VD128, VX128_VA128, VX128_VB128);
}
|
||||
|
||||
XEEMITTER(vrlimi128, VX128_4(6, 1808), VX128_4)(PPCHIRBuilder& f,
|
||||
|
|
|
@ -1661,6 +1661,17 @@ Value* HIRBuilder::RotateLeft(Value* value1, Value* value2) {
|
|||
return i->dest;
|
||||
}
|
||||
|
||||
// Appends an OPCODE_VECTOR_ROTATE_LEFT instruction taking |value1| and
// |value2| as its two sources, with |part_type| passed as the instruction
// flags argument. Both inputs are asserted to be vector-typed. The returned
// destination value is allocated with the same type as |value1|.
Value* HIRBuilder::VectorRotateLeft(Value* value1, Value* value2, TypeName part_type) {
  ASSERT_VECTOR_TYPE(value1);
  ASSERT_VECTOR_TYPE(value2);

  auto result = AllocValue(value1->type);
  Instr* instr = AppendInstr(OPCODE_VECTOR_ROTATE_LEFT_info, part_type, result);
  instr->set_src1(value1);
  instr->set_src2(value2);
  // The opcode has no third operand.
  instr->src3.value = NULL;
  return instr->dest;
}
|
||||
|
||||
Value* HIRBuilder::ByteSwap(Value* value) {
|
||||
if (value->type == INT8_TYPE) {
|
||||
return value;
|
||||
|
|
|
@ -199,6 +199,7 @@ class HIRBuilder {
|
|||
Value* Sha(Value* value1, int8_t value2);
|
||||
Value* VectorSha(Value* value1, Value* value2, TypeName part_type);
|
||||
Value* RotateLeft(Value* value1, Value* value2);
|
||||
Value* VectorRotateLeft(Value* value1, Value* value2, TypeName part_type);
|
||||
Value* ByteSwap(Value* value);
|
||||
Value* CountLeadingZeros(Value* value);
|
||||
Value* Insert(Value* value, Value* index, Value* part);
|
||||
|
|
|
@ -165,6 +165,7 @@ enum Opcode {
|
|||
OPCODE_SHA,
|
||||
OPCODE_VECTOR_SHA,
|
||||
OPCODE_ROTATE_LEFT,
|
||||
OPCODE_VECTOR_ROTATE_LEFT,
|
||||
OPCODE_BYTE_SWAP,
|
||||
OPCODE_CNTLZ,
|
||||
OPCODE_INSERT,
|
||||
|
|
|
@ -539,6 +539,12 @@ DEFINE_OPCODE(
|
|||
OPCODE_SIG_V_V_V,
|
||||
0)
|
||||
|
||||
// Declares OPCODE_VECTOR_ROTATE_LEFT with the display name
// "vector_rotate_left" and the OPCODE_SIG_V_V_V signature; the final
// argument is 0.
// NOTE(review): presumably the signature means dest, src1 and src2 are all
// values - confirm against the OPCODE_SIG_* definitions.
DEFINE_OPCODE(
|
||||
OPCODE_VECTOR_ROTATE_LEFT,
|
||||
"vector_rotate_left",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_BYTE_SWAP,
|
||||
"byte_swap",
|
||||
|
|
|
@ -108,6 +108,29 @@ inline bool bit_scan_forward(int64_t v, uint32_t* out_first_set_index) {
|
|||
return bit_scan_forward(static_cast<uint64_t>(v), out_first_set_index);
|
||||
}
|
||||
|
||||
// Rotates the bits of |v| left by |sh| positions, wrapping the bits that are
// shifted out of the top back into the bottom.
//
// The count is reduced modulo the bit width of T. Without that reduction a
// count of zero would shift right by the full operand width and a count
// larger than the width would shift by a negative amount; both are undefined
// behavior in C++.
template <typename T>
inline T rotate_left(T v, uint8_t sh) {
  const unsigned bits = static_cast<unsigned>(sizeof(T) * 8);
  // |bits| is a power of two for the fixed-width integer types, so masking
  // with (bits - 1) is the same as taking the count modulo |bits|.
  const unsigned amount = sh & (bits - 1);
  if (amount == 0) {
    return v;
  }
  return static_cast<T>((T(v) << amount) | (T(v) >> (bits - amount)));
}
|
||||
#if XE_COMPILER_MSVC
// When XE_COMPILER_MSVC is set, the fixed-width unsigned rotates are
// specialized to call the compiler's rotate intrinsics instead of the generic
// shift-and-or implementation.
template <>
inline uint8_t rotate_left(uint8_t value, uint8_t count) {
  return _rotl8(value, count);
}
template <>
inline uint16_t rotate_left(uint16_t value, uint8_t count) {
  return _rotl16(value, count);
}
template <>
inline uint32_t rotate_left(uint32_t value, uint8_t count) {
  return _rotl(value, count);
}
template <>
inline uint64_t rotate_left(uint64_t value, uint8_t count) {
  return _rotl64(value, count);
}
#endif  // XE_COMPILER_MSVC
|
||||
|
||||
// Utilities for SSE values.
|
||||
template <int N>
|
||||
float m128_f32(const __m128& v) {
|
||||
|
|
Loading…
Reference in New Issue