VectorRotateLeft for vrl*.
This commit is contained in:
parent
333fc71b44
commit
ff59f23de0
|
@ -3640,30 +3640,26 @@ int Translate_VECTOR_SHA(TranslationContext& ctx, Instr* i) {
|
||||||
return DispatchToC(ctx, i, fns[i->flags]);
|
return DispatchToC(ctx, i, fns[i->flags]);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
T ROTL(T v, int8_t sh) {
|
|
||||||
return (T(v) << sh) | (T(v) >> ((sizeof(T) * 8) - sh));
|
|
||||||
}
|
|
||||||
// Interpreter handler for an 8-bit rotate-left.
// Reads the value from the i8 field of register src1 and the rotate count from
// the i8 field of register src2, stores the rotated value into the i8 field of
// register dest, and returns IA_NEXT.
uint32_t IntCode_ROTATE_LEFT_I8(IntCodeState& ics, const IntCode* i) {
  const auto value = ics.rf[i->src1_reg].i8;
  const auto amount = ics.rf[i->src2_reg].i8;
  ics.rf[i->dest_reg].i8 = poly::rotate_left<uint8_t>(value, amount);
  return IA_NEXT;
}
|
||||||
// Interpreter handler for a 16-bit rotate-left.
// Reads the value from the i16 field of register src1 and the rotate count
// from the i8 field of register src2, stores the rotated value into the i16
// field of register dest, and returns IA_NEXT.
uint32_t IntCode_ROTATE_LEFT_I16(IntCodeState& ics, const IntCode* i) {
  const auto value = ics.rf[i->src1_reg].i16;
  const auto amount = ics.rf[i->src2_reg].i8;
  ics.rf[i->dest_reg].i16 = poly::rotate_left<uint16_t>(value, amount);
  return IA_NEXT;
}
|
||||||
// Interpreter handler for a 32-bit rotate-left.
// Reads the value from the i32 field of register src1 and the rotate count
// from the i8 field of register src2, stores the rotated value into the i32
// field of register dest, and returns IA_NEXT.
uint32_t IntCode_ROTATE_LEFT_I32(IntCodeState& ics, const IntCode* i) {
  // TODO(benvanik): use _rotl on vc++
  const auto value = ics.rf[i->src1_reg].i32;
  const auto amount = ics.rf[i->src2_reg].i8;
  ics.rf[i->dest_reg].i32 = poly::rotate_left<uint32_t>(value, amount);
  return IA_NEXT;
}
|
||||||
// Interpreter handler for a 64-bit rotate-left.
// Reads the value from the i64 field of register src1 and the rotate count
// from the i8 field of register src2, stores the rotated value into the i64
// field of register dest, and returns IA_NEXT.
uint32_t IntCode_ROTATE_LEFT_I64(IntCodeState& ics, const IntCode* i) {
  // TODO(benvanik): use _rotl64 on vc++
  const auto value = ics.rf[i->src1_reg].i64;
  const auto amount = ics.rf[i->src2_reg].i8;
  ics.rf[i->dest_reg].i64 = poly::rotate_left<uint64_t>(value, amount);
  return IA_NEXT;
}
|
||||||
int Translate_ROTATE_LEFT(TranslationContext& ctx, Instr* i) {
|
int Translate_ROTATE_LEFT(TranslationContext& ctx, Instr* i) {
|
||||||
|
@ -3675,6 +3671,11 @@ int Translate_ROTATE_LEFT(TranslationContext& ctx, Instr* i) {
|
||||||
return DispatchToC(ctx, i, fns[i->dest->type]);
|
return DispatchToC(ctx, i, fns[i->dest->type]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Translation hook for the vector rotate-left opcode.
// Not implemented in this backend yet: it trips assert_always() and returns 1
// without looking at |ctx| or |i|.
// NOTE(review): a return of 1 presumably signals translation failure —
// confirm against the other Translate_* functions.
int Translate_VECTOR_ROTATE_LEFT(TranslationContext& ctx, Instr* i) {
  assert_always();
  return 1;
}
|
||||||
|
|
||||||
uint32_t IntCode_BYTE_SWAP_I16(IntCodeState& ics, const IntCode* i) {
|
uint32_t IntCode_BYTE_SWAP_I16(IntCodeState& ics, const IntCode* i) {
|
||||||
ics.rf[i->dest_reg].i16 = poly::byte_swap(ics.rf[i->src1_reg].i16);
|
ics.rf[i->dest_reg].i16 = poly::byte_swap(ics.rf[i->src1_reg].i16);
|
||||||
return IA_NEXT;
|
return IA_NEXT;
|
||||||
|
@ -4218,11 +4219,12 @@ static const TranslateFn dispatch_table[] = {
|
||||||
Translate_SHL, Translate_VECTOR_SHL,
|
Translate_SHL, Translate_VECTOR_SHL,
|
||||||
Translate_SHR, Translate_VECTOR_SHR,
|
Translate_SHR, Translate_VECTOR_SHR,
|
||||||
Translate_SHA, Translate_VECTOR_SHA,
|
Translate_SHA, Translate_VECTOR_SHA,
|
||||||
Translate_ROTATE_LEFT, Translate_BYTE_SWAP,
|
Translate_ROTATE_LEFT, Translate_VECTOR_ROTATE_LEFT,
|
||||||
Translate_CNTLZ, Translate_INSERT,
|
Translate_BYTE_SWAP, Translate_CNTLZ,
|
||||||
Translate_EXTRACT, Translate_SPLAT,
|
Translate_INSERT, Translate_EXTRACT,
|
||||||
Translate_PERMUTE, Translate_SWIZZLE,
|
Translate_SPLAT, Translate_PERMUTE,
|
||||||
Translate_PACK, Translate_UNPACK,
|
Translate_SWIZZLE, Translate_PACK,
|
||||||
|
Translate_UNPACK,
|
||||||
TranslateInvalid, // Translate_COMPARE_EXCHANGE,
|
TranslateInvalid, // Translate_COMPARE_EXCHANGE,
|
||||||
Translate_ATOMIC_EXCHANGE,
|
Translate_ATOMIC_EXCHANGE,
|
||||||
TranslateInvalid, // Translate_ATOMIC_ADD,
|
TranslateInvalid, // Translate_ATOMIC_ADD,
|
||||||
|
|
|
@ -550,6 +550,7 @@ Address X64Emitter::GetXmmConstPtr(XmmConst id) {
|
||||||
/* XMMUnsignedDwordMax */ vec128i(0xFFFFFFFFu, 0x00000000u,
|
/* XMMUnsignedDwordMax */ vec128i(0xFFFFFFFFu, 0x00000000u,
|
||||||
0xFFFFFFFFu, 0x00000000u),
|
0xFFFFFFFFu, 0x00000000u),
|
||||||
/* XMM255 */ vec128f(255.0f, 255.0f, 255.0f, 255.0f),
|
/* XMM255 */ vec128f(255.0f, 255.0f, 255.0f, 255.0f),
|
||||||
|
/* XMMPI32 */ vec128i(32, 32, 32, 32),
|
||||||
/* XMMSignMaskI8 */ vec128i(0x80808080u, 0x80808080u,
|
/* XMMSignMaskI8 */ vec128i(0x80808080u, 0x80808080u,
|
||||||
0x80808080u, 0x80808080u),
|
0x80808080u, 0x80808080u),
|
||||||
/* XMMSignMaskI16 */ vec128i(0x80008000u, 0x80008000u,
|
/* XMMSignMaskI16 */ vec128i(0x80008000u, 0x80008000u,
|
||||||
|
|
|
@ -60,6 +60,7 @@ enum XmmConst {
|
||||||
XMMShiftByteMask,
|
XMMShiftByteMask,
|
||||||
XMMUnsignedDwordMax,
|
XMMUnsignedDwordMax,
|
||||||
XMM255,
|
XMM255,
|
||||||
|
XMMPI32,
|
||||||
XMMSignMaskI8,
|
XMMSignMaskI8,
|
||||||
XMMSignMaskI16,
|
XMMSignMaskI16,
|
||||||
XMMSignMaskI32,
|
XMMSignMaskI32,
|
||||||
|
|
|
@ -4475,6 +4475,76 @@ EMITTER_OPCODE_TABLE(
|
||||||
ROTATE_LEFT_I64);
|
ROTATE_LEFT_I64);
|
||||||
|
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// OPCODE_VECTOR_ROTATE_LEFT
|
||||||
|
// ============================================================================
|
||||||
|
// TODO(benvanik): AVX512 has a native variable rotate (rolv).
|
||||||
|
// Emits x64 code for OPCODE_VECTOR_ROTATE_LEFT on 128-bit vectors. The lane
// width is selected by the instruction flags:
//   INT8_TYPE / INT16_TYPE - no native per-lane variable rotate is used, so a
//                            C++ helper is called to rotate lane by lane.
//   INT32_TYPE             - composed inline as (src1 << n) | (src1 >> (32 - n))
//                            using the AVX2 per-lane variable shifts.
// Any other flag value asserts.
EMITTER(VECTOR_ROTATE_LEFT_V128, MATCH(I<OPCODE_VECTOR_ROTATE_LEFT, V128<>, V128<>, V128<>>)) {
  // Rotates each of the 16 byte lanes of |src1| left by the count held in the
  // matching byte lane of |src2|. An 8-bit lane has eight distinct rotate
  // amounts, so the count is taken from the low 3 bits (0x7).
  static __m128i EmulateVectorRotateLeftI8(__m128i src1, __m128i src2) {
    alignas(16) __m128i value;
    alignas(16) __m128i shamt;
    _mm_store_si128(&value, src1);
    _mm_store_si128(&shamt, src2);
    for (size_t i = 0; i < 16; ++i) {
      value.m128i_u8[i] = poly::rotate_left<uint8_t>(
          value.m128i_u8[i], shamt.m128i_u8[i] & 0x7);
    }
    return _mm_load_si128(&value);
  }
  // Rotates each of the 8 halfword lanes of |src1| left by the count held in
  // the matching halfword lane of |src2|; the count is the low 4 bits (0xF).
  static __m128i EmulateVectorRotateLeftI16(__m128i src1, __m128i src2) {
    alignas(16) __m128i value;
    alignas(16) __m128i shamt;
    _mm_store_si128(&value, src1);
    _mm_store_si128(&shamt, src2);
    for (size_t i = 0; i < 8; ++i) {
      value.m128i_u16[i] = poly::rotate_left<uint16_t>(
          value.m128i_u16[i], shamt.m128i_u16[i] & 0xF);
    }
    return _mm_load_si128(&value);
  }
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    switch (i.instr->flags) {
      case INT8_TYPE:
        // TODO(benvanik): native version (with shift magic).
        // NOTE(review): both operands go through StashXmm() with no slot
        // index — confirm the two calls return distinct addresses, and that
        // r8/r9 line up with how CallNativeSafe passes the helper's
        // (src1, src2) parameters.
        e.lea(e.r8, e.StashXmm(i.src1));
        e.lea(e.r9, e.StashXmm(i.src2));
        e.CallNativeSafe(reinterpret_cast<void*>(EmulateVectorRotateLeftI8));
        e.vmovaps(i.dest, e.xmm0);
        break;
      case INT16_TYPE:
        // TODO(benvanik): native version (with shift magic).
        // NOTE(review): same StashXmm()/argument-register caveat as the
        // INT8_TYPE case above.
        e.lea(e.r8, e.StashXmm(i.src1));
        e.lea(e.r9, e.StashXmm(i.src2));
        e.CallNativeSafe(reinterpret_cast<void*>(EmulateVectorRotateLeftI16));
        e.vmovaps(i.dest, e.xmm0);
        break;
      case INT32_TYPE: {
        // |temp| holds the right-shift counts. It may only live in i.dest
        // when i.dest does not alias a source that is still read below.
        Xmm temp = i.dest;
        if (i.dest == i.src1 || i.dest == i.src2) {
          temp = e.xmm2;
        }
        // Shift left (to get high bits):
        e.vpand(e.xmm0, i.src2, e.GetXmmConstPtr(XMMShiftMaskPS));
        e.vpsllvd(e.xmm1, i.src1, e.xmm0);
        // Shift right (to get low bits): the count is 32 - n per lane.
        e.vmovaps(temp, e.GetXmmConstPtr(XMMPI32));
        e.vpsubd(temp, e.xmm0);
        // The right shift must use |temp| (32 - n); using xmm0 (n) here would
        // produce (x << n) | (x >> n) rather than a rotate.
        e.vpsrlvd(i.dest, i.src1, temp);
        // Merge:
        e.vpor(i.dest, e.xmm1);
        break;
      }
      default:
        assert_always();
        break;
    }
  }
};
|
||||||
|
// Opcode table for OPCODE_VECTOR_ROTATE_LEFT; VECTOR_ROTATE_LEFT_V128 is its
// only entry.
EMITTER_OPCODE_TABLE(OPCODE_VECTOR_ROTATE_LEFT, VECTOR_ROTATE_LEFT_V128);
|
||||||
|
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// OPCODE_BYTE_SWAP
|
// OPCODE_BYTE_SWAP
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
@ -5287,6 +5357,7 @@ void RegisterSequences() {
|
||||||
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SHR);
|
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SHR);
|
||||||
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SHA);
|
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SHA);
|
||||||
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_ROTATE_LEFT);
|
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_ROTATE_LEFT);
|
||||||
|
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_ROTATE_LEFT);
|
||||||
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_BYTE_SWAP);
|
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_BYTE_SWAP);
|
||||||
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_CNTLZ);
|
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_CNTLZ);
|
||||||
//REGISTER_EMITTER_OPCODE_TABLE(OPCODE_INSERT);
|
//REGISTER_EMITTER_OPCODE_TABLE(OPCODE_INSERT);
|
||||||
|
|
|
@ -1212,22 +1212,30 @@ XEEMITTER(vrfiz128, VX128_3(6, 1008), VX128_3)(PPCHIRBuilder& f, InstrData& i) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// vrlb: rotate each 8-bit element of VA left by the count in the matching
// element of VB and write the result to VD. Returns 0.
XEEMITTER(vrlb, 0x10000004, VX)(PPCHIRBuilder& f, InstrData& i) {
  // (VD) <- ROTL((VA), (VB)&0x7)
  Value* rotated =
      f.VectorRotateLeft(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT8_TYPE);
  f.StoreVR(i.VX.VD, rotated);
  return 0;
}
|
||||||
|
|
||||||
// vrlh: rotate each 16-bit element of VA left by the count in the matching
// element of VB and write the result to VD. Returns 0.
XEEMITTER(vrlh, 0x10000044, VX)(PPCHIRBuilder& f, InstrData& i) {
  // (VD) <- ROTL((VA), (VB)&0xF)
  Value* rotated =
      f.VectorRotateLeft(f.LoadVR(i.VX.VA), f.LoadVR(i.VX.VB), INT16_TYPE);
  f.StoreVR(i.VX.VD, rotated);
  return 0;
}
|
||||||
|
|
||||||
|
// Shared body for the vrlw and vrlw128 emitters: rotates each 32-bit element
// of vector register |va| left by the count in the matching element of |vb|
// and stores the result to vector register |vd|. Returns 0.
int InstrEmit_vrlw_(PPCHIRBuilder& f, uint32_t vd, uint32_t va, uint32_t vb) {
  // (VD) <- ROTL((VA), (VB)&0x1F)
  Value* rotated = f.VectorRotateLeft(f.LoadVR(va), f.LoadVR(vb), INT32_TYPE);
  f.StoreVR(vd, rotated);
  return 0;
}
|
||||||
// vrlw: VX-form entry point; pulls the register numbers out of the VX fields
// and forwards to InstrEmit_vrlw_, returning its result.
XEEMITTER(vrlw, 0x10000084, VX)(PPCHIRBuilder& f, InstrData& i) {
  const uint32_t vd = i.VX.VD;
  const uint32_t va = i.VX.VA;
  const uint32_t vb = i.VX.VB;
  return InstrEmit_vrlw_(f, vd, va, vb);
}
|
||||||
// vrlw128: VX128-form entry point; takes the register numbers from the
// VX128_* accessors and forwards to InstrEmit_vrlw_, returning its result.
XEEMITTER(vrlw128, VX128(6, 80), VX128)(PPCHIRBuilder& f, InstrData& i) {
  const uint32_t vd = VX128_VD128;
  const uint32_t va = VX128_VA128;
  const uint32_t vb = VX128_VB128;
  return InstrEmit_vrlw_(f, vd, va, vb);
}
|
||||||
|
|
||||||
XEEMITTER(vrlimi128, VX128_4(6, 1808), VX128_4)(PPCHIRBuilder& f,
|
XEEMITTER(vrlimi128, VX128_4(6, 1808), VX128_4)(PPCHIRBuilder& f,
|
||||||
|
|
|
@ -1661,6 +1661,17 @@ Value* HIRBuilder::RotateLeft(Value* value1, Value* value2) {
|
||||||
return i->dest;
|
return i->dest;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Appends an OPCODE_VECTOR_ROTATE_LEFT instruction to the builder.
//   value1    - vector operand, set as src1; the result value is allocated
//               with the same type.
//   value2    - vector operand, set as src2.
//   part_type - passed through as the instruction's flags argument.
// Both operands are checked with ASSERT_VECTOR_TYPE. Returns the
// instruction's dest value.
Value* HIRBuilder::VectorRotateLeft(Value* value1, Value* value2, TypeName part_type) {
  ASSERT_VECTOR_TYPE(value1);
  ASSERT_VECTOR_TYPE(value2);

  Value* result = AllocValue(value1->type);
  Instr* instr =
      AppendInstr(OPCODE_VECTOR_ROTATE_LEFT_info, part_type, result);
  instr->set_src1(value1);
  instr->set_src2(value2);
  // Only two sources are used; the third is explicitly cleared.
  instr->src3.value = NULL;
  return instr->dest;
}
|
||||||
|
|
||||||
Value* HIRBuilder::ByteSwap(Value* value) {
|
Value* HIRBuilder::ByteSwap(Value* value) {
|
||||||
if (value->type == INT8_TYPE) {
|
if (value->type == INT8_TYPE) {
|
||||||
return value;
|
return value;
|
||||||
|
|
|
@ -199,6 +199,7 @@ class HIRBuilder {
|
||||||
Value* Sha(Value* value1, int8_t value2);
|
Value* Sha(Value* value1, int8_t value2);
|
||||||
Value* VectorSha(Value* value1, Value* value2, TypeName part_type);
|
Value* VectorSha(Value* value1, Value* value2, TypeName part_type);
|
||||||
Value* RotateLeft(Value* value1, Value* value2);
|
Value* RotateLeft(Value* value1, Value* value2);
|
||||||
|
Value* VectorRotateLeft(Value* value1, Value* value2, TypeName part_type);
|
||||||
Value* ByteSwap(Value* value);
|
Value* ByteSwap(Value* value);
|
||||||
Value* CountLeadingZeros(Value* value);
|
Value* CountLeadingZeros(Value* value);
|
||||||
Value* Insert(Value* value, Value* index, Value* part);
|
Value* Insert(Value* value, Value* index, Value* part);
|
||||||
|
|
|
@ -165,6 +165,7 @@ enum Opcode {
|
||||||
OPCODE_SHA,
|
OPCODE_SHA,
|
||||||
OPCODE_VECTOR_SHA,
|
OPCODE_VECTOR_SHA,
|
||||||
OPCODE_ROTATE_LEFT,
|
OPCODE_ROTATE_LEFT,
|
||||||
|
OPCODE_VECTOR_ROTATE_LEFT,
|
||||||
OPCODE_BYTE_SWAP,
|
OPCODE_BYTE_SWAP,
|
||||||
OPCODE_CNTLZ,
|
OPCODE_CNTLZ,
|
||||||
OPCODE_INSERT,
|
OPCODE_INSERT,
|
||||||
|
|
|
@ -539,6 +539,12 @@ DEFINE_OPCODE(
|
||||||
OPCODE_SIG_V_V_V,
|
OPCODE_SIG_V_V_V,
|
||||||
0)
|
0)
|
||||||
|
|
||||||
|
// Opcode info entry for OPCODE_VECTOR_ROTATE_LEFT: display name
// "vector_rotate_left", signature OPCODE_SIG_V_V_V, flags value 0.
DEFINE_OPCODE(OPCODE_VECTOR_ROTATE_LEFT, "vector_rotate_left",
              OPCODE_SIG_V_V_V, 0)
|
||||||
|
|
||||||
DEFINE_OPCODE(
|
DEFINE_OPCODE(
|
||||||
OPCODE_BYTE_SWAP,
|
OPCODE_BYTE_SWAP,
|
||||||
"byte_swap",
|
"byte_swap",
|
||||||
|
|
|
@ -108,6 +108,29 @@ inline bool bit_scan_forward(int64_t v, uint32_t* out_first_set_index) {
|
||||||
return bit_scan_forward(static_cast<uint64_t>(v), out_first_set_index);
|
return bit_scan_forward(static_cast<uint64_t>(v), out_first_set_index);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Rotates the bits of |v| left by |sh| positions and returns the result.
//
// The count is reduced modulo the bit width of T, so a count of zero (or any
// multiple of the width) returns |v| unchanged. Without that reduction the
// complementary right shift would be by the full width of T (or by a negative
// amount), which is undefined behaviour in C++.
//
// Intended for unsigned T; with a signed T the right shift may sign-extend.
template <typename T>
inline T rotate_left(T v, uint8_t sh) {
  const unsigned bits = static_cast<unsigned>(sizeof(T) * 8);
  const unsigned count = sh % bits;
  if (count == 0) {
    return v;
  }
  // The outer T(...) discards bits above the width of T that appear when a
  // narrow T is promoted to int for the shifts.
  return T((T(v) << count) | (T(v) >> (bits - count)));
}
#if XE_COMPILER_MSVC
// On MSVC the fixed-width cases go straight to the compiler rotate intrinsics.
template <>
inline uint8_t rotate_left(uint8_t v, uint8_t sh) {
  return _rotl8(v, sh);
}
template <>
inline uint16_t rotate_left(uint16_t v, uint8_t sh) {
  return _rotl16(v, sh);
}
template <>
inline uint32_t rotate_left(uint32_t v, uint8_t sh) {
  return _rotl(v, sh);
}
template <>
inline uint64_t rotate_left(uint64_t v, uint8_t sh) {
  return _rotl64(v, sh);
}
#endif  // XE_COMPILER_MSVC
|
||||||
|
|
||||||
// Utilities for SSE values.
|
// Utilities for SSE values.
|
||||||
template <int N>
|
template <int N>
|
||||||
float m128_f32(const __m128& v) {
|
float m128_f32(const __m128& v) {
|
||||||
|
|
Loading…
Reference in New Issue