[x64] Add AVX512 optimization for `VECTOR_ROTATE_LEFT(Int32)`

`vprolvd` is an almost 1:1 analog of this opcode and can be
conditionally emitted when the host supports AVX512{F,VL}.

Altivec docs say that `vrl{bhw}` takes each rotate count from the low
log2(n) bits of the corresponding element, where n is the element size
in bits.

[`vprolvd`](https://www.felixcloutier.com/x86/vprold:vprolvd:vprolq:vprolvq)
takes each rotate count modulo the element size in bits, which is the
same as keeping only the low log2(n) bits for an n-bit element. So
`vrlw` maps exactly to `vprolvd`.
This commit is contained in:
Wunkolo 2022-02-03 13:19:33 -08:00 committed by Rick Gibbed
parent 7e894d10a7
commit 337f0b2948
1 changed files with 5 additions and 4 deletions

View File

@ -2,7 +2,7 @@
****************************************************************************** ******************************************************************************
* Xenia : Xbox 360 Emulator Research Project * * Xenia : Xbox 360 Emulator Research Project *
****************************************************************************** ******************************************************************************
* Copyright 2018 Xenia Developers. All rights reserved. * * Copyright 2022 Xenia Developers. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. * * Released under the BSD license - see LICENSE in the root for more details. *
****************************************************************************** ******************************************************************************
*/ */
@ -1287,7 +1287,6 @@ static __m128i EmulateVectorRotateLeft(void*, __m128i src1, __m128i src2) {
return _mm_load_si128(reinterpret_cast<__m128i*>(value)); return _mm_load_si128(reinterpret_cast<__m128i*>(value));
} }
// TODO(benvanik): AVX512 has a native variable rotate (rolv).
struct VECTOR_ROTATE_LEFT_V128 struct VECTOR_ROTATE_LEFT_V128
: Sequence<VECTOR_ROTATE_LEFT_V128, : Sequence<VECTOR_ROTATE_LEFT_V128,
I<OPCODE_VECTOR_ROTATE_LEFT, V128Op, V128Op, V128Op>> { I<OPCODE_VECTOR_ROTATE_LEFT, V128Op, V128Op, V128Op>> {
@ -1318,7 +1317,9 @@ struct VECTOR_ROTATE_LEFT_V128
e.vmovaps(i.dest, e.xmm0); e.vmovaps(i.dest, e.xmm0);
break; break;
case INT32_TYPE: { case INT32_TYPE: {
if (e.IsFeatureEnabled(kX64EmitAVX2)) { if (e.IsFeatureEnabled(kX64EmitAVX512Ortho)) {
e.vprolvd(i.dest, i.src1, i.src2);
} else if (e.IsFeatureEnabled(kX64EmitAVX2)) {
Xmm temp = i.dest; Xmm temp = i.dest;
if (i.dest == i.src1 || i.dest == i.src2) { if (i.dest == i.src1 || i.dest == i.src2) {
temp = e.xmm2; temp = e.xmm2;
@ -2683,4 +2684,4 @@ EMITTER_OPCODE_TABLE(OPCODE_UNPACK, UNPACK);
} // namespace x64 } // namespace x64
} // namespace backend } // namespace backend
} // namespace cpu } // namespace cpu
} // namespace xe } // namespace xe