[x64] Add AVX512 optimization for `VECTOR_ROTATE_LEFT(Int32)`
`vprolvd` is an almost 1:1 analog of this opcode and can be conditionally emitted when the host supports AVX512{F,VL}. The Altivec docs say that `vrl{b,h,w}` uses only the low log2(n) bits of each element's rotate count, where n is the element size in bits. [vprolvd](https://www.felixcloutier.com/x86/vprold:vprolvd:vprolq:vprolvq) takes each element's rotate count modulo the element size in bits, which is the same as keeping only the low log2(n) bits. So `vrlw` maps exactly to `vprolvd`.
This commit is contained in:
parent
7e894d10a7
commit
337f0b2948
|
@ -2,7 +2,7 @@
|
|||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2018 Xenia Developers. All rights reserved. *
|
||||
* Copyright 2022 Xenia Developers. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
@ -1287,7 +1287,6 @@ static __m128i EmulateVectorRotateLeft(void*, __m128i src1, __m128i src2) {
|
|||
return _mm_load_si128(reinterpret_cast<__m128i*>(value));
|
||||
}
|
||||
|
||||
// TODO(benvanik): AVX512 has a native variable rotate (rolv).
|
||||
struct VECTOR_ROTATE_LEFT_V128
|
||||
: Sequence<VECTOR_ROTATE_LEFT_V128,
|
||||
I<OPCODE_VECTOR_ROTATE_LEFT, V128Op, V128Op, V128Op>> {
|
||||
|
@ -1318,7 +1317,9 @@ struct VECTOR_ROTATE_LEFT_V128
|
|||
e.vmovaps(i.dest, e.xmm0);
|
||||
break;
|
||||
case INT32_TYPE: {
|
||||
if (e.IsFeatureEnabled(kX64EmitAVX2)) {
|
||||
if (e.IsFeatureEnabled(kX64EmitAVX512Ortho)) {
|
||||
e.vprolvd(i.dest, i.src1, i.src2);
|
||||
} else if (e.IsFeatureEnabled(kX64EmitAVX2)) {
|
||||
Xmm temp = i.dest;
|
||||
if (i.dest == i.src1 || i.dest == i.src2) {
|
||||
temp = e.xmm2;
|
||||
|
|
Loading…
Reference in New Issue